#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \
                   str_substr

# Characters regarded as whitespace by the strip and split methods.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        """
        Initialise the string, perhaps from 'other'. Where 'other' is omitted,
        None or tests as false, the special members are set to None.
        """

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__data__ = other.__data__
            self.__key__ = other.__key__
        else:
            self.__data__ = None
            self.__key__ = None

    # Internal methods.

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning
        NotImplemented if the operands are not instances of the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def _quote(self, quote):

        """
        Return a quoted representation of this string, delimited by 'quote'.
        Occurrences of 'quote' and of the backslash are preceded by a backslash,
        tab, newline and carriage return are written as "\\t", "\\n" and "\\r",
        and any other character whose ord value is outside the range 32 to 127
        is written as "\\x" followed by two hexadecimal digits.
        """

        b = buffer([quote])

        # The span from 'last' up to 'i' is text that needs no quoting and that
        # has not yet been emitted.

        i = last = 0
        end = self.__len__()

        while i < end:
            c = self[i]

            # Handle quotes and backslashes before anything else, emitting any
            # pending unquoted text first so that it is not lost.

            if c == quote or c == "\\":
                b.append(self[last:i])
                b.append("\\")
                b.append(c)
                i += 1
                last = i
                continue

            # Extended unquoted text.

            n = ord(c)

            if 32 <= n < 128:
                i += 1
                continue

            # Before quoting, emit unquoted text.

            b.append(self[last:i])

            # Add quoted value.

            if c == "\t":
                b.append("\\t")
            elif c == "\n":
                b.append("\\n")
            elif c == "\r":
                b.append("\\r")
            else:
                # Negative values are mapped into the range 0 to 255.
                # NOTE(review): presumably ord yields signed byte values here.

                if n < 0:
                    n += 256

                # NOTE(review): the empty second argument to hex presumably
                # NOTE(review): suppresses any prefix; confirm against hex.

                b.append("\\x")
                x = hex(n, "")
                if len(x) < 2:
                    b.append("0")
                b.append(x)

            i += 1
            last = i

        # Emit remaining unquoted text.

        b.append(self[last:])
        b.append(quote)
        return str(b)

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # General type methods.

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    __len__ = bytelength

    def __repr__(self):

        "Return a program representation."

        return self._quote('"')

    def __str__(self):

        "Return a string representation, this being the string itself."

        return self

    # Operator methods.

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    # Placeholders without any implementation.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing this
        string repeated 'other' times, or an empty result if 'other' is not
        greater than zero.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # Comparison methods.

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    # String-specific methods.

    def endswith(self, s):

        """
        Return whether this string ends with 's'. An empty 's' is regarded as
        ending every string.
        """

        n = s.__len__()

        # A slice from -0 would select the entire string, not the empty suffix.

        if n == 0:
            return True

        return self[-n:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # Obtain the last position at which 'sub' could start.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:
            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # Placeholder without any implementation.

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None. Where 'chars' is
        given but empty, the string itself is returned.
        """

        if chars is not None and not chars:
            return self

        strippable = chars or WHITESPACE

        i = 0
        end = self.__len__()

        while i < end and self[i] in strippable:
            i += 1

        return self[i:]

    # Placeholder without any implementation.

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Begin at the last position at which 'sub' could start.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        A ValueError is raised if 'sep' is given but empty. When splitting on
        whitespace, adjacent whitespace characters act as a single separator,
        and whitespace extending to the start of the string does not produce a
        further element.
        """

        # Without a limit, or with a limit of zero, the direction of splitting
        # makes no difference.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        if sep:
            seplen = len(sep)
        else:
            seplen = 1

        # A separator cannot end before position 'start'.

        start = seplen
        splits = 0

        # The span from 'i' up to 'last' is the text found so far for the
        # element currently being collected.

        l = []
        i = last = self.__len__()

        while i >= start and splits < maxsplit:

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break
                else:
                    # Only whitespace precedes the element just added: return
                    # now rather than adding that element's text again.

                    return l
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None. Where 'chars' is
        given but empty, the string itself is returned.
        """

        if chars is not None and not chars:
            return self

        strippable = chars or WHITESPACE

        i = self.__len__() - 1

        while i >= 0 and self[i] in strippable:
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        A ValueError is raised if 'sep' is given but empty. Where 'maxsplit' is
        zero, a list containing only this string is returned. When splitting on
        whitespace, adjacent whitespace characters act as a single separator,
        and whitespace extending to the end of the string does not produce a
        further element.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        if maxsplit is not None and not maxsplit:
            return [self]

        if sep:
            seplen = len(sep)
        else:
            seplen = 1

        # A separator cannot start after position 'end'.

        end = self.__len__() - seplen
        splits = 0

        # The span from 'last' up to 'i' is the text found so far for the
        # element currently being collected.

        l = []
        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break
                else:
                    # Only whitespace follows the element just added: return
                    # now rather than adding that element's text again.

                    return l
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # Placeholder without any implementation.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    # Placeholder without any implementation.

    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        """
        Return the item at the normalised (positive) 'index', this being
        checked using the _check_index method.
        """

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. A ValueError is raised if 'step' is zero.
        """

        start = self._confine_index(start)
        end = self._confine_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # An empty span needs no native substring operation.

        if start == end:
            return ""

        return str_substr(self.__data__, start, end, step)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4