#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \
                   str_nonempty, str_substr

# Characters treated as whitespace by the strip and split methods.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    # Internal methods.

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, passing the
        data of this object as the first operand. NotImplemented is returned
        if 'other' is not an instance of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, passing the
        data of 'other' as the first operand. NotImplemented is returned if
        'other' is not an instance of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def _quote(self, quote):

        """
        Return a quoted representation of this string, enclosed in 'quote'
        characters. Occurrences of 'quote' and of the backslash are preceded
        by a backslash; tab, newline and carriage return use "\\t", "\\n" and
        "\\r"; any other character whose value is outside the range 32 to 127
        is written as a two-digit "\\x" hexadecimal escape.
        """

        b = buffer([quote])
        i = last = 0
        end = self.__len__()

        while i < end:
            c = self[i]

            # Handle quotes and backslashes before anything else. Any pending
            # unquoted text must be emitted first: 'last' is advanced beyond
            # the escaped character and the text would otherwise be lost.

            if c == quote or c == "\\":
                b.append(self[last:i])
                b.append("\\")
                b.append(c)
                i += 1
                last = i
                continue

            # Extended unquoted text.

            n = ord(c)

            if 32 <= n < 128:
                i += 1
                continue

            # Before quoting, emit unquoted text.

            b.append(self[last:i])

            # Add quoted value.

            if c == "\t":
                b.append("\\t")
            elif c == "\n":
                b.append("\\n")
            elif c == "\r":
                b.append("\\r")
            else:

                # Negative values are mapped into the range 128 to 255.

                if n < 0:
                    n += 256
                b.append("\\x")
                x = hex(n, "")

                # Pad to two hexadecimal digits.

                if len(x) < 2:
                    b.append("0")
                b.append(x)

            i += 1
            last = i

        # Emit remaining unquoted text.

        b.append(self[last:])
        b.append(quote)
        return str(b)

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # General type methods.

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value', as determined by find."

        return self.find(value) != -1

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    # The length of a string is its length in bytes.

    __len__ = bytelength

    def __repr__(self):

        "Return a program representation, quoted using double quotes."

        return self._quote('"')

    def __str__(self):

        "Return a string representation, this being the string itself."

        return self

    # Operator methods.

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    __add__ = __iadd__

    def __radd__(self, other):

        """
        Return a string combining 'other' with this string, the data of 'other'
        being given as the first operand.
        """

        return self._binary_op_rev(str_add, other)

    # Formatting operations are not implemented here.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing 'other'
        copies of this string, or an empty result if 'other' is not positive.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # Comparison methods.

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    # String-specific methods.

    def __ord__(self):

        """
        Return the value of the string, if only a single character. Otherwise,
        raise ValueError.
        """

        if self.__len__() == 1:
            return str_ord(self.__data__)
        else:
            raise ValueError(self)

    def endswith(self, s):

        """
        Return whether this string ends with 's'. An empty 's' is regarded as
        being present at the end of any string.
        """

        slen = s.__len__()
        selflen = self.__len__()

        # A longer string cannot be a suffix of this one.

        if slen > selflen:
            return False

        # Slice from an explicit non-negative position: negating a zero length
        # would select the entire string instead of an empty suffix.

        return self[selflen - slen:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        if end is None:
            end = self.__len__()

        # Obtain the last position at which 'sub' could start.

        end -= sublen

        i = start or 0

        while i <= end:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:
            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    def lower(self): pass

    def lstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the start of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = 0
        end = self.__len__()

        while i < end and self[i] in (chars or WHITESPACE):
            i += 1

        return self[i:]

    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start from the last position at which 'sub' could occur.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    def rsplit(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the end of the string.

        ValueError is raised if 'sep' is given but empty.
        """

        # Without a limit, the direction of splitting does not matter.

        if not maxsplit:
            return self.split(sep, maxsplit)

        if sep is not None and not sep:
            raise ValueError(sep)

        seplen = sep and len(sep) or 1
        start = seplen
        splits = 0

        l = []
        i = last = self.__len__()

        while i >= start and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i-seplen:i] == sep:
                l.insert(0, self[i:last])
                i -= seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i-1] in WHITESPACE:
                l.insert(0, self[i:last])
                while i > start:
                    i -= 1
                    if self[i-1] not in WHITESPACE:
                        break

                # NOTE(review): where only whitespace remains at the start of
                # NOTE(review): the string, this exits the outer loop without
                # NOTE(review): updating 'last', so the final element appears
                # NOTE(review): to repeat text already added. To be confirmed.

                else:
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i -= 1

        l.insert(0, self[:last])
        return l

    def rstrip(self, chars=None):

        """
        Strip any of the given 'chars' from the end of the string, or strip
        whitespace characters if 'chars' is omitted or None.
        """

        # An empty collection of characters leaves the string unchanged.

        if chars is not None and not chars:
            return self

        i = self.__len__() - 1

        while i >= 0 and self[i] in (chars or WHITESPACE):
            i -= 1

        return self[:i+1]

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).
        Where 'maxsplit' is given, the number of split points is counted from
        the start of the string.

        ValueError is raised if 'sep' is given but empty.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        # A limit of zero yields the string itself as the only element.

        if maxsplit is not None and not maxsplit:
            return [self]

        seplen = sep and len(sep) or 1
        end = self.__len__() - seplen
        splits = 0

        l = []
        i = last = 0

        while i <= end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])
                while i < end:
                    i += 1
                    if self[i] not in WHITESPACE:
                        break

                # NOTE(review): where only whitespace remains at the end of the
                # NOTE(review): string, this exits the outer loop without
                # NOTE(review): updating 'last', so the final element appears
                # NOTE(review): to repeat text already added. To be confirmed.

                else:
                    break
                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    def strip(self, chars=None):

        """
        Strip any of the given 'chars' from the start and end of the string, or
        strip whitespace characters if 'chars' is omitted or None.
        """

        return self.lstrip(chars).rstrip(chars)

    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        "Return the item at the normalised (positive) 'index'."

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. ValueError is raised if 'step' is zero.
        """

        # An empty range yields an empty string before 'step' is examined.

        if start == end:
            return ""

        check_int(step)

        if step == 0:
            raise ValueError(step)

        # Obtain the individual items using the inherited method and combine
        # them into a single string.

        l = get_using(basestring.__get_multiple_items__, self)(start, end, step)
        return "".join(l)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4