Lichen (file lib/__builtins__/str.py at 9ec67eea98b5)

     1 #!/usr/bin/env python     2      3 """     4 String objects.     5      6 Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from __builtins__.operator import _negate    23 from __builtins__.sequence import hashable, itemaccess    24 from __builtins__.types import check_int    25 from native import str_add, str_lt, str_gt, str_eq, str_len, str_ord, \    26                    str_nonempty, str_substr    27     28 WHITESPACE = (" ", "\f", "\n", "\r", "\t")    29     30 class basestring(hashable):    31     32     "The base class for all strings."    33     34     def __init__(self, other=None):    35     36         "Initialise the string, perhaps from 'other'."    37     38         # Note the __data__ member. Since strings are either initialised from    39         # literals or converted using routines defined for other types, no form    40         # of actual initialisation is performed here.    41     42         # NOTE: Cannot perform "other and other.__data__ or None" since the    43         # NOTE: __data__ attribute is not a normal attribute.    44     45         if other:    46             self.__data__ = other.__data__    47         else:    48             self.__data__ = None    49     50         # Note the __key__ member. This is also initialised statically. Where    51         # a string is the same as an attribute name, the __key__ member contains    52         # attribute position and code details.    53     54         if other:    55             self.__key__ = other.__key__    56         else:    57             self.__key__ = None    58     59     # Internal methods.    60     61     def _binary_op(self, op, other):    62     63         "Perform 'op' on this object and 'other' if appropriate."    64     65         # Refuse to operate on specialisations of this class.    66     67         if self.__class__ is not other.__class__:    68             return NotImplemented    69     70         # Otherwise, perform the operation on the operands' data.    71     72         else:    73             return op(self.__data__, other.__data__)    74     75     def _binary_op_rev(self, op, other):    76     77         "Perform 'op' on 'other' and this object if appropriate."    78     79         # Refuse to operate on specialisations of this class.    80     81         if self.__class__ is not other.__class__:    82             return NotImplemented    83     84         # Otherwise, perform the operation on the operands' data.    85     86         else:    87             return op(other.__data__, self.__data__)    88     89     def _quote(self, quote):    90     91         "Return a quoted representation of this string."    92     93         b = buffer([quote])    94         i = last = 0    95         end = self.__len__()    96     97         while i < end:    98             c = self[i]    99    100             # Handle quotes before anything else.   101    102             if c == quote:   103                 b.append("\\")   104                 b.append(quote)   105                 i += 1   106                 last = i   107                 continue   108    109             # Extended unquoted text.   110    111             n = ord(c)   112    113             if 32 <= n < 128:   114                 i += 1   115                 continue   116    117             # Before quoting, emit unquoted text.   118    119             b.append(self[last:i])   120    121             # Add quoted value.   122    123             if c == "\t":   124                 b.append("\\t")   125             elif c == "\n":   126                 b.append("\\n")   127             elif c == "\r":   128                 b.append("\\r")   129             else:   130                 if n < 0:   131                     n += 256   132                 b.append("\\x")   133                 x = hex(n, "")   134                 if len(x) < 2:   135                     b.append("0")   136                 b.append(x)   137    138             i += 1   139             last = i   140    141         # Emit remaining unquoted text.   142    143         b.append(self[last:])   144         b.append(quote)   145         return str(b)   146    147     def bytelength(self):   148    149         "Return the number of bytes in this string."   150    151         return str_len(self.__data__)   152    153     # General type methods.   154    155     def __bool__(self):   156    157         "Return whether the string provides any data."   158    159         return str_nonempty(self.__data__)   160    161     def __contains__(self, value):   162    163         "Return whether this string contains 'value'."   164    165         return self.find(value) != -1   166    167     def __hash__(self):   168    169         "Return a value for hashing purposes."   170    171         return self._hashvalue(ord)   172    173     __len__ = bytelength   174    175     def __repr__(self):   176    177         "Return a program representation."   178    179         return self._quote('"')   180    181     def __str__(self):   182    183         "Return a string representation."   184    185         return self   186    187     # Operator methods.   188    189     def __iadd__(self, other):   190    191         "Return a string combining this string with 'other'."   192    193         return self._binary_op(str_add, other)   194    195     __add__ = __iadd__   196    197     def __radd__(self, other):   198    199         "Return a string combining this string with 'other'."   200    201         return self._binary_op_rev(str_add, other)   202    203     def __mod__(self, other): pass   204     def __rmod__(self, other): pass   205    206     def __mul__(self, other):   207    208         "Multiply the string by 'other'."   209    210         b = buffer()   211    212         while other > 0:   213             b.append(self)   214             other -= 1   215    216         return str(b)   217    218     __rmul__ = __mul__   219    220     # Comparison methods.   221    222     def __eq__(self, other):   223    224         "Return whether this string is equal to 'other'."   225    226         return self._binary_op(str_eq, other)   227    228     def __ge__(self, other):   229    230         "Return whether this string is greater than or equal to 'other'."   231    232         return _negate(self.__lt__(other))   233    234     def __gt__(self, other):   235    236         "Return whether this string is greater than 'other'."   237    238         return self._binary_op(str_gt, other)   239    240     def __le__(self, other):   241    242         "Return whether this string is less than or equal to 'other'."   243    244         return _negate(self.__gt__(other))   245    246     def __lt__(self, other):   247    248         "Return whether this string is less than 'other'."   249    250         return self._binary_op(str_lt, other)   251    252     def __ne__(self, other):   253    254         "Return whether this string is not equal to 'other'."   255    256         return _negate(self.__eq__(other))   257    258     # String-specific methods.   259    260     def __ord__(self):   261    262         "Return the value of the string, if only a single character."   263    264         if self.__len__() == 1:   265             return str_ord(self.__data__)   266         else:   267             raise ValueError, self   268    269     def endswith(self, s):   270    271         "Return whether this string ends with 's'."   272    273         return self[-s.__len__():] == s   274    275     def find(self, sub, start=None, end=None):   276    277         """   278         Find 'sub' in the string if it occurs from or after the 'start' position   279         (or 0, if omitted) and before the 'end' position (or the end of the   280         string, if omitted), returning the earliest occurrence or -1 if 'sub' is   281         not present.   282         """   283    284         sublen = sub.__len__()   285    286         if end is None:   287             end = self.__len__()   288    289         end -= sublen   290    291         i = start or 0   292    293         while i <= end:   294             if sub == self[i:i+sublen]:   295                 return i   296             i += 1   297    298         return -1   299    300     def index(self, sub, start=None, end=None):   301    302         """   303         Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending   304         at 'end' (or the end of the string, if omitted), raising ValueError if   305         'sub' is not present.   306         """   307    308         i = self.find(sub, start, end)   309    310         if i == -1:   311             raise ValueError(sub)   312         else:   313             return i   314    315     def join(self, l):   316    317         "Join the elements in 'l' with this string."   318    319         # Empty strings just cause the list elements to be concatenated.   320    321         if not self.__bool__():   322             return str(buffer(l))   323    324         # Non-empty strings join the elements together in a buffer.   325    326         b = buffer()   327         first = True   328    329         for s in l:   330             if first:   331                 first = False   332             else:   333                 b.append(self)   334             b.append(s)   335    336         return str(b)   337    338     def lower(self): pass   339    340     def lstrip(self, chars=None):   341    342         """   343         Strip any of the given 'chars' from the start of the string, or strip   344         whitespace characters is 'chars' is omitted or None.   345         """   346    347         if chars is not None and not chars:   348             return self   349    350         i = 0   351         end = self.__len__()   352    353         while i < end and self[i] in (chars or WHITESPACE):   354             i += 1   355    356         return self[i:]   357    358     def replace(self, old, new, count=None): pass   359    360     def rfind(self, sub, start=None, end=None):   361    362         """   363         Find 'sub' in the string if it occurs from or after the 'start' position   364         (or 0, if omitted) and before the 'end' position (or the end of the   365         string, if omitted), returning the latest occurrence or -1 if 'sub' is   366         not present.   367         """   368    369         sublen = sub.__len__()   370    371         start = start or 0   372    373         if end is None:   374             end = self.__len__()   375    376         i = end - sublen   377    378         while i >= start:   379             if sub == self[i:i+sublen]:   380                 return i   381             i -= 1   382    383         return -1   384    385     def rsplit(self, sep=None, maxsplit=None):   386    387         """   388         Split the string using the given 'sep' as separator (or any whitespace   389         character if omitted or specified as None), splitting at most 'maxsplit'   390         times (or as many times as is possible if omitted or specified as None).   391         Where 'maxsplit' is given, the number of split points is counted from   392         the end of the string.   393         """   394    395         if not maxsplit:   396             return self.split(sep, maxsplit)   397    398         if sep is not None and not sep:   399             raise ValueError, sep   400    401         seplen = sep and len(sep) or 1   402         start = seplen   403         splits = 0   404    405         l = []   406         i = last = self.__len__()   407    408         while i >= start and (maxsplit is None or splits < maxsplit):   409    410             # Find any specified separator.   411    412             if sep and self[i-seplen:i] == sep:   413                 l.insert(0, self[i:last])   414                 i -= seplen   415                 last = i   416                 splits += 1   417    418             # Find any whitespace character and skip adjacent characters.   419    420             elif not sep and self[i-1] in WHITESPACE:   421                 l.insert(0, self[i:last])   422                 while i > start:   423                     i -= 1   424                     if self[i-1] not in WHITESPACE:   425                         break   426                 else:   427                     break   428                 last = i   429                 splits += 1   430    431             # Check the next character.   432    433             else:   434                 i -= 1   435    436         l.insert(0, self[:last])   437         return l   438    439     def rstrip(self, chars=None):   440    441         """   442         Strip any of the given 'chars' from the end of the string, or strip   443         whitespace characters is 'chars' is omitted or None.   444         """   445    446         if chars is not None and not chars:   447             return self   448    449         i = self.__len__() - 1   450    451         while i >= 0 and self[i] in (chars or WHITESPACE):   452             i -= 1   453    454         return self[:i+1]   455    456     def split(self, sep=None, maxsplit=None):   457    458         """   459         Split the string using the given 'sep' as separator (or any whitespace   460         character if omitted or specified as None), splitting at most 'maxsplit'   461         times (or as many times as is possible if omitted or specified as None).   462         Where 'maxsplit' is given, the number of split points is counted from   463         the start of the string.   464         """   465    466         if sep is not None and not sep:   467             raise ValueError, sep   468    469         if maxsplit is not None and not maxsplit:   470             return [self]   471    472         seplen = sep and len(sep) or 1   473         end = self.__len__() - seplen   474         splits = 0   475    476         l = []   477         i = last = 0   478    479         while i <= end and (maxsplit is None or splits < maxsplit):   480    481             # Find any specified separator.   482    483             if sep and self[i:i+seplen] == sep:   484                 l.append(self[last:i])   485                 i += seplen   486                 last = i   487                 splits += 1   488    489             # Find any whitespace character and skip adjacent characters.   490    491             elif not sep and self[i] in WHITESPACE:   492                 l.append(self[last:i])   493                 while i < end:   494                     i += 1   495                     if self[i] not in WHITESPACE:   496                         break   497                 else:   498                     break   499                 last = i   500                 splits += 1   501    502             # Check the next character.   503    504             else:   505                 i += 1   506    507         l.append(self[last:])   508         return l   509    510     def splitlines(self, keepends=False): pass   511    512     def startswith(self, s):   513    514         "Return whether this string starts with 's'."   515    516         return self[:s.__len__()] == s   517    518     def strip(self, chars=None):   519    520         """   521         Strip any of the given 'chars' from the start and end of the string, or   522         strip whitespace characters is 'chars' is omitted or None.   523         """   524    525         return self.lstrip(chars).rstrip(chars)   526    527     def upper(self): pass   528    529 class string(basestring):   530    531     "A plain string of bytes."   532    533     # Special implementation methods.   534    535     def __get_single_item__(self, index):   536    537         "Return the item at the normalised (positive) 'index'."   538    539         self._check_index(index)   540         return str_substr(self.__data__, index, index + 1, 1)   541    542     def __get_multiple_items__(self, start, end, step):   543    544         """   545         Return items from 'start' until (but excluding) 'end', at 'step'   546         intervals.   547         """   548    549         if start == end:   550             return ""   551    552         check_int(step)   553    554         if step == 0:   555             raise ValueError(step)   556    557         l = get_using(basestring.__get_multiple_items__, self)(start, end, step)   558         return "".join(l)   559    560 def str(obj):   561    562     "Return the string representation of 'obj'."   563    564     # Class attributes of instances provide __str__.   565    566     return obj.__str__()   567    568 # vim: tabstop=4 expandtab shiftwidth=4