#!/usr/bin/env python

"""
String objects.

Copyright (C) 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from __builtins__.operator import _negate
from __builtins__.sequence import hashable, itemaccess
from __builtins__.types import check_int
from native import str_add, str_lt, str_gt, str_eq, str_len, str_nonempty, \
                   str_substr

# Characters treated as separators by split when no separator is given.

WHITESPACE = (" ", "\f", "\n", "\r", "\t")

class basestring(hashable):

    "The base class for all strings."

    def __init__(self, other=None):

        "Initialise the string, perhaps from 'other'."

        # Note the __data__ member. Since strings are either initialised from
        # literals or converted using routines defined for other types, no form
        # of actual initialisation is performed here.

        # NOTE: Cannot perform "other and other.__data__ or None" since the
        # NOTE: __data__ attribute is not a normal attribute.

        if other:
            self.__data__ = other.__data__
        else:
            self.__data__ = None

        # Note the __key__ member. This is also initialised statically. Where
        # a string is the same as an attribute name, the __key__ member contains
        # attribute position and code details.

        if other:
            self.__key__ = other.__key__
        else:
            self.__key__ = None

    def __hash__(self):

        "Return a value for hashing purposes."

        return self._hashvalue(ord)

    def _binary_op(self, op, other):

        """
        Perform 'op' on this object and 'other' if appropriate, returning the
        result of 'op' applied to the data of both operands, or NotImplemented
        if the operands are not instances of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(self.__data__, other.__data__)

    def _binary_op_rev(self, op, other):

        """
        Perform 'op' on 'other' and this object if appropriate, returning the
        result of 'op' applied to the data of both operands (with 'other' as
        the first operand), or NotImplemented if the operands are not instances
        of exactly the same class.
        """

        # Refuse to operate on specialisations of this class.

        if self.__class__ is not other.__class__:
            return NotImplemented

        # Otherwise, perform the operation on the operands' data.

        else:
            return op(other.__data__, self.__data__)

    def __iadd__(self, other):

        "Return a string combining this string with 'other'."

        return self._binary_op(str_add, other)

    # Addition and in-place addition share the same implementation.

    __add__ = __iadd__

    def __radd__(self, other):

        "Return a string combining 'other' with this string."

        return self._binary_op_rev(str_add, other)

    def __mul__(self, other):

        """
        Multiply the string by 'other', returning a string containing this
        string repeated 'other' times. Where 'other' is not greater than zero,
        the result is built from an empty buffer.
        """

        b = buffer()

        while other > 0:
            b.append(self)
            other -= 1

        return str(b)

    __rmul__ = __mul__

    # NOTE: The following methods have empty bodies and provide no result.

    def __mod__(self, other): pass
    def __rmod__(self, other): pass

    def __lt__(self, other):

        "Return whether this string is less than 'other'."

        return self._binary_op(str_lt, other)

    def __gt__(self, other):

        "Return whether this string is greater than 'other'."

        return self._binary_op(str_gt, other)

    def __le__(self, other):

        "Return whether this string is less than or equal to 'other'."

        return _negate(self.__gt__(other))

    def __ge__(self, other):

        "Return whether this string is greater than or equal to 'other'."

        return _negate(self.__lt__(other))

    def __eq__(self, other):

        "Return whether this string is equal to 'other'."

        return self._binary_op(str_eq, other)

    def __ne__(self, other):

        "Return whether this string is not equal to 'other'."

        return _negate(self.__eq__(other))

    def bytelength(self):

        "Return the number of bytes in this string."

        return str_len(self.__data__)

    # The length of a string is its length in bytes.

    __len__ = bytelength

    def __str__(self):

        "Return a string representation."

        return self

    def __repr__(self):

        "Return a program representation."

        # NOTE: To be implemented with proper quoting.
        b = buffer(['"', self, '"'])
        return str(b)

    def __bool__(self):

        "Return whether the string provides any data."

        return str_nonempty(self.__data__)

    def __contains__(self, value):

        "Return whether this string contains 'value'."

        return self.find(value) != -1

    def endswith(self, s):

        "Return whether this string ends with 's'."

        sublen = s.__len__()

        # An empty suffix trivially matches. Without this test, the slice
        # self[-0:] would select the whole string and wrongly fail to match.

        if sublen == 0:
            return True

        return self[-sublen:] == s

    def find(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the earliest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        i = start or 0

        if end is None:
            end = self.__len__()

        # The final position at which 'sub' can still fit before 'end'. This
        # position must itself be tested, otherwise a match finishing exactly
        # at 'end' would be missed.

        final = end - sublen

        while i <= final:
            if sub == self[i:i+sublen]:
                return i
            i += 1

        return -1

    def index(self, sub, start=None, end=None):

        """
        Find 'sub' in the string, starting at 'start' (or 0, if omitted), ending
        at 'end' (or the end of the string, if omitted), raising ValueError if
        'sub' is not present.
        """

        i = self.find(sub, start, end)

        if i == -1:
            raise ValueError(sub)
        else:
            return i

    def join(self, l):

        "Join the elements in 'l' with this string."

        # Empty strings just cause the list elements to be concatenated.

        if not self.__bool__():
            return str(buffer(l))

        # Non-empty strings join the elements together in a buffer.

        b = buffer()
        first = True

        for s in l:

            # Only add this string between elements, not before the first.

            if first:
                first = False
            else:
                b.append(self)
            b.append(s)

        return str(b)

    # NOTE: The following methods have empty bodies and provide no result.

    def lower(self): pass
    def lstrip(self, chars=None): pass
    def replace(self, old, new, count=None): pass

    def rfind(self, sub, start=None, end=None):

        """
        Find 'sub' in the string if it occurs from or after the 'start' position
        (or 0, if omitted) and before the 'end' position (or the end of the
        string, if omitted), returning the latest occurrence or -1 if 'sub' is
        not present.
        """

        sublen = sub.__len__()

        start = start or 0

        if end is None:
            end = self.__len__()

        # Start at the final position at which 'sub' can fit and work backwards.

        i = end - sublen

        while i >= start:
            if sub == self[i:i+sublen]:
                return i
            i -= 1

        return -1

    # NOTE: The following methods have empty bodies and provide no result.

    def rsplit(self, sep=None, maxsplit=None): pass
    def rstrip(self, chars=None): pass

    def split(self, sep=None, maxsplit=None):

        """
        Split the string using the given 'sep' as separator (or any whitespace
        character if omitted or specified as None), splitting at most 'maxsplit'
        times (or as many times as is possible if omitted or specified as None).

        A list of strings is returned. The text remaining after the final split
        is always added as the last element. ValueError is raised if 'sep' is
        given but is empty.
        """

        if sep is not None and not sep:
            raise ValueError(sep)

        end = self.__len__()
        seplen = sep and len(sep)
        splits = 0

        l = []
        i = last = 0

        while i < end and (maxsplit is None or splits < maxsplit):

            # Find any specified separator.

            if sep and self[i:i+seplen] == sep:
                l.append(self[last:i])
                i += seplen
                last = i
                splits += 1

            # Find any whitespace character and skip adjacent characters.

            elif not sep and self[i] in WHITESPACE:
                l.append(self[last:i])

                # Skip the whole run of whitespace, testing the position before
                # reading it so that trailing whitespace does not cause access
                # beyond the end of the string.

                i += 1
                while i < end and self[i] in WHITESPACE:
                    i += 1

                last = i
                splits += 1

            # Check the next character.

            else:
                i += 1

        l.append(self[last:])
        return l

    # NOTE: The following method has an empty body and provides no result.

    def splitlines(self, keepends=False): pass

    def startswith(self, s):

        "Return whether this string starts with 's'."

        return self[:s.__len__()] == s

    # NOTE: The following methods have empty bodies and provide no result.

    def strip(self, chars=None): pass
    def upper(self): pass

class string(basestring):

    "A plain string of bytes."

    # Special implementation methods.

    def __get_single_item__(self, index):

        """
        Return the item at the normalised (positive) 'index' as a string
        obtained from the single-byte region starting at that position.
        """

        self._check_index(index)
        return str_substr(self.__data__, index, index + 1, 1)

    def __get_multiple_items__(self, start, end, step):

        """
        Return items from 'start' until (but excluding) 'end', at 'step'
        intervals. ValueError is raised if 'step' is zero.
        """

        self._check_index(start)
        self._check_end_index(end)
        check_int(step)

        if step == 0:
            raise ValueError(step)

        # An empty region yields an empty string without consulting the data.

        if start == end:
            return ""

        return str_substr(self.__data__, start, end, step)

def str(obj):

    "Return the string representation of 'obj'."

    # Class attributes of instances provide __str__.

    return obj.__str__()

# vim: tabstop=4 expandtab shiftwidth=4