1.1 --- a/lib/__builtins__/unicode.py Tue Dec 13 17:58:26 2016 +0100
1.2 +++ b/lib/__builtins__/unicode.py Tue Dec 13 19:19:23 2016 +0100
1.3 @@ -21,14 +21,72 @@
1.4
1.5 from __builtins__.str import basestring
1.6 from posix.iconv import Converter
1.7 +from native import str_add, isinstance as _isinstance
1.8
1.9 class utf8string(basestring):
1.10
1.11 "A character string representation based on UTF-8."
1.12
1.13 - def encode(self, encoding):
1.14 + def __init__(self, other=None, encoding=None):
1.15 +
1.16 + """
1.17 + Initialise the string, perhaps from 'other', with any original
1.18 + 'encoding' indicated.
1.19 + """
1.20 +
1.21 + get_using(basestring.__init__, self)(other)
1.22 + self.encoding = encoding
1.23 +
1.24 + def _binary_op(self, op, other):
1.25 +
1.26 + "Perform 'op' on this object and 'other' if appropriate."
1.27 +
1.28 + # Reject non-strings.
1.29 +
1.30 + if not _isinstance(other, basestring):
1.31 + return NotImplemented
1.32 +
1.33 + # Combining text with bytes.
1.34 +
1.35 + elif not _isinstance(other, utf8string):
1.36 + s = self.encode()
1.37 + return op(s.__data__, other.__data__)
1.38 +
1.39 + # Otherwise, perform the operation on the operands' data.
1.40 +
1.41 + else:
1.42 + return op(self.__data__, other.__data__)
1.43
1.44 - "Encode the string to the given 'encoding'."
1.45 + def _binary_op_rev(self, op, other):
1.46 +
1.47 + "Perform 'op' on 'other' and this object if appropriate."
1.48 +
1.49 + # Reject non-strings.
1.50 +
1.51 + if not _isinstance(other, basestring):
1.52 + return NotImplemented
1.53 +
1.54 + # Combining text with bytes.
1.55 +
1.56 + elif not _isinstance(other, utf8string):
1.57 + s = self.encode()
1.58 + return op(other.__data__, s.__data__)
1.59 +
1.60 + # Otherwise, perform the operation on the operands' data.
1.61 +
1.62 + else:
1.63 + return op(other.__data__, self.__data__)
1.64 +
1.65 + def encode(self, encoding=None):
1.66 +
1.67 + """
1.68 + Encode the string to the given 'encoding' or any original encoding if
1.69 + omitted.
1.70 + """
1.71 +
1.72 + encoding = encoding or self.encoding
1.73 + if not encoding:
1.74 + return self
1.75
1.76 from_utf8 = Converter("UTF-8", encoding)
1.77
1.78 @@ -56,7 +114,7 @@
1.79
1.80 try:
1.81 to_utf8.feed(s)
1.82 - return utf8string(str(to_utf8))
1.83 + return utf8string(str(to_utf8), encoding)
1.84
1.85 finally:
1.86 to_utf8.close()