Lichen

tests/iconv.py

583:aed28d04304d
2017-02-13 Paul Boddie Re-added size information to string instances as the __size__ attribute. This fixes problems introduced when using strlen on data likely to contain embedded nulls, which was the reason for having size information explicitly stored in the first place. attr-strvalue-without-size
     1 # -*- coding: ISO-8859-1 -*-     2      3 from posix.iconv import Converter     4      5 only_utf8 = Converter("UTF-8", "UTF-8")     6 to_utf8 = Converter("ISO-8859-1", "UTF-8")     7 to_utf16 = Converter("ISO-8859-1", "UTF-16")     8 from_utf8 = Converter("UTF-8", "ISO-8859-1")     9 from_utf16 = Converter("UTF-16", "ISO-8859-1")    10     11 try:    12     iso = b"???"    13     print iso                           # ???    14     to_utf8.feed(iso)    15     utf8 = str(to_utf8)    16     print utf8                          # ??????    17     from_utf8.feed(utf8)    18     print str(from_utf8)                # ???    19     to_utf16.feed(iso)    20     utf16 = str(to_utf16)    21     print utf16                         # ...    22     from_utf16.feed(utf16)    23     print str(from_utf16)               # ???    24     25     # Convert UTF-8 to UTF-8.    26     27     only_utf8.feed(utf8)    28     utf8_2 = str(only_utf8)    29     print utf8_2                        # ??????    30     31     # Convert part of a UTF-16 sequence, then convert the remainder, then obtain    32     # the result.    33     34     first = utf16[:3]    35     second = utf16[3:]    36     37     from_utf16.reset()    38     print "first:", first               # ...    39     from_utf16.feed(first)              # should have handled an incomplete input    40     print "second:", second             # ...    41     from_utf16.feed(second)             # should have handled the complete input    42     print str(from_utf16)               # ???    43     44     # Convert part of a UTF-8 sequence, then the remainder, then get the result.    45     46     first = utf8[:3]    47     second = utf8[3:]    48     49     from_utf8.reset()    50     print "first:", first               # ???    51     from_utf8.feed(first)               # should have handled an incomplete input    52     print "second:", second             # ???    53     from_utf8.feed(second)              # should have handled the complete input    54     print str(from_utf8)                # ???    55     56     # Attempt to convert ISO-8859-1 characters as if they were UTF-8.    57     58     from_utf8.reset()    59     60     try:    61         from_utf8.feed(iso)             # should raise an exception    62     except UnicodeDecodeError, exc:    63         print "Not UTF-8 input:", exc.value    64     except OSError, exc:    65         print "OSError:", exc.value    66     67     print str(from_utf8)                #    68     69     # Attempt to convert ISO-8859-1 characters following some UTF-8 ones.    70     71     to_utf8.reset()    72     to_utf8.feed("???")    73     utf8_2 = str(to_utf8)    74     75     from_utf8.reset()    76     77     try:    78         from_utf8.feed(utf8_2 + iso)    # should raise an exception    79     except UnicodeDecodeError, exc:    80         print "Not UTF-8 input:", exc.value    81     except OSError, exc:    82         print "OSError:", exc.value    83     84     print str(from_utf8)                #    85     86 finally:    87     to_utf8.close()    88     to_utf16.close()    89     from_utf8.close()    90     from_utf16.close()    91     92 try:    93     Converter("horses", "giraffes")    94 except OSError, exc:    95     print 'Converter("horses", "giraffes"): not valid encodings; error is', exc.value