Lichen

tests/unicode.py

612:97ec110d65cf
2017-02-23 Paul Boddie Added a "reset all" option removing the data directory. Added option synonyms. method-wrapper-for-context
# -*- coding: ISO-8859-15 -*-

# Exercises byte-string and Unicode-string literals, conversion between them,
# concatenation, output encoding on sys.stdout, indexing/slicing and
# ord/unichr. The expected output of each print statement is given in its
# trailing comment.
#
# The statements are order-dependent: sys.stdout.encoding is reassigned
# part-way through and later prints rely on the value set earlier, so the
# sections should not be reordered.
#
# NOTE(review): in this copy the non-ASCII characters appear as "?"
# placeholders, both in literals and in expected-output comments. Judging by
# the escaped forms used elsewhere in the file ("\xe6\xf8\xe5", the expected
# ord value 230, and "\u20ac"), "???" presumably stood for the three
# ISO-8859-15 characters 0xE6 0xF8 0xE5 and the single "?" assigned to euro
# for the euro sign -- restore them from the repository before running.

import sys

# Print bytes.

s = b"???"
print "ISO-8859-15 values:"
print s                             # ???
print len(s)                        # 3

# Adjacent literals: a bytes literal continued by an unprefixed literal.
s1 = b"???" \
      "???"
print "ISO-8859-15 values:"
print s1                            # ??????
print len(s1)                       # 6

# Hexadecimal escapes in a bytes literal.
s2 = b"\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s2                            # ???
print s2.__class__                  # __builtins__.str.string
print len(s2)                       # 3

# Hexadecimal escapes in an unprefixed literal: expected to remain a byte
# string, as the class comment below indicates.
s3 = "\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s3                            # ???
print s3.__class__                  # __builtins__.str.string
print len(s3)                       # 3

# \u escapes in a bytes literal are expected to be left untranslated, giving
# 18 separate characters.
s4 = b"\u00e6\u00f8\u00e5"
print "Untranslated values:"
print s4                            # \u00e6\u00f8\u00e5
print s4.__class__                  # __builtins__.str.string
print len(s4)                       # 18

# Octal escapes in a bytes literal.
s5 = b"\346\370\345"
print "ISO-8859-15 values:"
print s5                            # ???
print s5.__class__                  # __builtins__.str.string
print len(s5)                       # 3

# Octal escapes in an unprefixed literal.
s6 = "\346\370\345"
print "ISO-8859-15 values:"
print s6                            # ???
print s6.__class__                  # __builtins__.str.string
print len(s6)                       # 3

# Raw literal: the octal escapes are expected to be left untranslated, giving
# 12 separate characters.
s7 = r"\346\370\345"
print "Untranslated values:"
print s7                            # \346\370\345
print s7.__class__                  # __builtins__.unicode.utf8string
print len(s7)                       # 12

# Obtain text and print it.

# Explicitly from bytes.

u = unicode(b"???", "ISO-8859-15")
print "Unicode values:"
print u                             # ???
print u.__class__                   # __builtins__.unicode.utf8string
print u.encode("ISO-8859-15")       # ???
print u.encoding                    # ISO-8859-15
print len(u)                        # 3

# Explicitly from Unicode literals.

u2 = u"???"
print "Unicode values:"
print u2                            # ???
print u2.__class__                  # __builtins__.unicode.utf8string
print u2.encode("ISO-8859-15")      # ???
print u2.encoding                   # ISO-8859-15
print len(u2)                       # 3

# Implicitly from string literals.

u3 = "???"
print "Unicode values:"
print u3                            # ???
print u3.__class__                  # __builtins__.unicode.utf8string
print u3.encode("ISO-8859-15")      # ???
print u3.encoding                   # ISO-8859-15
print len(u3)                       # 3

# Explicitly from implicitly-converted literal.

u4 = unicode("???", "ISO-8859-15")
print "Unicode values:"
print u4                            # ???
print u4.__class__                  # __builtins__.unicode.utf8string
print u4.encode("ISO-8859-15")      # ???
print u4.encoding                   # ISO-8859-15
print len(u4)                       # 3

# Test Unicode values given as \u escapes in an unprefixed literal.

u5 = "\u00e6\u00f8\u00e5"
print "Unicode values:"
print u5                            # ???
print u5.__class__                  # __builtins__.unicode.utf8string
print len(u5)                       # 3

# Test some untranslated values: the doubled backslashes keep the \u
# sequences as literal text.

u6 = "\\u00e6\\u00f8\\u00e5"
print "Untranslated values:"
print u6                            # \u00e6\u00f8\u00e5
print u6.__class__                  # __builtins__.unicode.utf8string
print len(u6)                       # 18

# Test Unicode values given as octal escapes in a Unicode literal.

u7 = u"\346\370\345"
print "Unicode values:"
print u7                            # ???
print u7.__class__                  # __builtins__.unicode.utf8string
print len(u7)                       # 3

# Test untranslated values in a raw Unicode literal.

u8 = ur"\346\370\345"
print "Untranslated values:"
print u8                            # \346\370\345
print u8.__class__                  # __builtins__.unicode.utf8string
print len(u8)                       # 12

# Test invalid sequences.
# s holds the bytes defined at the top of the script; decoding them as UTF-8
# is expected to fail and is reported rather than propagated.

try:
    u9 = unicode(s, "UTF-8")
except UnicodeDecodeError, exc:
    print "Attempt to decode", s, "as UTF-8 failed."

# Combine bytes and text.
# The text should be decoded.
# The expected result is a byte string (see the class comment below).

su = s + u
print "ISO-8859-15 values:"
print su                            # ??????
print su.__class__                  # __builtins__.str.string
print len(su)                       # 6

# Combine text and bytes.
# The text should be decoded.
# The expected result is a byte string (see the class comment below).

us = u + s
print "ISO-8859-15 values:"
print us                            # ??????
print us.__class__                  # __builtins__.str.string
print len(us)                       # 6

# Combine text and text.

uu2 = u + u2
print "Unicode values:"
print uu2                           # ??????
print uu2.__class__                 # __builtins__.unicode.utf8string
print uu2.encoding                  # ISO-8859-15
print len(uu2)                      # 6

# Inspect and update the encoding of stdout.
# Note that su and us are byte strings and are not recoded.

print sys.stdout                    # <libc.io.sysstream instance>
print sys.stdout.encoding           # None

sys.stdout.encoding = "ISO-8859-15"
print "ISO-8859-15 and Unicode values as ISO-8859-15:"
print sys.stdout.encoding           # ISO-8859-15
print u                             # ???
print su                            # ??????
print us                            # ??????

# With UTF-8 output the three characters of u are expected to occupy six
# bytes, hence the longer expected value for u below.
sys.stdout.encoding = "UTF-8"
print "Unicode values as UTF-8:"
print sys.stdout.encoding           # UTF-8
print u                             # ??????
print "ISO-8859-15 values bypassing UTF-8 output encoding:"
print su                            # ??????
print us                            # ??????

# Reset the encoding.

sys.stdout.encoding = "ISO-8859-15"

# Test character access.

u0 = u[0]
print u0.__class__                  # __builtins__.unicode.utf8string
print u0.encoding                   # ISO-8859-15
print u0                            # ?
print u[-1]                         # ?
print len(u[0])                     # 1
print len(u[-1])                    # 1
print u[:2]                         # ??
print len(u[:2])                    # 2
print u[-1::-1]                     # ???
print len(u[-1::-1])                # 3

# Test character values.

print ord(u[0])                     # 230

# ord is given the whole three-character string here, not a single character.
try:
    print ord(u)                    # should raise an exception
except ValueError, exc:
    print "ord(u): value is not appropriate", repr(exc.value)

# A character outside the single-byte escapes used above (code point 8364).
euro = "?"
print euro                          # ?
print repr(euro)                    # "\u20ac"
print ord(euro)                     # 8364
print "\u20ac"                      # ?
print unichr(ord(euro))             # ?
print unichr(ord(euro)) == euro     # True