1 # -*- coding: ISO-8859-1 -*- 2 3 import sys 4 5 # Print bytes. 6 7 s = b"???" 8 print "ISO-8859-1 values:" 9 print s # ??? 10 print len(s) # 3 11 12 s2 = b"\xe6\xf8\xe5" 13 print "ISO-8859-1 values:" 14 print s2 # ??? 15 print s2.__class__ # __builtins__.str.string 16 print len(s2) # 3 17 18 s3 = "\xe6\xf8\xe5" 19 print "ISO-8859-1 values:" 20 print s3 # ??? 21 print s3.__class__ # __builtins__.str.string 22 print len(s3) # 3 23 24 s4 = b"\u00e6\u00f8\u00e5" 25 print "Untranslated values:" 26 print s4 # \u00e6\u00f8\u00e5 27 print s4.__class__ # __builtins__.str.string 28 print len(s4) # 18 29 30 s5 = b"\346\370\345" 31 print "ISO-8859-1 values:" 32 print s5 # ??? 33 print s5.__class__ # __builtins__.str.string 34 print len(s5) # 3 35 36 s6 = "\346\370\345" 37 print "ISO-8859-1 values:" 38 print s6 # ??? 39 print s6.__class__ # __builtins__.str.string 40 print len(s6) # 3 41 42 s7 = r"\346\370\345" 43 print "Untranslated values:" 44 print s7 # \346\370\345 45 print s7.__class__ # __builtins__.unicode.utf8string 46 print len(s7) # 12 47 48 # Obtain text and print it. 49 50 # Explicitly from bytes. 51 52 u = unicode(b"???", "ISO-8859-1") 53 print "Unicode values:" 54 print u # ??? 55 print u.__class__ # __builtins__.unicode.utf8string 56 print u.encode("ISO-8859-1") # ??? 57 print u.encoding # ISO-8859-1 58 print len(u) # 3 59 60 # Explicitly from Unicode literals. 61 62 u2 = u"???" 63 print "Unicode values:" 64 print u2 # ??? 65 print u2.__class__ # __builtins__.unicode.utf8string 66 print u2.encode("ISO-8859-1") # ??? 67 print u2.encoding # ISO-8859-1 68 print len(u2) # 3 69 70 # Implicitly from string literals. 71 72 u3 = "???" 73 print "Unicode values:" 74 print u3 # ??? 75 print u3.__class__ # __builtins__.unicode.utf8string 76 print u3.encode("ISO-8859-1") # ??? 77 print u3.encoding # ISO-8859-1 78 print len(u3) # 3 79 80 # Explicitly from implicitly-converted literal. 81 82 u4 = unicode("???", "ISO-8859-1") 83 print "Unicode values:" 84 print u4 # ??? 
# Remaining checks on u4 (text built explicitly from a plain literal).
print u4.__class__                  # __builtins__.unicode.utf8string
print u4.encode("ISO-8859-1")       # ???
print u4.encoding                   # ISO-8859-1
print len(u4)                       # 3

# Test Unicode values.

u5 = "\u00e6\u00f8\u00e5"
print "Unicode values:"
print u5                            # ???
print u5.__class__                  # __builtins__.unicode.utf8string
print len(u5)                       # 3

# Test some untranslated values.

# The doubled backslashes keep the \u sequences as literal text.
u6 = "\\u00e6\\u00f8\\u00e5"
print "Untranslated values:"
print u6                            # \u00e6\u00f8\u00e5
print u6.__class__                  # __builtins__.unicode.utf8string
print len(u6)                       # 18

# Test Unicode values.

# Unicode literal using octal escapes.
u7 = u"\346\370\345"
print "Unicode values:"
print u7                            # ???
print u7.__class__                  # __builtins__.unicode.utf8string
print len(u7)                       # 3

# Test untranslated values in a raw Unicode literal.

u8 = ur"\346\370\345"
print "Untranslated values:"
print u8                            # \346\370\345
print u8.__class__                  # __builtins__.unicode.utf8string
print len(u8)                       # 12

# Test invalid sequences.

# NOTE(review): this only fails if s holds bytes that are not valid UTF-8;
# if s really contains ASCII question marks the decode would succeed --
# confirm s holds ISO-8859-1 encoded non-ASCII characters.
try:
    u9 = unicode(s, "UTF-8")
except UnicodeDecodeError, exc:
    print "Attempt to decode", s, "as UTF-8 failed."

# Combine bytes and text.
# The text should be decoded.

su = s + u
print "ISO-8859-1 values:"
print su                            # ??????
print su.__class__                  # __builtins__.str.string
print len(su)                       # 6

# Combine text and bytes.
# The text should be decoded.

us = u + s
print "ISO-8859-1 values:"
print us                            # ??????
print us.__class__                  # __builtins__.str.string
print len(us)                       # 6

# Combine text and text.

uu2 = u + u2
print "Unicode values:"
print uu2                           # ??????
print uu2.__class__                 # __builtins__.unicode.utf8string
print uu2.encoding                  # ISO-8859-1
print len(uu2)                      # 6

# Inspect and update the encoding of stdout.
# Note that su and us are byte strings and are not recoded.
# Show the output stream and its initial encoding.
# NOTE(review): assigning to sys.stdout.encoding is presumably supported by
# the target runtime (the expected stream class is libc.io.sysstream) --
# confirm; it is not something to rely on in CPython.

print sys.stdout                    # <libc.io.sysstream instance>
print sys.stdout.encoding           # None

# Select ISO-8859-1 output and print the text and the combined byte strings.
sys.stdout.encoding = "ISO-8859-1"
print "ISO-8859-1 and Unicode values as ISO-8859-1:"
print sys.stdout.encoding           # ISO-8859-1
print u                             # ???
print su                            # ??????
print us                            # ??????

# Select UTF-8 output; the expected output for u doubles in length here.
sys.stdout.encoding = "UTF-8"
print "Unicode values as UTF-8:"
print sys.stdout.encoding           # UTF-8
print u                             # ??????
print "ISO-8859-1 values bypassing UTF-8 output encoding:"
print su                            # ??????
print us                            # ??????

# Reset the encoding.

sys.stdout.encoding = "ISO-8859-1"

# Test character access.

# Indexing and slicing of the text value u, including negative indexes and
# a reversing slice.
u0 = u[0]
print u0.__class__                  # __builtins__.unicode.utf8string
print u0.encoding                   # ISO-8859-1
print u0                            # ?
print u[-1]                         # ?
print len(u[0])                     # 1
print len(u[-1])                    # 1
print u[:2]                         # ??
print len(u[:2])                    # 2
print u[-1::-1]                     # ???
print len(u[-1::-1])                # 3

# Test character values.

print ord(u[0])                     # 230

# ord() applied to the whole multi-character value is expected to fail.
# NOTE(review): exc.value is presumably an attribute supplied by the target
# runtime's ValueError -- confirm.
try:
    print ord(u)                    # should raise an exception
except ValueError, exc:
    print "ord(u): value is not appropriate", repr(exc.value)