# -*- coding: ISO-8859-15 -*-

# Exercise byte strings and Unicode text: literal forms, explicit and implicit
# conversion, concatenation, output encoding, indexing/slicing, and ord/unichr.
# The trailing comment on each print records the output that statement is
# expected to produce.
#
# NOTE(review): the "?" characters in literals and expected-output comments
# look like non-ASCII ISO-8859-15 characters lost in transcription; the escape
# forms used below (\xe6\xf8\xe5, \u20ac) and the expected lengths suggest the
# characters with code points 0xE6, 0xF8, 0xE5 and the euro sign. Confirm
# against the original file before relying on the literals.
#
# NOTE(review): Python 2 syntax is used throughout, but the expected class
# names (e.g. __builtins__.unicode.utf8string), the .encoding attribute on text
# and the assignable sys.stdout.encoding are not CPython behaviour; presumably
# this targets another Python-like implementation -- confirm.

import sys

# Print bytes.

s = b"???"
print "ISO-8859-15 values:"
print s                                 # ???
print len(s)                            # 3

# Adjacent literals are joined into one value; only the first has the b prefix.

s1 = b"???" \
     "???"
print "ISO-8859-15 values:"
print s1                                # ??????
print len(s1)                           # 6

# Hexadecimal escapes in a bytes literal.

s2 = b"\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s2                                # ???
print s2.__class__                      # __builtins__.str.string
print len(s2)                           # 3

# The same hexadecimal escapes in a plain literal.

s3 = "\xe6\xf8\xe5"
print "ISO-8859-15 values:"
print s3                                # ???
print s3.__class__                      # __builtins__.str.string
print len(s3)                           # 3

# \u escapes in a bytes literal are expected to stay untranslated.

s4 = b"\u00e6\u00f8\u00e5"
print "Untranslated values:"
print s4                                # \u00e6\u00f8\u00e5
print s4.__class__                      # __builtins__.str.string
print len(s4)                           # 18

# Octal escapes in a bytes literal.

s5 = b"\346\370\345"
print "ISO-8859-15 values:"
print s5                                # ???
print s5.__class__                      # __builtins__.str.string
print len(s5)                           # 3

# The same octal escapes in a plain literal.

s6 = "\346\370\345"
print "ISO-8859-15 values:"
print s6                                # ???
print s6.__class__                      # __builtins__.str.string
print len(s6)                           # 3

# A raw literal keeps the backslash sequences as written.

s7 = r"\346\370\345"
print "Untranslated values:"
print s7                                # \346\370\345
print s7.__class__                      # __builtins__.unicode.utf8string
print len(s7)                           # 12

# Obtain text and print it.

# Explicitly from bytes.

u = unicode(b"???", "ISO-8859-15")
print "Unicode values:"
print u                                 # ???
print u.__class__                       # __builtins__.unicode.utf8string
print u.encode("ISO-8859-15")           # ???
print u.encoding                        # ISO-8859-15
print len(u)                            # 3

# Explicitly from Unicode literals.

u2 = u"???"
print "Unicode values:"
print u2                                # ???
print u2.__class__                      # __builtins__.unicode.utf8string
print u2.encode("ISO-8859-15")          # ???
print u2.encoding                       # ISO-8859-15
print len(u2)                           # 3

# Implicitly from string literals.

u3 = "???"
print "Unicode values:"
print u3                                # ???
print u3.__class__                      # __builtins__.unicode.utf8string
print u3.encode("ISO-8859-15")          # ???
print u3.encoding                       # ISO-8859-15
print len(u3)                           # 3

# Explicitly from implicitly-converted literal.

u4 = unicode("???", "ISO-8859-15")
print "Unicode values:"
print u4                                # ???
print u4.__class__                      # __builtins__.unicode.utf8string
print u4.encode("ISO-8859-15")          # ???
print u4.encoding                       # ISO-8859-15
print len(u4)                           # 3

# Test Unicode values.

u5 = "\u00e6\u00f8\u00e5"
print "Unicode values:"
print u5                                # ???
print u5.__class__                      # __builtins__.unicode.utf8string
print len(u5)                           # 3

# Test some untranslated values.

u6 = "\\u00e6\\u00f8\\u00e5"
print "Untranslated values:"
print u6                                # \u00e6\u00f8\u00e5
print u6.__class__                      # __builtins__.unicode.utf8string
print len(u6)                           # 18

# Test Unicode values.

u7 = u"\346\370\345"
print "Unicode values:"
print u7                                # ???
print u7.__class__                      # __builtins__.unicode.utf8string
print len(u7)                           # 3

# Test some untranslated values (raw Unicode literal).

u8 = ur"\346\370\345"
print "Untranslated values:"
print u8                                # \346\370\345
print u8.__class__                      # __builtins__.unicode.utf8string
print len(u8)                           # 12

# Test invalid sequences.
# Decoding the bytes in s as UTF-8 is expected to fail and print the message.

try:
    u9 = unicode(s, "UTF-8")
except UnicodeDecodeError, exc:
    print "Attempt to decode", s, "as UTF-8 failed."

# Combine bytes and text.
# The text should be decoded.

su = s + u
print "ISO-8859-15 values:"
print su                                # ??????
print su.__class__                      # __builtins__.str.string
print len(su)                           # 6

# Combine text and bytes.
# The text should be decoded.

us = u + s
print "ISO-8859-15 values:"
print us                                # ??????
print us.__class__                      # __builtins__.str.string
print len(us)                           # 6

# Combine text and text.

uu2 = u + u2
print "Unicode values:"
print uu2                               # ??????
print uu2.__class__                     # __builtins__.unicode.utf8string
print uu2.encoding                      # ISO-8859-15
print len(uu2)                          # 6

# Inspect and update the encoding of stdout.
# Note that su and us are byte strings and are not recoded.

print sys.stdout                        # <libc.io.sysstream instance>
print sys.stdout.encoding               # None

sys.stdout.encoding = "ISO-8859-15"
print "ISO-8859-15 and Unicode values as ISO-8859-15:"
print sys.stdout.encoding               # ISO-8859-15
print u                                 # ???
print su                                # ??????
print us                                # ??????

sys.stdout.encoding = "UTF-8"
print "Unicode values as UTF-8:"
print sys.stdout.encoding               # UTF-8
print u                                 # ??????
print "ISO-8859-15 values bypassing UTF-8 output encoding:"
print su                                # ??????
print us                                # ??????

# Reset the encoding.

sys.stdout.encoding = "ISO-8859-15"

# Test character access.

u0 = u[0]
print u0.__class__                      # __builtins__.unicode.utf8string
print u0.encoding                       # ISO-8859-15
print u0                                # ?
print u[-1]                             # ?
print len(u[0])                         # 1
print len(u[-1])                        # 1
print u[:2]                             # ??
print len(u[:2])                        # 2
print u[-1::-1]                         # ???
print len(u[-1::-1])                    # 3

# Test character values.

print ord(u[0])                         # 230

# ord is given the whole three-character text here, not a single character.

try:
    print ord(u)                        # should raise an exception
except ValueError, exc:
    print "ord(u): value is not appropriate", repr(exc.value)

# A character outside the 8-bit range: expected code point 8364 (\u20ac).

euro = "?"
print euro                              # ?
print repr(euro)                        # "\u20ac"
print ord(euro)                         # 8364
print "\u20ac"                          # ?
print unichr(ord(euro))                 # ?
print unichr(ord(euro)) == euro         # True