# -*- coding: ISO-8859-1 -*-

# Exercise byte strings and text (unicode) strings: printing, construction
# from bytes and from literals, encoding, decoding failures, concatenation of
# the two kinds of string, and the effect of changing the encoding of stdout.
#
# Each trailing comment records the output expected from that statement.
#
# NOTE(review): the "???" literals and expected outputs look like three
# non-ASCII ISO-8859-1 characters that were lost when this file was
# transcoded -- confirm against the original file before relying on them.
#
# NOTE(review): the expected class names (__builtins__.unicode.utf8string,
# posix.io.sysstream) and the "encoding" attribute on text objects are not
# what CPython 2 provides, so this script presumably targets a different
# Python implementation -- confirm.

import sys

# Print bytes.

s = b"???"
print s                             # ???
print len(s)                        # 3

# Obtain text and print it.
# For each text object: show its value, its class, the bytes obtained by
# encoding it, the encoding it records, and its length in characters.

# Explicitly from bytes.

u = unicode("???", "ISO-8859-1")
print u                             # ???
print u.__class__                   # __builtins__.unicode.utf8string
print u.encode("ISO-8859-1")        # ???
print u.encoding                    # ISO-8859-1
print len(u)                        # 3

# Explicitly from Unicode literals.

u2 = u"???"
print u2                            # ???
print u2.__class__                  # __builtins__.unicode.utf8string
print u2.encode("ISO-8859-1")       # ???
print u2.encoding                   # ISO-8859-1
print len(u2)                       # 3

# Implicitly from string literals.
# Per the expected output, a plain literal is also reported as a text object
# here (presumably because of the coding declaration -- TODO confirm).

u3 = "???"
print u3                            # ???
print u3.__class__                  # __builtins__.unicode.utf8string
print u3.encode("ISO-8859-1")       # ???
print u3.encoding                   # ISO-8859-1
print len(u3)                       # 3

# Test invalid sequences.
# Decoding the byte string as UTF-8 is expected to raise UnicodeDecodeError;
# if it does not, nothing is printed for this section.

try:
    u4 = unicode(s, "UTF-8")
except UnicodeDecodeError, exc:
    print "Attempt to decode", s, "as UTF-8 failed."

# Combine bytes and text.
# The text should be decoded.
# Per the expected output, the result is a byte string of length 6.

su = s + u
print su                            # ??????
print su.__class__                  # __builtins__.str.string
print len(su)                       # 6

# Combine text and bytes.
# The text should be decoded.
# Per the expected output, operand order does not matter: the result is again
# a byte string of length 6.

us = u + s
print us                            # ??????
print us.__class__                  # __builtins__.str.string
print len(us)                       # 6

# Combine text and text.
# Per the expected output, the result remains text and keeps the encoding.

uu2 = u + u2
print uu2                           # ??????
print uu2.__class__                 # __builtins__.unicode.utf8string
print uu2.encoding                  # ISO-8859-1
print len(uu2)                      # 6

# Inspect and update the encoding of stdout.
# Note that su and us are byte strings and are not recoded.

print sys.stdout                    # <posix.io.sysstream instance>
print sys.stdout.encoding           # None

# With an ISO-8859-1 stdout, the text object is expected to print as three
# characters.

sys.stdout.encoding = "ISO-8859-1"
print sys.stdout.encoding           # ISO-8859-1
print u                             # ???
print su                            # ??????
print us                            # ??????

# With a UTF-8 stdout, the text object is expected to occupy six positions in
# the output, while the byte strings are expected to print unchanged.

sys.stdout.encoding = "UTF-8"
print sys.stdout.encoding           # UTF-8
print u                             # ??????
print su                            # ??????
print us                            # ??????