paul@392 | 1 | # -*- coding: ISO-8859-1 -*- |
paul@392 | 2 | |
paul@392 | 3 | import sys |
paul@392 | 4 | |
paul@392 | 5 | # Print a byte string literal and its length. |
paul@392 | 6 | |
paul@396 | 7 | s = b"???" |
paul@396 | 8 | print s # ??? |
paul@403 | 9 | print len(s) # 3 |
paul@392 | 10 | |
paul@392 | 11 | # Obtain text and print it. |
paul@392 | 12 | |
paul@394 | 13 | # Explicitly from bytes, using unicode() with the ISO-8859-1 encoding. |
paul@394 | 14 | |
paul@396 | 15 | u = unicode("???", "ISO-8859-1") |
paul@406 | 16 | print u # ??? |
paul@403 | 17 | print u.__class__ # __builtins__.unicode.utf8string |
paul@392 | 18 | print u.encode("ISO-8859-1") # ??? |
paul@398 | 19 | print u.encoding # ISO-8859-1 |
paul@403 | 20 | print len(u) # 3 |
paul@392 | 21 | |
paul@394 | 22 | # Explicitly from Unicode literals (the u prefix). |
paul@394 | 23 | |
paul@394 | 24 | u2 = u"???" |
paul@406 | 25 | print u2 # ??? |
paul@403 | 26 | print u2.__class__ # __builtins__.unicode.utf8string |
paul@394 | 27 | print u2.encode("ISO-8859-1") # ??? |
paul@398 | 28 | print u2.encoding # ISO-8859-1 |
paul@403 | 29 | print len(u2) # 3 |
paul@394 | 30 | |
paul@394 | 31 | # Implicitly from string literals (no prefix); the expected class is the same as for u and u2. |
paul@394 | 32 | |
paul@405 | 33 | u3 = "???" |
paul@406 | 34 | print u3 # ??? |
paul@405 | 35 | print u3.__class__ # __builtins__.unicode.utf8string |
paul@405 | 36 | print u3.encode("ISO-8859-1") # ??? |
paul@405 | 37 | print u3.encoding # ISO-8859-1 |
paul@405 | 38 | print len(u3) # 3 |
paul@394 | 39 | |
paul@410 | 40 | # Test invalid sequences: decoding the bytes in s as UTF-8 is expected to raise UnicodeDecodeError. |
paul@410 | 41 | |
paul@410 | 42 | try: |
paul@410 | 43 | u4 = unicode(s, "UTF-8") |
paul@410 | 44 | except UnicodeDecodeError, exc: |
paul@410 | 45 | print "Attempt to decode", s, "as UTF-8 failed." |
paul@410 | 46 | |
paul@396 | 47 | # Combine bytes and text. |
paul@396 | 48 | # The text should be decoded; the expected result is a byte string. |
paul@396 | 49 | |
paul@396 | 50 | su = s + u |
paul@396 | 51 | print su # ?????? |
paul@398 | 52 | print su.__class__ # __builtins__.str.string |
paul@403 | 53 | print len(su) # 6 |
paul@396 | 54 | |
paul@396 | 55 | # Combine text and bytes. |
paul@396 | 56 | # The text should be decoded; the expected result is a byte string. |
paul@396 | 57 | |
paul@396 | 58 | us = u + s |
paul@396 | 59 | print us # ?????? |
paul@398 | 60 | print us.__class__ # __builtins__.str.string |
paul@403 | 61 | print len(us) # 6 |
paul@398 | 62 | |
paul@398 | 63 | # Combine text and text; the expected result is still text with the same encoding. |
paul@398 | 64 | |
paul@398 | 65 | uu2 = u + u2 |
paul@406 | 66 | print uu2 # ?????? |
paul@398 | 67 | print uu2.__class__ # __builtins__.unicode.utf8string |
paul@398 | 68 | print uu2.encoding # ISO-8859-1 |
paul@403 | 69 | print len(uu2) # 6 |
paul@396 | 70 | |
paul@392 | 71 | # Inspect and update the encoding of stdout. |
paul@398 | 72 | # Note that su and us are byte strings and are not recoded, so only the output for u is expected to change. |
paul@392 | 73 | |
paul@426 | 74 | print sys.stdout # <posix.io.sysstream instance> |
paul@392 | 75 | print sys.stdout.encoding # None |
paul@398 | 76 | |
paul@392 | 77 | sys.stdout.encoding = "ISO-8859-1" |
paul@398 | 78 | print sys.stdout.encoding # ISO-8859-1 |
paul@392 | 79 | print u # ??? |
paul@396 | 80 | print su # ?????? |
paul@396 | 81 | print us # ?????? |
paul@398 | 82 | |
paul@398 | 83 | sys.stdout.encoding = "UTF-8" |
paul@398 | 84 | print sys.stdout.encoding # UTF-8 |
paul@398 | 85 | print u # ?????? |
paul@398 | 86 | print su # ?????? |
paul@398 | 87 | print us # ?????? |