Lichen

Annotated tests/unicode.py

426:6d5de0f9144e
2016-12-17 Paul Boddie Test for the initialisation of the sys module.
paul@392 1
# -*- coding: ISO-8859-1 -*-
paul@392 2
paul@392 3
import sys
paul@392 4
paul@392 5
# Print bytes.
paul@392 6
paul@396 7
s = b"???"
paul@396 8
print s                             # ???
paul@403 9
print len(s)                        # 3
paul@392 10
paul@392 11
# Obtain text and print it.
paul@392 12
paul@394 13
# Explicitly from bytes.
paul@394 14
paul@396 15
u = unicode("???", "ISO-8859-1")
paul@406 16
print u                             # ???
paul@403 17
print u.__class__                   # __builtins__.unicode.utf8string
paul@392 18
print u.encode("ISO-8859-1")        # ???
paul@398 19
print u.encoding                    # ISO-8859-1
paul@403 20
print len(u)                        # 3
paul@392 21
paul@394 22
# Explicitly from Unicode literals.
paul@394 23
paul@394 24
u2 = u"???"
paul@406 25
print u2                            # ???
paul@403 26
print u2.__class__                  # __builtins__.unicode.utf8string
paul@394 27
print u2.encode("ISO-8859-1")       # ???
paul@398 28
print u2.encoding                   # ISO-8859-1
paul@403 29
print len(u2)                       # 3
paul@394 30
paul@394 31
# Implicitly from string literals.
paul@394 32
paul@405 33
u3 = "???"
paul@406 34
print u3                            # ???
paul@405 35
print u3.__class__                  # __builtins__.unicode.utf8string
paul@405 36
print u3.encode("ISO-8859-1")       # ???
paul@405 37
print u3.encoding                   # ISO-8859-1
paul@405 38
print len(u3)                       # 3
paul@394 39
paul@410 40
# Test invalid sequences.
paul@410 41
paul@410 42
try:
paul@410 43
    u4 = unicode(s, "UTF-8")
paul@410 44
except UnicodeDecodeError, exc:
paul@410 45
    print "Attempt to decode", s, "as UTF-8 failed."
paul@410 46
paul@396 47
# Combine bytes and text.
paul@396 48
# The text should be encoded, giving a byte string result.
paul@396 49
paul@396 50
su = s + u
paul@396 51
print su                            # ??????
paul@398 52
print su.__class__                  # __builtins__.str.string
paul@403 53
print len(su)                       # 6
paul@396 54
paul@396 55
# Combine text and bytes.
paul@396 56
# The text should be encoded, giving a byte string result.
paul@396 57
paul@396 58
us = u + s
paul@396 59
print us                            # ??????
paul@398 60
print us.__class__                  # __builtins__.str.string
paul@403 61
print len(us)                       # 6
paul@398 62
paul@398 63
# Combine text and text.
paul@398 64
paul@398 65
uu2 = u + u2
paul@406 66
print uu2                           # ??????
paul@398 67
print uu2.__class__                 # __builtins__.unicode.utf8string
paul@398 68
print uu2.encoding                  # ISO-8859-1
paul@403 69
print len(uu2)                      # 6
paul@396 70
paul@392 71
# Inspect and update the encoding of stdout.
paul@398 72
# Note that su and us are byte strings and are not recoded.
paul@392 73
paul@426 74
print sys.stdout                    # <posix.io.sysstream instance>
paul@392 75
print sys.stdout.encoding           # None
paul@398 76
paul@392 77
sys.stdout.encoding = "ISO-8859-1"
paul@398 78
print sys.stdout.encoding           # ISO-8859-1
paul@392 79
print u                             # ???
paul@396 80
print su                            # ??????
paul@396 81
print us                            # ??????
paul@398 82
paul@398 83
sys.stdout.encoding = "UTF-8"
paul@398 84
print sys.stdout.encoding           # UTF-8
paul@398 85
print u                             # ??????
paul@398 86
print su                            # ??????
paul@398 87
print us                            # ??????