""" Test script for the Unicode implementation.

Written by Bill Tutt.
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
|
2001-01-18 03:11:13 +08:00
|
|
|
from test_support import verify, verbose
|
|
|
|
|
2000-06-30 17:45:20 +08:00
|
|
|
print 'Testing General Unicode Character Name, and case insensitivity...',
|
|
|
|
|
|
|
|
# General and case insensitivity test:
|
2001-01-20 19:15:25 +08:00
|
|
|
try:
|
|
|
|
# put all \N escapes inside exec'd raw strings, to make sure this
|
|
|
|
# script runs even if the compiler chokes on \N escapes
|
|
|
|
exec r"""
|
2000-06-30 17:45:20 +08:00
|
|
|
s = u"\N{LATIN CAPITAL LETTER T}" \
|
|
|
|
u"\N{LATIN SMALL LETTER H}" \
|
|
|
|
u"\N{LATIN SMALL LETTER E}" \
|
|
|
|
u"\N{SPACE}" \
|
|
|
|
u"\N{LATIN SMALL LETTER R}" \
|
|
|
|
u"\N{LATIN CAPITAL LETTER E}" \
|
|
|
|
u"\N{LATIN SMALL LETTER D}" \
|
|
|
|
u"\N{SPACE}" \
|
|
|
|
u"\N{LATIN SMALL LETTER f}" \
|
|
|
|
u"\N{LATIN CAPITAL LeTtEr o}" \
|
|
|
|
u"\N{LATIN SMaLl LETTER x}" \
|
|
|
|
u"\N{SPACE}" \
|
|
|
|
u"\N{LATIN SMALL LETTER A}" \
|
|
|
|
u"\N{LATIN SMALL LETTER T}" \
|
|
|
|
u"\N{LATIN SMALL LETTER E}" \
|
|
|
|
u"\N{SPACE}" \
|
|
|
|
u"\N{LATIN SMALL LETTER T}" \
|
|
|
|
u"\N{LATIN SMALL LETTER H}" \
|
|
|
|
u"\N{LATIN SMALL LETTER E}" \
|
|
|
|
u"\N{SpAcE}" \
|
|
|
|
u"\N{LATIN SMALL LETTER S}" \
|
|
|
|
u"\N{LATIN SMALL LETTER H}" \
|
|
|
|
u"\N{LATIN SMALL LETTER E}" \
|
|
|
|
u"\N{LATIN SMALL LETTER E}" \
|
|
|
|
u"\N{LATIN SMALL LETTER P}" \
|
|
|
|
u"\N{FULL STOP}"
|
2001-01-18 03:11:13 +08:00
|
|
|
verify(s == u"The rEd fOx ate the sheep.", s)
|
2001-01-20 19:15:25 +08:00
|
|
|
"""
|
|
|
|
except UnicodeError, v:
|
|
|
|
print v
|
2001-01-19 19:13:46 +08:00
|
|
|
print "done."
|
2001-01-19 19:00:42 +08:00
|
|
|
|
2001-01-24 15:59:11 +08:00
|
|
|
import unicodedata
|
2001-01-19 19:00:42 +08:00
|
|
|
|
2001-01-19 19:13:46 +08:00
|
|
|
print "Testing name to code mapping....",
|
2001-01-19 19:00:42 +08:00
|
|
|
for char in "SPAM":
|
|
|
|
name = "LATIN SMALL LETTER %s" % char
|
2001-01-24 15:59:11 +08:00
|
|
|
code = unicodedata.lookup(name)
|
|
|
|
verify(unicodedata.name(code) == name)
|
2001-01-19 19:13:46 +08:00
|
|
|
print "done."
|
2001-01-19 19:00:42 +08:00
|
|
|
|
2001-01-19 19:13:46 +08:00
|
|
|
print "Testing code to name mapping for all characters....",
|
|
|
|
count = 0
|
2001-01-19 19:00:42 +08:00
|
|
|
for code in range(65536):
|
|
|
|
try:
|
2001-01-24 15:59:11 +08:00
|
|
|
char = unichr(code)
|
|
|
|
name = unicodedata.name(char)
|
|
|
|
verify(unicodedata.lookup(name) == char)
|
2001-01-19 19:13:46 +08:00
|
|
|
count += 1
|
2001-01-24 15:59:11 +08:00
|
|
|
except (KeyError, ValueError):
|
2001-01-19 19:00:42 +08:00
|
|
|
pass
|
2000-06-30 17:45:20 +08:00
|
|
|
print "done."
|
|
|
|
|
2001-01-19 19:13:46 +08:00
|
|
|
print "Found", count, "characters in the unicode name database"
|
|
|
|
|
2000-06-30 17:45:20 +08:00
|
|
|
# misc. symbol testing
# Each check compares a character written as a \N{name} escape with the
# same character written as a \uXXXX escape.
print "Testing misc. symbols for unicode character name expansion....",
# The checks sit inside a raw string that is exec'd, so the \N escapes are
# only compiled when this statement runs.
exec r"""
verify(u"\N{PILCROW SIGN}" == u"\u00b6")
verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
"""
print "done."
|
|
|
|
|
|
|
|
# strict error testing:
|
|
|
|
print "Testing unicode character name expansion strict error handling....",
|
|
|
|
try:
|
2001-01-19 17:45:02 +08:00
|
|
|
unicode("\N{blah}", 'unicode-escape', 'strict')
|
2000-06-30 17:45:20 +08:00
|
|
|
except UnicodeError:
|
2000-10-24 01:22:08 +08:00
|
|
|
pass
|
2000-06-30 17:45:20 +08:00
|
|
|
else:
|
2001-01-19 17:45:02 +08:00
|
|
|
raise AssertionError, "failed to raise an exception when given a bogus character name"
|
|
|
|
|
2000-06-30 17:45:20 +08:00
|
|
|
try:
|
2001-01-19 17:45:02 +08:00
|
|
|
unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
|
2000-06-30 17:45:20 +08:00
|
|
|
except UnicodeError:
|
2000-10-24 01:22:08 +08:00
|
|
|
pass
|
2000-06-30 17:45:20 +08:00
|
|
|
else:
|
2001-01-19 17:45:02 +08:00
|
|
|
raise AssertionError, "failed to raise an exception when given a very " \
|
|
|
|
"long bogus character name"
|
2000-06-30 17:45:20 +08:00
|
|
|
|
|
|
|
try:
|
2000-10-24 01:22:08 +08:00
|
|
|
unicode("\N{SPACE", 'unicode-escape', 'strict')
|
2000-06-30 17:45:20 +08:00
|
|
|
except UnicodeError:
|
2000-10-24 01:22:08 +08:00
|
|
|
pass
|
2000-06-30 17:45:20 +08:00
|
|
|
else:
|
2000-10-24 01:22:08 +08:00
|
|
|
raise AssertionError, "failed to raise an exception for a missing closing brace."
|
2000-06-30 17:45:20 +08:00
|
|
|
|
|
|
|
try:
|
2000-10-24 01:22:08 +08:00
|
|
|
unicode("\NSPACE", 'unicode-escape', 'strict')
|
2000-06-30 17:45:20 +08:00
|
|
|
except UnicodeError:
|
2000-10-24 01:22:08 +08:00
|
|
|
pass
|
2000-06-30 17:45:20 +08:00
|
|
|
else:
|
2000-10-24 01:22:08 +08:00
|
|
|
raise AssertionError, "failed to raise an exception for a missing opening brace."
|
2000-06-30 17:45:20 +08:00
|
|
|
print "done."
|