mirror of
https://github.com/python/cpython.git
synced 2024-12-01 03:01:36 +01:00
774c931c12
Added another test for string formatting (the one that produced the core dump now fixed in unicodeobject.c).
408 lines
15 KiB
Python
408 lines
15 KiB
Python
""" Test script for the Unicode implementation.
|
||
|
||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||
|
||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||
|
||
"""
|
||
from test_support import verbose
|
||
import sys
|
||
|
||
def test(method, input, output, *args):
|
||
if verbose:
|
||
print '%s.%s%s =? %s... ' % (repr(input), method, args, output),
|
||
try:
|
||
f = getattr(input, method)
|
||
value = apply(f, args)
|
||
except:
|
||
value = sys.exc_type
|
||
exc = sys.exc_info()[:2]
|
||
else:
|
||
exc = None
|
||
if value != output:
|
||
if verbose:
|
||
print 'no'
|
||
print '*',f, `input`, `output`, `value`
|
||
if exc:
|
||
print ' value == %s: %s' % (exc)
|
||
else:
|
||
if verbose:
|
||
print 'yes'
|
||
|
||
# Case conversion and substring search.  Every row has the shape
# (method, input, expected[, method arguments...]) and is passed to test().
for case in (
    ('capitalize', u' hello ', u' hello '),
    ('capitalize', u'hello ', u'Hello '),

    ('title', u' hello ', u' Hello '),
    ('title', u'hello ', u'Hello '),
    ('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String'),
    ('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String'),
    ('title', u"getInt", u'Getint'),

    ('find', u'abcdefghiabc', 0, u'abc'),
    ('find', u'abcdefghiabc', 9, u'abc', 1),
    ('find', u'abcdefghiabc', -1, u'def', 4),

    ('rfind', u'abcdefghiabc', 9, u'abc'),

    ('lower', u'HeLLo', u'hello'),
    ('lower', u'hello', u'hello'),

    ('upper', u'HeLLo', u'HELLO'),
    ('upper', u'HELLO', u'HELLO'),
    ):
    apply(test, case)
|
||
|
||
# Disabled by the constant-false guard: nothing in this suite executes.
# NOTE(review): the extent of the guarded suite is inferred from context
# (indentation was lost) -- confirm against the upstream file.
if 0:
    # 256-entry byte table, identity except that 'a', 'b', 'c' map to
    # 'x', 'y', 'z'.
    transtable = (
        '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017'
        '\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037'
        ' !"#$%&\'()*+,-./0123456789:;<=>?'
        '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_'
        '`xyzdefghijklmnopqrstuvwxyz{|}~\177'
        '\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217'
        '\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237'
        '\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257'
        '\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277'
        '\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317'
        '\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337'
        '\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357'
        '\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
    )

    for expected, replacement in ((transtable, u'xyz'), (ValueError, u'xyzq')):
        test('maketrans', u'abc', expected, replacement)
|
||
|
||
# split(): default whitespace splitting, an explicit separator, and the
# maxsplit argument.  Rows are (method, input, expected[, arguments...]).
for case in (
    ('split', u'this is the split function',
     [u'this', u'is', u'the', u'split', u'function']),
    ('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|'),
    ('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2),
    ('split', u'a b c d', [u'a', u'b c d'], None, 1),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3),
    ('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4),
    ('split', u'a b c d', [u'a b c d'], None, 0),
    ('split', u'a b c d', [u'a', u'b', u'c d'], None, 2),
    ('split', u'a b c d ', [u'a', u'b', u'c', u'd']),
    ):
    apply(test, case)
|
||
|
||
# join now works with any sequence type
|
||
class Sequence:
    """Minimal sequence: exposes only __len__ and __getitem__ over self.seq."""

    def __init__(self):
        # Backing store; subclasses replace it with other contents.
        self.seq = 'wxyz'

    def __len__(self):
        """Return the number of items in the backing store."""
        return len(self.seq)

    def __getitem__(self, index):
        """Return the item at index; errors from the store propagate."""
        return self.seq[index]
|
||
|
||
# join() over a list, a tuple, a user-defined sequence, and a
# non-sequence argument (expected to raise TypeError).
for case in (
    ('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd']),
    ('join', u'', u'abcd', (u'a', u'b', u'c', u'd')),
    ('join', u' ', u'w x y z', Sequence()),
    ('join', u' ', TypeError, 7),
    ):
    apply(test, case)
|
||
|
||
class BadSeq(Sequence):
|
||
def __init__(self): self.seq = [7, u'hello', 123L]
|
||
|
||
# Joining a sequence that contains non-string items must raise TypeError.
test('join', u' ', TypeError, BadSeq())

# Build the expected value by plain concatenation (not with join itself):
# ten runs of ten 'x' characters separated by ':'.
chunk = u'x' * 10
result = chunk
for i in range(1, 10):
    result = result + u':' + chunk
test('join', u':', result, [chunk] * 10)
test('join', u':', result, (chunk,) * 10)
|
||
|
||
# Whitespace stripping on both sides / left / right, plus swapcase().
for case in (
    ('strip', u' hello ', u'hello'),
    ('lstrip', u' hello ', u'hello '),
    ('rstrip', u' hello ', u' hello'),
    ('strip', u'hello', u'hello'),

    ('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS'),
    ):
    apply(test, case)
|
||
|
||
# Disabled by the constant-false guard: nothing in this suite executes.
# NOTE(review): the extent of the guarded suite is inferred from context
# (indentation was lost) -- confirm against the upstream file.
# NOTE(review): `string` is not imported in the visible part of this file;
# enabling this suite would need that import.
if 0:
    test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def')

    table = string.maketrans('a', u'A')
    for text, expected in ((u'abc', u'Abc'), (u'xyz', u'xyz')):
        test('translate', text, expected, table)
|
||
|
||
# replace(): with and without a count, a count larger than the number of
# matches, a zero count, and a pattern that does not occur.
for case in (
    ('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1),
    ('replace', u'one!two!three!', u'onetwothree', '!', ''),
    ('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4),
    ('replace', u'one!two!three!', u'one!two!three!', u'!', u'@', 0),
    ('replace', u'one!two!three!', u'one@two@three@', u'!', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@'),
    ('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2),
    ):
    apply(test, case)
|
||
|
||
# startswith(prefix[, start[, end]]); expected results are 1 / 0.
for case in (
    ('startswith', u'hello', 1, u'he'),
    ('startswith', u'hello', 1, u'hello'),
    ('startswith', u'hello', 0, u'hello world'),
    ('startswith', u'hello', 1, u''),
    ('startswith', u'hello', 0, u'ello'),
    ('startswith', u'hello', 1, u'ello', 1),
    ('startswith', u'hello', 1, u'o', 4),
    ('startswith', u'hello', 0, u'o', 5),
    ('startswith', u'hello', 1, u'', 5),
    ('startswith', u'hello', 0, u'lo', 6),
    ('startswith', u'helloworld', 1, u'lowo', 3),
    ('startswith', u'helloworld', 1, u'lowo', 3, 7),
    ('startswith', u'helloworld', 0, u'lowo', 3, 6),
    ):
    apply(test, case)
|
||
|
||
# endswith(suffix[, start[, end]]); expected results are 1 / 0.
for case in (
    ('endswith', u'hello', 1, u'lo'),
    ('endswith', u'hello', 0, u'he'),
    ('endswith', u'hello', 1, u''),
    ('endswith', u'hello', 0, u'hello world'),
    ('endswith', u'helloworld', 0, u'worl'),
    ('endswith', u'helloworld', 1, u'worl', 3, 9),
    ('endswith', u'helloworld', 1, u'world', 3, 12),
    ('endswith', u'helloworld', 1, u'lowo', 1, 7),
    ('endswith', u'helloworld', 1, u'lowo', 2, 7),
    ('endswith', u'helloworld', 1, u'lowo', 3, 7),
    ('endswith', u'helloworld', 0, u'lowo', 4, 7),
    ('endswith', u'helloworld', 0, u'lowo', 3, 8),
    ('endswith', u'ab', 0, u'ab', 0, 1),
    ('endswith', u'ab', 0, u'ab', 0, 0),
    ):
    apply(test, case)
|
||
|
||
# expandtabs(): each tab is replaced by enough blanks to reach the next
# multiple of the tab size (default 8), with the column restarting after
# '\r' and '\n'.  The padding is written as explicit repetition counts so
# the expected widths cannot be lost to whitespace normalisation:
#   tab size 8: 'ab' + 6 blanks, 'g' + 7 blanks
#   tab size 4: 'ab' + 2 blanks, 'g' + 3 blanks
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi')
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 6 + u'def\ng' + u' ' * 7 + u'hi', 8)
test('expandtabs', u'abc\rab\tdef\ng\thi',
     u'abc\rab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
test('expandtabs', u'abc\r\nab\tdef\ng\thi',
     u'abc\r\nab' + u' ' * 2 + u'def\ng' + u' ' * 3 + u'hi', 4)
|
||
|
||
# Disabled by the constant-false guard: nothing in this suite executes.
# NOTE(review): the extent of the guarded suite is inferred from context
# (indentation was lost) -- confirm against the upstream file.
if 0:
    for text in (u'abc def ghi', u'abc\tdef\nghi', u'abc\t def \nghi'):
        test('capwords', text, u'Abc Def Ghi')
|
||
|
||
# Comparisons:
|
||
print 'Testing Unicode comparisons...',
|
||
assert u'abc' == 'abc'
|
||
assert 'abc' == u'abc'
|
||
assert u'abc' == u'abc'
|
||
assert u'abcd' > 'abc'
|
||
assert 'abcd' > u'abc'
|
||
assert u'abcd' > u'abc'
|
||
assert u'abc' < 'abcd'
|
||
assert 'abc' < u'abcd'
|
||
assert u'abc' < u'abcd'
|
||
print 'done.'
|
||
|
||
# ljust / rjust / center pad with blanks up to the requested width and
# return the string unchanged when it is already at least that wide.
# The padding is written as explicit repetition counts so the expected
# widths cannot be lost to whitespace normalisation.

# width 10: seven blanks of padding (center: three left, four right)
test('ljust', u'abc', u'abc' + u' ' * 7, 10)
test('rjust', u'abc', u' ' * 7 + u'abc', 10)
test('center', u'abc', u' ' * 3 + u'abc' + u' ' * 4, 10)

# width 6: three blanks of padding (center: one left, two right)
test('ljust', u'abc', u'abc' + u' ' * 3, 6)
test('rjust', u'abc', u' ' * 3 + u'abc', 6)
test('center', u'abc', u' ' * 1 + u'abc' + u' ' * 2, 6)

# width 2 is shorter than the string: no padding at all
test('ljust', u'abc', u'abc', 2)
test('rjust', u'abc', u'abc', 2)
test('center', u'abc', u'abc', 2)
|
||
|
||
# Case predicates.  Rows are (method, input, expected 1/0).
for case in (
    ('islower', u'a', 1),
    ('islower', u'A', 0),
    ('islower', u'\n', 0),
    ('islower', u'\u1FFc', 0),
    ('islower', u'abc', 1),
    ('islower', u'aBc', 0),
    ('islower', u'abc\n', 1),

    ('isupper', u'a', 0),
    ('isupper', u'A', 1),
    ('isupper', u'\n', 0),
    ('isupper', u'\u1FFc', 0),
    ('isupper', u'ABC', 1),
    ('isupper', u'AbC', 0),
    ('isupper', u'ABC\n', 1),

    ('istitle', u'a', 0),
    ('istitle', u'A', 1),
    ('istitle', u'\n', 0),
    ('istitle', u'\u1FFc', 1),
    ('istitle', u'A Titlecased Line', 1),
    ('istitle', u'A\nTitlecased Line', 1),
    ('istitle', u'A Titlecased, Line', 1),
    ('istitle', u'Greek \u1FFcitlecases ...', 1),
    ('istitle', u'Not a capitalized String', 0),
    ('istitle', u'Not\ta Titlecase String', 0),
    ('istitle', u'Not--a Titlecase String', 0),
    ):
    apply(test, case)
|
||
|
||
# splitlines(): mixed '\n', '\r' and '\r\n' terminators; the last row
# passes a true argument and expects the terminators to be kept.
for case in (
    ('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']),
    ('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'', u'abc', u'def', u'ghi', u'']),
    ('splitlines', u"\nabc\ndef\r\nghi\n\r",
     [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1),
    ):
    apply(test, case)
|
||
|
||
# translate() with a dict keyed by ordinal: None deletes the character,
# an integer or a Unicode string replaces it.
for expected, mapping in (
    (u'bbbc', {ord('a'):None}),
    (u'iiic', {ord('a'):None, ord('b'):ord('i')}),
    (u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}),
    ):
    test('translate', u"abababc", expected, mapping)
|
||
|
||
# Contains:
|
||
print 'Testing Unicode contains method...',
|
||
assert ('a' in u'abdb') == 1
|
||
assert ('a' in u'bdab') == 1
|
||
assert ('a' in u'bdaba') == 1
|
||
assert ('a' in u'bdba') == 1
|
||
assert ('a' in u'bdba') == 1
|
||
assert (u'a' in u'bdba') == 1
|
||
assert (u'a' in u'bdb') == 0
|
||
assert (u'a' in 'bdb') == 0
|
||
assert (u'a' in 'bdba') == 1
|
||
assert (u'a' in ('a',1,None)) == 1
|
||
assert (u'a' in (1,None,'a')) == 1
|
||
assert (u'a' in (1,None,u'a')) == 1
|
||
assert ('a' in ('a',1,None)) == 1
|
||
assert ('a' in (1,None,'a')) == 1
|
||
assert ('a' in (1,None,u'a')) == 1
|
||
assert ('a' in ('x',1,u'y')) == 0
|
||
assert ('a' in ('x',1,None)) == 0
|
||
print 'done.'
|
||
|
||
# Formatting:
|
||
print 'Testing Unicode formatting strings...',
|
||
assert u"%s, %s" % (u"abc", "abc") == u'abc, abc'
|
||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00'
|
||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00'
|
||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50'
|
||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57'
|
||
assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57'
|
||
assert u"%c" % (u"abc",) == u'a'
|
||
assert u"%c" % ("abc",) == u'a'
|
||
assert u"%c" % (34,) == u'"'
|
||
assert u"%c" % (36,) == u'$'
|
||
assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'"
|
||
assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
|
||
assert u"%(x)s, %(<28>)s" % {'x':u"abc", u'<EFBFBD>'.encode('utf-8'):"def"} == u'abc, def'
|
||
# formatting jobs delegated from the string implementation:
|
||
assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...'
|
||
assert '...%(foo)s...' % {'foo':"abc"} == '...abc...'
|
||
assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...'
|
||
assert '...%(foo)s...' % {u'foo':u"abc"} == u'...abc...'
|
||
assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...'
|
||
assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...'
|
||
assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...'
|
||
assert '...%s...' % u"abc" == u'...abc...'
|
||
try:
|
||
'...%s...<2E><><EFBFBD>...' % u"abc"
|
||
except ValueError:
|
||
pass
|
||
else:
|
||
raise AssertionError, "'...%s...<2E><><EFBFBD>...' % u'abc' failed to raise an exception"
|
||
print 'done.'
|
||
|
||
# Test builtin codecs
|
||
print 'Testing builtin codecs...',
|
||
|
||
assert unicode('hello','ascii') == u'hello'
|
||
assert unicode('hello','utf-8') == u'hello'
|
||
assert unicode('hello','utf8') == u'hello'
|
||
assert unicode('hello','latin-1') == u'hello'
|
||
|
||
try:
|
||
u'Andr\202 x'.encode('ascii')
|
||
u'Andr\202 x'.encode('ascii','strict')
|
||
except ValueError:
|
||
pass
|
||
else:
|
||
raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception"
|
||
assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
|
||
assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"
|
||
|
||
try:
|
||
unicode('Andr\202 x','ascii')
|
||
unicode('Andr\202 x','ascii','strict')
|
||
except ValueError:
|
||
pass
|
||
else:
|
||
raise AssertionError, "unicode('Andr\202') failed to raise an exception"
|
||
assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
|
||
assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'
|
||
|
||
assert u'hello'.encode('ascii') == 'hello'
|
||
assert u'hello'.encode('utf-8') == 'hello'
|
||
assert u'hello'.encode('utf8') == 'hello'
|
||
assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000'
|
||
assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o'
|
||
assert u'hello'.encode('latin-1') == 'hello'
|
||
|
||
u = u''.join(map(unichr, range(1024)))
|
||
for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
|
||
'raw_unicode_escape', 'unicode_escape', 'unicode_internal'):
|
||
assert unicode(u.encode(encoding),encoding) == u
|
||
|
||
u = u''.join(map(unichr, range(256)))
|
||
for encoding in (
|
||
'latin-1',
|
||
):
|
||
try:
|
||
assert unicode(u.encode(encoding),encoding) == u
|
||
except AssertionError:
|
||
print '*** codec "%s" failed round-trip' % encoding
|
||
except ValueError,why:
|
||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||
|
||
u = u''.join(map(unichr, range(128)))
|
||
for encoding in (
|
||
'ascii',
|
||
):
|
||
try:
|
||
assert unicode(u.encode(encoding),encoding) == u
|
||
except AssertionError:
|
||
print '*** codec "%s" failed round-trip' % encoding
|
||
except ValueError,why:
|
||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||
|
||
print 'done.'
|
||
|
||
print 'Testing standard mapping codecs...',
|
||
|
||
print '0-127...',
|
||
s = ''.join(map(chr, range(128)))
|
||
for encoding in (
|
||
'cp037', 'cp1026',
|
||
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
|
||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||
'cp863', 'cp865', 'cp866',
|
||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||
'mac_cyrillic', 'mac_latin2',
|
||
|
||
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||
'cp1256', 'cp1257', 'cp1258',
|
||
'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||
|
||
'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||
'cp1006', 'cp875', 'iso8859_8',
|
||
|
||
### These have undefined mappings:
|
||
#'cp424',
|
||
|
||
):
|
||
try:
|
||
assert unicode(s,encoding).encode(encoding) == s
|
||
except AssertionError:
|
||
print '*** codec "%s" failed round-trip' % encoding
|
||
except ValueError,why:
|
||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||
|
||
print '128-255...',
|
||
s = ''.join(map(chr, range(128,256)))
|
||
for encoding in (
|
||
'cp037', 'cp1026',
|
||
'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
|
||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||
'cp863', 'cp865', 'cp866',
|
||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||
'mac_cyrillic', 'mac_latin2',
|
||
|
||
### These have undefined mappings:
|
||
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||
#'cp1256', 'cp1257', 'cp1258',
|
||
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||
|
||
### These fail the round-trip:
|
||
#'cp1006', 'cp875', 'iso8859_8',
|
||
|
||
):
|
||
try:
|
||
assert unicode(s,encoding).encode(encoding) == s
|
||
except AssertionError:
|
||
print '*** codec "%s" failed round-trip' % encoding
|
||
except ValueError,why:
|
||
print '*** codec for "%s" failed: %s' % (encoding, why)
|
||
|
||
print 'done.'
|
||
|
||
print 'Testing Unicode string concatenation...',
|
||
assert (u"abc" u"def") == u"abcdef"
|
||
assert ("abc" u"def") == u"abcdef"
|
||
assert (u"abc" "def") == u"abcdef"
|
||
assert (u"abc" u"def" "ghi") == u"abcdefghi"
|
||
assert ("abc" "def" u"ghi") == u"abcdefghi"
|
||
print 'done.'
|