2010-03-11 18:53:45 -04:00
|
|
|
#!/usr/bin/env python3
|
2005-10-21 10:47:03 -03:00
|
|
|
|
|
|
|
""" Compare the output of two codecs.
|
|
|
|
|
|
|
|
(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
|
|
|
|
|
|
|
|
Licensed to PSF under a Contributor Agreement.
|
|
|
|
|
|
|
|
"""
|
|
|
|
import sys
|
|
|
|
|
|
|
|
def compare_codecs(encoding1, encoding2, *, max_codepoint=None):
    """Compare the encoding and decoding behavior of two codecs.

    Every code point from 0 through *max_codepoint* is encoded with both
    codecs, and every single byte value (0-255) is decoded with both
    codecs.  Each difference is printed as it is found, followed by a
    summary line.

    Args:
        encoding1: Name of the first codec.
        encoding2: Name of the second codec.
        max_codepoint: Highest code point (inclusive) whose encoding is
            compared.  Defaults to ``sys.maxunicode``, i.e. the whole
            Unicode range.

    Returns:
        The total number of encoding and decoding mismatches found
        (0 when the codecs behave identically over the tested range).

    Raises:
        LookupError: If either codec name is unknown; only UnicodeError
            is treated as "character/byte undefined in this codec".
    """
    # Placeholder recorded when a codec cannot handle a character/byte.
    # It is a str, so it never compares equal to a successful bytes
    # result of encode().
    undefined = '<undefined>'

    if max_codepoint is None:
        max_codepoint = sys.maxunicode

    print('Comparing encoding/decoding of %r and %r' % (encoding1, encoding2))
    mismatch = 0

    # Check encoding
    for i in range(max_codepoint + 1):
        u = chr(i)
        try:
            c1 = u.encode(encoding1)
        except UnicodeError:
            c1 = undefined
        try:
            c2 = u.encode(encoding2)
        except UnicodeError:
            c2 = undefined
        if c1 != c2:
            print(' * encoding mismatch for 0x%04X: %-14r != %r' %
                  (i, c1, c2))
            mismatch += 1

    # Check decoding
    for i in range(256):
        # Decoding operates on bytes; str objects have no .decode() in
        # Python 3, so build a one-byte bytes object for each value.
        c = bytes([i])
        try:
            u1 = c.decode(encoding1)
        except UnicodeError:
            u1 = undefined
        try:
            u2 = c.decode(encoding2)
        except UnicodeError:
            u2 = undefined
        if u1 != u2:
            print(' * decoding mismatch for 0x%04X: %-14r != %r' %
                  (i, u1, u2))
            mismatch += 1

    if mismatch:
        print()
        print('Found %i mismatches' % mismatch)
    else:
        print('-> Codecs are identical.')
    return mismatch
|
2005-10-21 10:47:03 -03:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: expects the two codec names to compare as the
    # first two command-line arguments.  Exit with a usage message rather
    # than an IndexError traceback when they are missing.
    if len(sys.argv) < 3:
        sys.exit('usage: %s encoding1 encoding2' % sys.argv[0])
    compare_codecs(sys.argv[1], sys.argv[2])
|