This patch adds a new feature to the builtin charmap codec:

the mapping dictionaries can now contain 1-n mappings, meaning
that character ordinals may be mapped to strings or Unicode objects,
e.g. 0x0078 ('x') -> u"abc", causing the ordinal to be replaced by
the complete string or Unicode object instead of just one character.

Another feature introduced by the patch is that of mapping ordinals to
the empty string. This allows removing characters.

The patch is different from patch #103100 in that it does not cause a
performance hit for the normal use case of 1-1 mappings.

Written by Marc-Andre Lemburg, copyright assigned to Guido van Rossum.
This commit is contained in:
Marc-André Lemburg 2001-01-06 15:09:57 +00:00
parent ec233e5803
commit 1109db443e
2 changed files with 66 additions and 0 deletions

View File

@ -0,0 +1 @@
test_charmapcodec

View File

@ -0,0 +1,65 @@
""" Python Character Mapping Codec test
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
    """Charmap codec that translates through the module-level mapping tables.

    Encoding goes through ``encoding_map`` and decoding through
    ``decoding_map``; both are looked up as module globals at call time.
    """

    def encode(self, input, errors='strict'):
        """Encode *input* with ``codecs.charmap_encode`` and ``encoding_map``.

        *errors* is passed through unchanged as the error-handling scheme.
        Returns the result of ``codecs.charmap_encode`` as-is.
        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):
        """Decode *input* with ``codecs.charmap_decode`` and ``decoding_map``.

        *errors* is passed through unchanged as the error-handling scheme.
        Returns the result of ``codecs.charmap_decode`` as-is.
        """
        return codecs.charmap_decode(input, errors, decoding_map)
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds no behaviour of its own; everything comes from the two bases.
    """
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds no behaviour of its own; everything comes from the two bases.
    """
### encodings module API
def getregentry():
    """Return the codec entry for this module.

    The result is the 4-tuple ``(encoder, decoder, StreamReader,
    StreamWriter)``, where the encoder and decoder are bound methods of a
    ``Codec`` instance and the last two items are the stream classes.
    """
    # Codec defines no instance state, so one instance can supply both
    # bound methods.
    codec = Codec()
    return (codec.encode, codec.decode, StreamReader, StreamWriter)
### Decoding Map
# Start from the identity mapping for all 256 byte values.
decoding_map = codecs.make_identity_dict(range(256))
# 1-n decoding entry: byte 0x78 ('x') decodes to the whole string u"abc".
decoding_map[0x0078] = u"abc"
# Extra string key; when this table is inverted to build encoding_map it
# yields the 1-n encoding entry 0x78 -> "abc".
decoding_map["abc"] = 0x0078
### Encoding Map
# Build the encoding table by inverting decoding_map: each decoded value
# becomes a key that maps back to the key it was stored under.
encoding_map = {}
for source, target in decoding_map.items():
    encoding_map[target] = source
### Tests
def check(a, b):
    """Report a mismatch between *a* and *b* on standard output.

    When ``a != b`` a single '*** check failed' line containing the repr of
    both values is printed; when they compare equal nothing is printed.
    No exception is raised for a mismatch and the function returns None.
    """
    if a != b:
        # A parenthesised single argument prints the same text whether
        # ``print`` is the statement or the function.
        print('*** check failed: %s != %s' % (repr(a), repr(b)))
# The checks below look the codec up by the name 'mycp', but nothing in this
# file registers that name.  Register a search function that resolves it to
# this module's getregentry() so the checks are self-contained.
# NOTE(review): if the test harness already installs this module as the
# 'mycp' codec by other means, this registration is merely redundant --
# confirm.
def _mycp_search(encoding):
    """Codec search function: resolve 'mycp' to this module's codec entry."""
    if encoding == 'mycp':
        return getregentry()
    return None

codecs.register(_mycp_search)

# Decoding: byte 'x' (0x78) expands to u'abc'; every other byte maps 1-1.
check(unicode('abc', 'mycp'), u'abc')
check(unicode('xdef', 'mycp'), u'abcdef')
check(unicode('defx', 'mycp'), u'defabc')
check(unicode('dxf', 'mycp'), u'dabcf')
check(unicode('dxfx', 'mycp'), u'dabcfabc')

# Encoding: u'x' expands to 'abc'; every other character maps 1-1.
check(u'abc'.encode('mycp'), 'abc')
check(u'xdef'.encode('mycp'), 'abcdef')
check(u'defx'.encode('mycp'), 'defabc')
check(u'dxf'.encode('mycp'), 'dabcf')
check(u'dxfx'.encode('mycp'), 'dabcfabc')