This patch changes the default behaviour of the builtin charmap

codec to not apply Latin-1 mappings for keys which are not found
in the mapping dictionaries, but instead treat them as undefined
mappings.

The patch was originally written by Martin v. Loewis with some
additional (cosmetic) changes and an updated test script
by Marc-Andre Lemburg.

The standard codecs were recreated from the most current files
available at the Unicode.org site using the Tools/scripts/gencodec.py
tool.

This patch closes the bugs #116285 and #119960.
This commit is contained in:
Marc-André Lemburg 2001-01-03 21:29:14 +00:00
parent b55b7bb3ab
commit a866df806d
56 changed files with 424 additions and 293 deletions

View File

@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
sr.file_encoding = file_encoding sr.file_encoding = file_encoding
return sr return sr
### Helpers for charmap-based codecs
def make_identity_dict(rng):
    """ make_identity_dict(rng) -> dict

        Return a dictionary where elements of the rng sequence are
        mapped to themselves.

        rng may be any iterable of hashable items (the charmap codecs
        in this patch pass range(256)).  Later duplicates simply
        overwrite earlier ones, so the result has one entry per
        distinct element.

    """
    res = {}
    # Explicit loop (rather than a newer construct) keeps this helper
    # usable on the old interpreter versions this module targets.
    for i in rng:
        res[i] = i
    return res
### Tests ### Tests
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP037.TXT'. """ Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0004: 0x009c, # CONTROL 0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION 0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL 0x0006: 0x0086, # CONTROL
@ -273,7 +273,7 @@ decoding_map = {
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL 0x00ff: 0x009f, # CONTROL
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1006.TXT'. """ Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
@ -131,7 +131,7 @@ decoding_map = {
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1026.TXT'. """ Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0004: 0x009c, # CONTROL 0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION 0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL 0x0006: 0x0086, # CONTROL
@ -273,7 +273,7 @@ decoding_map = {
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL 0x00ff: 0x009f, # CONTROL
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1250.TXT'. """ Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -116,7 +116,7 @@ decoding_map = {
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE 0x00ff: 0x02d9, # DOT ABOVE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1251.TXT'. """ Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -150,7 +150,7 @@ decoding_map = {
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1252.TXT'. """ Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -69,7 +69,7 @@ decoding_map = {
0x009d: None, # UNDEFINED 0x009d: None, # UNDEFINED
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1253.TXT'. """ Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -144,7 +144,7 @@ decoding_map = {
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ff: None, # UNDEFINED 0x00ff: None, # UNDEFINED
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1254.TXT'. """ Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -75,7 +75,7 @@ decoding_map = {
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1255.TXT'. """ Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -136,7 +136,7 @@ decoding_map = {
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: None, # UNDEFINED 0x00ff: None, # UNDEFINED
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1256.TXT'. """ Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: 0x067e, # ARABIC LETTER PEH 0x0081: 0x067e, # ARABIC LETTER PEH
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -122,7 +122,7 @@ decoding_map = {
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1257.TXT'. """ Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -124,7 +124,7 @@ decoding_map = {
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x02d9, # DOT ABOVE 0x00ff: 0x02d9, # DOT ABOVE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP1258.TXT'. """ Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@ -83,7 +83,7 @@ decoding_map = {
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
0x00fe: 0x20ab, # DONG SIGN 0x00fe: 0x20ab, # DONG SIGN
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP424.TXT'. """ Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0004: 0x009c, # SELECT 0x0004: 0x009c, # SELECT
0x0005: 0x0009, # HORIZONTAL TABULATION 0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # REQUIRED NEW LINE 0x0006: 0x0086, # REQUIRED NEW LINE
@ -273,7 +273,7 @@ decoding_map = {
0x00fd: None, # UNDEFINED 0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED 0x00fe: None, # UNDEFINED
0x00ff: 0x009f, # EIGHT ONES 0x00ff: 0x009f, # EIGHT ONES
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP437.TXT'. """ Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP500.TXT'. """ Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0004: 0x009c, # CONTROL 0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION 0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL 0x0006: 0x0086, # CONTROL
@ -273,7 +273,7 @@ decoding_map = {
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL 0x00ff: 0x009f, # CONTROL
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP737.TXT'. """ Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP775.TXT'. """ Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP850.TXT'. """ Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP852.TXT'. """ Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP855.TXT'. """ Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00a7, # SECTION SIGN 0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP856.TXT'. """ Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET 0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL 0x0082: 0x05d2, # HEBREW LETTER GIMEL
@ -120,10 +120,10 @@ decoding_map = {
0x00d0: None, # UNDEFINED 0x00d0: None, # UNDEFINED
0x00d1: None, # UNDEFINED 0x00d1: None, # UNDEFINED
0x00d2: None, # UNDEFINED 0x00d2: None, # UNDEFINED
0x00d3: None, # UNDEFINED 0x00d3: None, # UNDEFINED
0x00d4: None, # UNDEFINED 0x00d4: None, # UNDEFINED
0x00d5: None, # UNDEFINED 0x00d5: None, # UNDEFINED
0x00d6: None, # UNDEFINED 0x00d6: None, # UNDEFINED
0x00d7: None, # UNDEFINED 0x00d7: None, # UNDEFINED
0x00d8: None, # UNDEFINED 0x00d8: None, # UNDEFINED
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP857.TXT'. """ Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -164,7 +164,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP860.TXT'. """ Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP861.TXT'. """ Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP862.TXT'. """ Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET 0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL 0x0082: 0x05d2, # HEBREW LETTER GIMEL
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP863.TXT'. """ Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP864.TXT'. """ Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0025: 0x066a, # ARABIC PERCENT SIGN 0x0025: 0x066a, # ARABIC PERCENT SIGN
0x0080: 0x00b0, # DEGREE SIGN 0x0080: 0x00b0, # DEGREE SIGN
0x0081: 0x00b7, # MIDDLE DOT 0x0081: 0x00b7, # MIDDLE DOT
@ -163,7 +163,7 @@ decoding_map = {
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: None, # UNDEFINED 0x00ff: None, # UNDEFINED
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP865.TXT'. """ Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP866.TXT'. """ Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x00a4, # CURRENCY SIGN 0x00fd: 0x00a4, # CURRENCY SIGN
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP869.TXT'. """ Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: None, # UNDEFINED 0x0080: None, # UNDEFINED
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED 0x0082: None, # UNDEFINED
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00fe: 0x25a0, # BLACK SQUARE 0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE 0x00ff: 0x00a0, # NO-BREAK SPACE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP874.TXT'. """ Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x20ac, # EURO SIGN 0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED 0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED 0x0082: None, # UNDEFINED
@ -164,7 +164,7 @@ decoding_map = {
0x00fd: None, # UNDEFINED 0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED 0x00fe: None, # UNDEFINED
0x00ff: None, # UNDEFINED 0x00ff: None, # UNDEFINED
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CP875.TXT'. """ Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0004: 0x009c, # CONTROL 0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION 0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL 0x0006: 0x0086, # CONTROL
@ -274,7 +274,7 @@ decoding_map = {
0x00fd: 0x001a, # SUBSTITUTE 0x00fd: 0x001a, # SUBSTITUTE
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ff: 0x009f, # CONTROL 0x00ff: 0x009f, # CONTROL
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-1.TXT'. """ Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -22,10 +22,7 @@ class Codec(codecs.Codec):
return codecs.charmap_decode(input,errors,decoding_map) return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(Codec,codecs.StreamWriter):
pass
def __init__(self,stream,errors='strict'):
codecs.StreamWriter.__init__(self,strict,errors)
class StreamReader(Codec,codecs.StreamReader): class StreamReader(Codec,codecs.StreamReader):
pass pass
@ -38,9 +35,9 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-10.TXT'. """ Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
@ -83,7 +83,7 @@ decoding_map = {
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00ff: 0x0138, # LATIN SMALL LETTER KRA 0x00ff: 0x0138, # LATIN SMALL LETTER KRA
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-13.TXT'. """ Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
@ -93,7 +93,7 @@ decoding_map = {
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-14.TXT'. """ Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
@ -68,7 +68,7 @@ decoding_map = {
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-15.TXT'. """ Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a4: 0x20ac, # EURO SIGN 0x00a4: 0x20ac, # EURO SIGN
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
@ -45,7 +45,7 @@ decoding_map = {
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-2.TXT'. """ Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x02d8, # BREVE 0x00a2: 0x02d8, # BREVE
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
@ -94,7 +94,7 @@ decoding_map = {
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE 0x00ff: 0x02d9, # DOT ABOVE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-3.TXT'. """ Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,15 +35,17 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
0x00a2: 0x02d8, # BREVE 0x00a2: 0x02d8, # BREVE
0x00a5: None,
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0x00ae: None,
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
@ -51,21 +53,26 @@ decoding_map = {
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
0x00be: None,
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00c3: None,
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
0x00d0: None,
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
0x00e3: None,
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
0x00f0: None,
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
0x00ff: 0x02d9, # DOT ABOVE 0x00ff: 0x02d9, # DOT ABOVE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-4.TXT'. """ Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0138, # LATIN SMALL LETTER KRA 0x00a2: 0x0138, # LATIN SMALL LETTER KRA
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
@ -87,7 +87,7 @@ decoding_map = {
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00ff: 0x02d9, # DOT ABOVE 0x00ff: 0x02d9, # DOT ABOVE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-5.TXT'. """ Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
@ -131,7 +131,7 @@ decoding_map = {
0x00fd: 0x00a7, # SECTION SIGN 0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-6.TXT'. """ Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,11 +35,38 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: None,
0x00a2: None,
0x00a3: None,
0x00a5: None,
0x00a6: None,
0x00a7: None,
0x00a8: None,
0x00a9: None,
0x00aa: None,
0x00ab: None,
0x00ac: 0x060c, # ARABIC COMMA 0x00ac: 0x060c, # ARABIC COMMA
0x00ae: None,
0x00af: None,
0x00b0: None,
0x00b1: None,
0x00b2: None,
0x00b3: None,
0x00b4: None,
0x00b5: None,
0x00b6: None,
0x00b7: None,
0x00b8: None,
0x00b9: None,
0x00ba: None,
0x00bb: 0x061b, # ARABIC SEMICOLON 0x00bb: 0x061b, # ARABIC SEMICOLON
0x00bc: None,
0x00bd: None,
0x00be: None,
0x00bf: 0x061f, # ARABIC QUESTION MARK 0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c0: None,
0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c1: 0x0621, # ARABIC LETTER HAMZA
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
@ -66,6 +93,11 @@ decoding_map = {
0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d8: 0x0638, # ARABIC LETTER ZAH
0x00d9: 0x0639, # ARABIC LETTER AIN 0x00d9: 0x0639, # ARABIC LETTER AIN
0x00da: 0x063a, # ARABIC LETTER GHAIN 0x00da: 0x063a, # ARABIC LETTER GHAIN
0x00db: None,
0x00dc: None,
0x00dd: None,
0x00de: None,
0x00df: None,
0x00e0: 0x0640, # ARABIC TATWEEL 0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e1: 0x0641, # ARABIC LETTER FEH
0x00e2: 0x0642, # ARABIC LETTER QAF 0x00e2: 0x0642, # ARABIC LETTER QAF
@ -85,7 +117,20 @@ decoding_map = {
0x00f0: 0x0650, # ARABIC KASRA 0x00f0: 0x0650, # ARABIC KASRA
0x00f1: 0x0651, # ARABIC SHADDA 0x00f1: 0x0651, # ARABIC SHADDA
0x00f2: 0x0652, # ARABIC SUKUN 0x00f2: 0x0652, # ARABIC SUKUN
} 0x00f3: None,
0x00f4: None,
0x00f5: None,
0x00f6: None,
0x00f7: None,
0x00f8: None,
0x00f9: None,
0x00fa: None,
0x00fb: None,
0x00fc: None,
0x00fd: None,
0x00fe: None,
0x00ff: None,
})
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-7.TXT'. """ Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,10 +35,14 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00a4: None,
0x00a5: None,
0x00aa: None,
0x00ae: None,
0x00af: 0x2015, # HORIZONTAL BAR 0x00af: 0x2015, # HORIZONTAL BAR
0x00b4: 0x0384, # GREEK TONOS 0x00b4: 0x0384, # GREEK TONOS
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
@ -67,6 +71,7 @@ decoding_map = {
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00d2: None,
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
@ -111,7 +116,8 @@ decoding_map = {
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
} 0x00ff: None,
})
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-8.TXT'. """ Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,11 +35,43 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: None,
0x00aa: 0x00d7, # MULTIPLICATION SIGN 0x00aa: 0x00d7, # MULTIPLICATION SIGN
0x00af: 0x203e, # OVERLINE
0x00ba: 0x00f7, # DIVISION SIGN 0x00ba: 0x00f7, # DIVISION SIGN
0x00bf: None,
0x00c0: None,
0x00c1: None,
0x00c2: None,
0x00c3: None,
0x00c4: None,
0x00c5: None,
0x00c6: None,
0x00c7: None,
0x00c8: None,
0x00c9: None,
0x00ca: None,
0x00cb: None,
0x00cc: None,
0x00cd: None,
0x00ce: None,
0x00cf: None,
0x00d0: None,
0x00d1: None,
0x00d2: None,
0x00d3: None,
0x00d4: None,
0x00d5: None,
0x00d6: None,
0x00d7: None,
0x00d8: None,
0x00d9: None,
0x00da: None,
0x00db: None,
0x00dc: None,
0x00dd: None,
0x00de: None,
0x00df: 0x2017, # DOUBLE LOW LINE 0x00df: 0x2017, # DOUBLE LOW LINE
0x00e0: 0x05d0, # HEBREW LETTER ALEF 0x00e0: 0x05d0, # HEBREW LETTER ALEF
0x00e1: 0x05d1, # HEBREW LETTER BET 0x00e1: 0x05d1, # HEBREW LETTER BET
@ -68,7 +100,12 @@ decoding_map = {
0x00f8: 0x05e8, # HEBREW LETTER RESH 0x00f8: 0x05e8, # HEBREW LETTER RESH
0x00f9: 0x05e9, # HEBREW LETTER SHIN 0x00f9: 0x05e9, # HEBREW LETTER SHIN
0x00fa: 0x05ea, # HEBREW LETTER TAV 0x00fa: 0x05ea, # HEBREW LETTER TAV
} 0x00fb: None,
0x00fc: None,
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: None,
})
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from '8859-9.TXT'. """ Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,15 +35,15 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'. """ Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
@ -165,7 +165,7 @@ decoding_map = {
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'. """ Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
@ -160,7 +160,7 @@ decoding_map = {
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x00a4, # CURRENCY SIGN 0x00ff: 0x00a4, # CURRENCY SIGN
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'GREEK.TXT'. """ Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00b9, # SUPERSCRIPT ONE 0x0081: 0x00b9, # SUPERSCRIPT ONE
0x0082: 0x00b2, # SUPERSCRIPT TWO 0x0082: 0x00b2, # SUPERSCRIPT TWO
@ -163,7 +163,7 @@ decoding_map = {
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00ff: None, # UNDEFINED 0x00ff: None, # UNDEFINED
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'ICELAND.TXT'. """ Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@ -159,7 +159,7 @@ decoding_map = {
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK 0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON 0x00ff: 0x02c7, # CARON
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'LATIN2.TXT'. """ Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
@ -163,7 +163,7 @@ decoding_map = {
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00ff: 0x02c7, # CARON 0x00ff: 0x02c7, # CARON
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'ROMAN.TXT'. """ Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@ -160,7 +160,7 @@ decoding_map = {
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK 0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON 0x00ff: 0x02c7, # CARON
} })
### Encoding Map ### Encoding Map

View File

@ -1,9 +1,9 @@
""" Python Character Mapping Codec generated from 'TURKISH.TXT'. """ Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -35,8 +35,8 @@ def getregentry():
### Decoding Map ### Decoding Map
decoding_map = { decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@ -160,7 +160,7 @@ decoding_map = {
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK 0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON 0x00ff: 0x02c7, # CARON
} })
### Encoding Map ### Encoding Map

View File

@ -494,14 +494,15 @@ for encoding in (
'cp852', 'cp855', 'cp860', 'cp861', 'cp862', 'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866', 'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_2', 'iso8859_4', 'iso8859_5',
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2', 'mac_cyrillic', 'mac_latin2',
### These have undefined mappings: ### These have undefined mappings:
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
#'cp1256', 'cp1257', 'cp1258', #'cp1256', 'cp1257', 'cp1258',
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
#'iso8859_3', 'iso8859_6', 'iso8859_7',
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
### These fail the round-trip: ### These fail the round-trip:

View File

@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
Py_DECREF(w); Py_DECREF(w);
if (x == NULL) { if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) { if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found: default to Latin-1 mapping */ /* No mapping found means: mapping is undefined. */
PyErr_Clear(); PyErr_Clear();
*p++ = (Py_UNICODE)ch; x = Py_None;
continue; Py_INCREF(x);
} } else
goto onError; goto onError;
} }
@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
Py_DECREF(w); Py_DECREF(w);
if (x == NULL) { if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) { if (PyErr_ExceptionMatches(PyExc_LookupError)) {
/* No mapping found: default to Latin-1 mapping if possible */ /* No mapping found means: mapping is undefined. */
PyErr_Clear(); PyErr_Clear();
if (ch < 256) { x = Py_None;
*s++ = (char)ch; Py_INCREF(x);
continue; } else
}
else if (!charmap_encoding_error(&p, &s, errors,
"missing character mapping"))
continue;
}
goto onError; goto onError;
} }

View File

@ -1,9 +1,9 @@
""" Unicode Mapping Parser and Codec Generator. """ Unicode Mapping Parser and Codec Generator.
This script parses Unicode mapping files as available from the Unicode This script parses Unicode mapping files as available from the Unicode
site (ftp.unicode.org) and creates Python codec modules from them. The site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
codecs use the standard character mapping codec to actually apply the modules from them. The codecs use the standard character mapping codec
mapping. to actually apply the mapping.
Synopsis: gencodec.py dir codec_prefix Synopsis: gencodec.py dir codec_prefix
@ -18,6 +18,7 @@ same location (with .mapping extension).
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright Guido van Rossum, 2000.
"""#" """#"
@ -70,6 +71,10 @@ def readmap(filename,
lines = f.readlines() lines = f.readlines()
f.close() f.close()
enc2uni = {} enc2uni = {}
identity = []
unmapped = range(256)
for i in range(256):
unmapped[i] = i
for line in lines: for line in lines:
line = strip(line) line = strip(line)
if not line or line[0] == '#': if not line or line[0] == '#':
@ -85,8 +90,22 @@ def readmap(filename,
comment = '' comment = ''
else: else:
comment = comment[1:] comment = comment[1:]
if enc != uni: if enc < 256:
unmapped.remove(enc)
if enc == uni:
identity.append(enc)
else:
enc2uni[enc] = (uni,comment)
else:
enc2uni[enc] = (uni,comment) enc2uni[enc] = (uni,comment)
# If there are more identity-mapped entries than unmapped entries,
# it pays to generate an identity dictionary first, and add explicit
# mappings to None for the rest
if len(identity)>=len(unmapped):
for enc in unmapped:
enc2uni[enc] = (None, "")
enc2uni['IDENTITY'] = 256
return enc2uni return enc2uni
def hexrepr(t, def hexrepr(t,
@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
""" """
l = [ l = [
'''\ '''\
""" Python Character Mapping Codec generated from '%s'. """ Python Character Mapping Codec generated from '%s' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#" """#"
@ -178,15 +198,23 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map ### Decoding Map
decoding_map = {
''' % name, ''' % name,
] ]
if map.has_key("IDENTITY"):
l.append("decoding_map = codecs.make_identity_dict(range(%d))"
% map["IDENTITY"])
l.append("decoding_map.update({")
splits = 1
del map["IDENTITY"]
else:
l.append("decoding_map = {")
splits = 0
mappings = map.items() mappings = map.items()
mappings.sort() mappings.sort()
append = l.append append = l.append
i = 0 i = 0
splits = 0
for e,value in mappings: for e,value in mappings:
try: try:
(u,c) = value (u,c) = value
@ -198,7 +226,7 @@ decoding_map = {
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c)) append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
else: else:
append('\t%s: %s,' % (key,unicoderepr(u))) append('\t%s: %s,' % (key,unicoderepr(u)))
i = i + 1 i += 1
if i == 4096: if i == 4096:
# Split the definition into parts so that the Python # Split the definition into parts so that the Python
# parser doesn't dump core # parser doesn't dump core
@ -206,7 +234,7 @@ decoding_map = {
append('}') append('}')
else: else:
append('})') append('})')
append('map.update({') append('decoding_map.update({')
i = 0 i = 0
splits = splits + 1 splits = splits + 1
if splits == 0: if splits == 0:
@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
mapnames = os.listdir(dir) mapnames = os.listdir(dir)
for mapname in mapnames: for mapname in mapnames:
if mapname[-len('.mapping'):] != '.mapping': if not mapname.endswith('.mapping'):
continue continue
codefile = mapname[:-len('.mapping')] + '.py' codefile = mapname[:-len('.mapping')] + '.py'
print 'converting %s to %s' % (mapname, print 'converting %s to %s' % (mapname,