This patch changes the default behaviour of the builtin charmap
codec: keys which are not found in the mapping dictionaries are no longer given Latin-1 mappings, but are instead treated as undefined mappings. The patch was originally written by Martin v. Loewis, with some additional (cosmetic) changes and an updated test script by Marc-Andre Lemburg. The standard codecs were regenerated from the most current mapping files available at the Unicode.org site using the Tools/scripts/gencodec.py tool. This patch closes bugs #116285 and #119960.
This commit is contained in:
parent
b55b7bb3ab
commit
a866df806d
|
@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
|
|||
sr.file_encoding = file_encoding
|
||||
return sr
|
||||
|
||||
### Helpers for charmap-based codecs
|
||||
|
||||
def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary where elements of the rng sequence are
        mapped to themselves.

        rng may be any iterable; each element is used both as a key
        and as its own value, so elements must be hashable.  Elements
        that occur more than once yield a single entry.  An empty rng
        gives an empty dictionary.

    """
    res = {}
    for i in rng:
        # Key and value are the same object: an identity mapping.
        res[i] = i
    return res
|
||||
|
||||
### Tests
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP037.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
|||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1006.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
|
@ -131,7 +131,7 @@ decoding_map = {
|
|||
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
||||
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
|
||||
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1026.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
|||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1250.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -116,7 +116,7 @@ decoding_map = {
|
|||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1251.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -150,7 +150,7 @@ decoding_map = {
|
|||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1252.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -69,7 +69,7 @@ decoding_map = {
|
|||
0x009d: None, # UNDEFINED
|
||||
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1253.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -144,7 +144,7 @@ decoding_map = {
|
|||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1254.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -75,7 +75,7 @@ decoding_map = {
|
|||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1255.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -136,7 +136,7 @@ decoding_map = {
|
|||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1256.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: 0x067e, # ARABIC LETTER PEH
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -122,7 +122,7 @@ decoding_map = {
|
|||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1257.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -124,7 +124,7 @@ decoding_map = {
|
|||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1258.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
|
@ -83,7 +83,7 @@ decoding_map = {
|
|||
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
||||
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
||||
0x00fe: 0x20ab, # DONG SIGN
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP424.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0004: 0x009c, # SELECT
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # REQUIRED NEW LINE
|
||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
|||
0x00fd: None, # UNDEFINED
|
||||
0x00fe: None, # UNDEFINED
|
||||
0x00ff: 0x009f, # EIGHT ONES
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP437.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP500.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
|||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP737.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP775.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP850.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP852.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP855.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
|
||||
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00a7, # SECTION SIGN
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP856.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||
|
@ -120,10 +120,10 @@ decoding_map = {
|
|||
0x00d0: None, # UNDEFINED
|
||||
0x00d1: None, # UNDEFINED
|
||||
0x00d2: None, # UNDEFINED
|
||||
0x00d3: None, # UNDEFINED
|
||||
0x00d3: None, # UNDEFINED
|
||||
0x00d4: None, # UNDEFINED
|
||||
0x00d5: None, # UNDEFINED
|
||||
0x00d6: None, # UNDEFINED
|
||||
0x00d6: None, # UNDEFINED
|
||||
0x00d7: None, # UNDEFINED
|
||||
0x00d8: None, # UNDEFINED
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP857.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -164,7 +164,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP860.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP861.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP862.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP863.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP864.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
||||
0x0080: 0x00b0, # DEGREE SIGN
|
||||
0x0081: 0x00b7, # MIDDLE DOT
|
||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
|||
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP865.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP866.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x00a4, # CURRENCY SIGN
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP869.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: None, # UNDEFINED
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: None, # UNDEFINED
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP874.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: None, # UNDEFINED
|
||||
|
@ -164,7 +164,7 @@ decoding_map = {
|
|||
0x00fd: None, # UNDEFINED
|
||||
0x00fe: None, # UNDEFINED
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CP875.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
|
@ -274,7 +274,7 @@ decoding_map = {
|
|||
0x00fd: 0x001a, # SUBSTITUTE
|
||||
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-1.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -22,10 +22,7 @@ class Codec(codecs.Codec):
|
|||
return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
|
||||
def __init__(self,stream,errors='strict'):
|
||||
|
||||
codecs.StreamWriter.__init__(self,strict,errors)
|
||||
pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
|
||||
pass
|
||||
|
@ -38,9 +35,9 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
}
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-10.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
|
@ -83,7 +83,7 @@ decoding_map = {
|
|||
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-13.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
|
@ -93,7 +93,7 @@ decoding_map = {
|
|||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-14.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
|
@ -68,7 +68,7 @@ decoding_map = {
|
|||
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-15.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a4: 0x20ac, # EURO SIGN
|
||||
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
|
@ -45,7 +45,7 @@ decoding_map = {
|
|||
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-2.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x02d8, # BREVE
|
||||
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
|
@ -94,7 +94,7 @@ decoding_map = {
|
|||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-3.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,15 +35,17 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0x00a2: 0x02d8, # BREVE
|
||||
0x00a5: None,
|
||||
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0x00ae: None,
|
||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
||||
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||
|
@ -51,21 +53,26 @@ decoding_map = {
|
|||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||
0x00be: None,
|
||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00c3: None,
|
||||
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
0x00d0: None,
|
||||
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
||||
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
0x00e3: None,
|
||||
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||
0x00f0: None,
|
||||
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
||||
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-4.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
||||
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
|
@ -87,7 +87,7 @@ decoding_map = {
|
|||
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-5.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||
|
@ -131,7 +131,7 @@ decoding_map = {
|
|||
0x00fd: 0x00a7, # SECTION SIGN
|
||||
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-6.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,11 +35,38 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: None,
|
||||
0x00a2: None,
|
||||
0x00a3: None,
|
||||
0x00a5: None,
|
||||
0x00a6: None,
|
||||
0x00a7: None,
|
||||
0x00a8: None,
|
||||
0x00a9: None,
|
||||
0x00aa: None,
|
||||
0x00ab: None,
|
||||
0x00ac: 0x060c, # ARABIC COMMA
|
||||
0x00ae: None,
|
||||
0x00af: None,
|
||||
0x00b0: None,
|
||||
0x00b1: None,
|
||||
0x00b2: None,
|
||||
0x00b3: None,
|
||||
0x00b4: None,
|
||||
0x00b5: None,
|
||||
0x00b6: None,
|
||||
0x00b7: None,
|
||||
0x00b8: None,
|
||||
0x00b9: None,
|
||||
0x00ba: None,
|
||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||
0x00bc: None,
|
||||
0x00bd: None,
|
||||
0x00be: None,
|
||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||
0x00c0: None,
|
||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
|
@ -66,6 +93,11 @@ decoding_map = {
|
|||
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
||||
0x00d9: 0x0639, # ARABIC LETTER AIN
|
||||
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
||||
0x00db: None,
|
||||
0x00dc: None,
|
||||
0x00dd: None,
|
||||
0x00de: None,
|
||||
0x00df: None,
|
||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||
0x00e1: 0x0641, # ARABIC LETTER FEH
|
||||
0x00e2: 0x0642, # ARABIC LETTER QAF
|
||||
|
@ -85,7 +117,20 @@ decoding_map = {
|
|||
0x00f0: 0x0650, # ARABIC KASRA
|
||||
0x00f1: 0x0651, # ARABIC SHADDA
|
||||
0x00f2: 0x0652, # ARABIC SUKUN
|
||||
}
|
||||
0x00f3: None,
|
||||
0x00f4: None,
|
||||
0x00f5: None,
|
||||
0x00f6: None,
|
||||
0x00f7: None,
|
||||
0x00f8: None,
|
||||
0x00f9: None,
|
||||
0x00fa: None,
|
||||
0x00fb: None,
|
||||
0x00fc: None,
|
||||
0x00fd: None,
|
||||
0x00fe: None,
|
||||
0x00ff: None,
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-7.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,10 +35,14 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00a4: None,
|
||||
0x00a5: None,
|
||||
0x00aa: None,
|
||||
0x00ae: None,
|
||||
0x00af: 0x2015, # HORIZONTAL BAR
|
||||
0x00b4: 0x0384, # GREEK TONOS
|
||||
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
||||
|
@ -67,6 +71,7 @@ decoding_map = {
|
|||
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x00d2: None,
|
||||
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
|
@ -111,7 +116,8 @@ decoding_map = {
|
|||
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
}
|
||||
0x00ff: None,
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-8.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,11 +35,43 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: None,
|
||||
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00af: 0x203e, # OVERLINE
|
||||
0x00ba: 0x00f7, # DIVISION SIGN
|
||||
0x00bf: None,
|
||||
0x00c0: None,
|
||||
0x00c1: None,
|
||||
0x00c2: None,
|
||||
0x00c3: None,
|
||||
0x00c4: None,
|
||||
0x00c5: None,
|
||||
0x00c6: None,
|
||||
0x00c7: None,
|
||||
0x00c8: None,
|
||||
0x00c9: None,
|
||||
0x00ca: None,
|
||||
0x00cb: None,
|
||||
0x00cc: None,
|
||||
0x00cd: None,
|
||||
0x00ce: None,
|
||||
0x00cf: None,
|
||||
0x00d0: None,
|
||||
0x00d1: None,
|
||||
0x00d2: None,
|
||||
0x00d3: None,
|
||||
0x00d4: None,
|
||||
0x00d5: None,
|
||||
0x00d6: None,
|
||||
0x00d7: None,
|
||||
0x00d8: None,
|
||||
0x00d9: None,
|
||||
0x00da: None,
|
||||
0x00db: None,
|
||||
0x00dc: None,
|
||||
0x00dd: None,
|
||||
0x00de: None,
|
||||
0x00df: 0x2017, # DOUBLE LOW LINE
|
||||
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x00e1: 0x05d1, # HEBREW LETTER BET
|
||||
|
@ -68,7 +100,12 @@ decoding_map = {
|
|||
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
||||
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
||||
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
||||
}
|
||||
0x00fb: None,
|
||||
0x00fc: None,
|
||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: None,
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from '8859-9.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,15 +35,15 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
|||
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
|||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||
0x00ff: 0x00a4, # CURRENCY SIGN
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'GREEK.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x0082: 0x00b2, # SUPERSCRIPT TWO
|
||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
|||
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
|
@ -159,7 +159,7 @@ decoding_map = {
|
|||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00fe: 0x02db, # OGONEK
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
|||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
|||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00fe: 0x02db, # OGONEK
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
|
||||
|
||||
""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -35,8 +35,8 @@ def getregentry():
|
|||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
|||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00fe: 0x02db, # OGONEK
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
})
|
||||
|
||||
### Encoding Map
|
||||
|
||||
|
|
|
@ -494,14 +494,15 @@ for encoding in (
|
|||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||||
'cp863', 'cp865', 'cp866',
|
||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||||
'iso8859_2', 'iso8859_4', 'iso8859_5',
|
||||
'iso8859_9', 'koi8_r', 'latin_1',
|
||||
'mac_cyrillic', 'mac_latin2',
|
||||
|
||||
### These have undefined mappings:
|
||||
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||
#'cp1256', 'cp1257', 'cp1258',
|
||||
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||
#'iso8859_3', 'iso8859_6', 'iso8859_7',
|
||||
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||
|
||||
### These fail the round-trip:
|
||||
|
|
|
@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
|||
Py_DECREF(w);
|
||||
if (x == NULL) {
|
||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||
/* No mapping found: default to Latin-1 mapping */
|
||||
/* No mapping found means: mapping is undefined. */
|
||||
PyErr_Clear();
|
||||
*p++ = (Py_UNICODE)ch;
|
||||
continue;
|
||||
}
|
||||
x = Py_None;
|
||||
Py_INCREF(x);
|
||||
} else
|
||||
goto onError;
|
||||
}
|
||||
|
||||
|
@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
|||
Py_DECREF(w);
|
||||
if (x == NULL) {
|
||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||
/* No mapping found: default to Latin-1 mapping if possible */
|
||||
/* No mapping found means: mapping is undefined. */
|
||||
PyErr_Clear();
|
||||
if (ch < 256) {
|
||||
*s++ = (char)ch;
|
||||
continue;
|
||||
}
|
||||
else if (!charmap_encoding_error(&p, &s, errors,
|
||||
"missing character mapping"))
|
||||
continue;
|
||||
}
|
||||
x = Py_None;
|
||||
Py_INCREF(x);
|
||||
} else
|
||||
goto onError;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
""" Unicode Mapping Parser and Codec Generator.
|
||||
|
||||
This script parses Unicode mapping files as available from the Unicode
|
||||
site (ftp.unicode.org) and creates Python codec modules from them. The
|
||||
codecs use the standard character mapping codec to actually apply the
|
||||
mapping.
|
||||
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
|
||||
modules from them. The codecs use the standard character mapping codec
|
||||
to actually apply the mapping.
|
||||
|
||||
Synopsis: gencodec.py dir codec_prefix
|
||||
|
||||
|
@ -18,6 +18,7 @@ same location (with .mapping extension).
|
|||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright Guido van Rossum, 2000.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -70,6 +71,10 @@ def readmap(filename,
|
|||
lines = f.readlines()
|
||||
f.close()
|
||||
enc2uni = {}
|
||||
identity = []
|
||||
unmapped = range(256)
|
||||
for i in range(256):
|
||||
unmapped[i] = i
|
||||
for line in lines:
|
||||
line = strip(line)
|
||||
if not line or line[0] == '#':
|
||||
|
@ -85,8 +90,22 @@ def readmap(filename,
|
|||
comment = ''
|
||||
else:
|
||||
comment = comment[1:]
|
||||
if enc != uni:
|
||||
if enc < 256:
|
||||
unmapped.remove(enc)
|
||||
if enc == uni:
|
||||
identity.append(enc)
|
||||
else:
|
||||
enc2uni[enc] = (uni,comment)
|
||||
else:
|
||||
enc2uni[enc] = (uni,comment)
|
||||
# If there are more identity-mapped entries than unmapped entries,
|
||||
# it pays to generate an identity dictionary first, and add explicit
|
||||
# mappings to None for the rest
|
||||
if len(identity)>=len(unmapped):
|
||||
for enc in unmapped:
|
||||
enc2uni[enc] = (None, "")
|
||||
enc2uni['IDENTITY'] = 256
|
||||
|
||||
return enc2uni
|
||||
|
||||
def hexrepr(t,
|
||||
|
@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
|
|||
"""
|
||||
l = [
|
||||
'''\
|
||||
""" Python Character Mapping Codec generated from '%s'.
|
||||
""" Python Character Mapping Codec generated from '%s' with gencodec.py.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
(c) Copyright 2000 Guido van Rossum.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -178,15 +198,23 @@ def getregentry():
|
|||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
''' % name,
|
||||
]
|
||||
|
||||
if map.has_key("IDENTITY"):
|
||||
l.append("decoding_map = codecs.make_identity_dict(range(%d))"
|
||||
% map["IDENTITY"])
|
||||
l.append("decoding_map.update({")
|
||||
splits = 1
|
||||
del map["IDENTITY"]
|
||||
else:
|
||||
l.append("decoding_map = {")
|
||||
splits = 0
|
||||
|
||||
mappings = map.items()
|
||||
mappings.sort()
|
||||
append = l.append
|
||||
i = 0
|
||||
splits = 0
|
||||
for e,value in mappings:
|
||||
try:
|
||||
(u,c) = value
|
||||
|
@ -198,7 +226,7 @@ decoding_map = {
|
|||
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
|
||||
else:
|
||||
append('\t%s: %s,' % (key,unicoderepr(u)))
|
||||
i = i + 1
|
||||
i += 1
|
||||
if i == 4096:
|
||||
# Split the definition into parts so that the Python
|
||||
# parser doesn't dump core
|
||||
|
@ -206,7 +234,7 @@ decoding_map = {
|
|||
append('}')
|
||||
else:
|
||||
append('})')
|
||||
append('map.update({')
|
||||
append('decoding_map.update({')
|
||||
i = 0
|
||||
splits = splits + 1
|
||||
if splits == 0:
|
||||
|
@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
|
|||
|
||||
mapnames = os.listdir(dir)
|
||||
for mapname in mapnames:
|
||||
if mapname[-len('.mapping'):] != '.mapping':
|
||||
if not mapname.endswith('.mapping'):
|
||||
continue
|
||||
codefile = mapname[:-len('.mapping')] + '.py'
|
||||
print 'converting %s to %s' % (mapname,
|
||||
|
|
Loading…
Reference in New Issue