This patch changes the default behaviour of the builtin charmap codec so that it no longer applies Latin-1 mappings to keys which are not found in the mapping dictionaries, but instead treats them as undefined mappings. The patch was originally written by Martin v. Loewis, with some additional (cosmetic) changes and an updated test script by Marc-Andre Lemburg. The standard codecs were recreated from the most current files available at the Unicode.org site using the Tools/scripts/gencodec.py tool. This patch closes bugs #116285 and #119960.
This commit is contained in:
parent
b55b7bb3ab
commit
a866df806d
|
@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
|
||||||
sr.file_encoding = file_encoding
|
sr.file_encoding = file_encoding
|
||||||
return sr
|
return sr
|
||||||
|
|
||||||
|
### Helpers for charmap-based codecs
|
||||||
|
|
||||||
|
def make_identity_dict(rng):
|
||||||
|
|
||||||
|
""" make_identity_dict(rng) -> dict
|
||||||
|
|
||||||
|
Return a dictionary where elements of the rng sequence are
|
||||||
|
mapped to themselves.
|
||||||
|
|
||||||
|
"""
|
||||||
|
res = {}
|
||||||
|
for i in rng:
|
||||||
|
res[i]=i
|
||||||
|
return res
|
||||||
|
|
||||||
### Tests
|
### Tests
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP037.TXT'.
|
""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1006.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||||
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
|
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||||
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
|
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||||
|
@ -131,7 +131,7 @@ decoding_map = {
|
||||||
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
||||||
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
|
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
|
||||||
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1026.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1250.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -116,7 +116,7 @@ decoding_map = {
|
||||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1251.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -150,7 +150,7 @@ decoding_map = {
|
||||||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||||
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1252.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -69,7 +69,7 @@ decoding_map = {
|
||||||
0x009d: None, # UNDEFINED
|
0x009d: None, # UNDEFINED
|
||||||
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1253.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -144,7 +144,7 @@ decoding_map = {
|
||||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1254.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -75,7 +75,7 @@ decoding_map = {
|
||||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1255.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -136,7 +136,7 @@ decoding_map = {
|
||||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1256.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: 0x067e, # ARABIC LETTER PEH
|
0x0081: 0x067e, # ARABIC LETTER PEH
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -122,7 +122,7 @@ decoding_map = {
|
||||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1257.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -124,7 +124,7 @@ decoding_map = {
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP1258.TXT'.
|
""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||||
|
@ -83,7 +83,7 @@ decoding_map = {
|
||||||
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
||||||
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
||||||
0x00fe: 0x20ab, # DONG SIGN
|
0x00fe: 0x20ab, # DONG SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP424.TXT'.
|
""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # SELECT
|
0x0004: 0x009c, # SELECT
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # REQUIRED NEW LINE
|
0x0006: 0x0086, # REQUIRED NEW LINE
|
||||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
||||||
0x00fd: None, # UNDEFINED
|
0x00fd: None, # UNDEFINED
|
||||||
0x00fe: None, # UNDEFINED
|
0x00fe: None, # UNDEFINED
|
||||||
0x00ff: 0x009f, # EIGHT ONES
|
0x00ff: 0x009f, # EIGHT ONES
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP437.TXT'.
|
""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP500.TXT'.
|
""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
@ -273,7 +273,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP737.TXT'.
|
""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||||
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||||
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP775.TXT'.
|
""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP850.TXT'.
|
""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP852.TXT'.
|
""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP855.TXT'.
|
""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
|
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
|
||||||
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
|
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00a7, # SECTION SIGN
|
0x00fd: 0x00a7, # SECTION SIGN
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP856.TXT'.
|
""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||||
|
@ -120,10 +120,10 @@ decoding_map = {
|
||||||
0x00d0: None, # UNDEFINED
|
0x00d0: None, # UNDEFINED
|
||||||
0x00d1: None, # UNDEFINED
|
0x00d1: None, # UNDEFINED
|
||||||
0x00d2: None, # UNDEFINED
|
0x00d2: None, # UNDEFINED
|
||||||
0x00d3: None, # UNDEFINED
|
0x00d3: None, # UNDEFINED
|
||||||
0x00d4: None, # UNDEFINED
|
0x00d4: None, # UNDEFINED
|
||||||
0x00d5: None, # UNDEFINED
|
0x00d5: None, # UNDEFINED
|
||||||
0x00d6: None, # UNDEFINED
|
0x00d6: None, # UNDEFINED
|
||||||
0x00d7: None, # UNDEFINED
|
0x00d7: None, # UNDEFINED
|
||||||
0x00d8: None, # UNDEFINED
|
0x00d8: None, # UNDEFINED
|
||||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP857.TXT'.
|
""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -164,7 +164,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP860.TXT'.
|
""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP861.TXT'.
|
""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP862.TXT'.
|
""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
0x0080: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x0081: 0x05d1, # HEBREW LETTER BET
|
0x0081: 0x05d1, # HEBREW LETTER BET
|
||||||
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
0x0082: 0x05d2, # HEBREW LETTER GIMEL
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP863.TXT'.
|
""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP864.TXT'.
|
""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
||||||
0x0080: 0x00b0, # DEGREE SIGN
|
0x0080: 0x00b0, # DEGREE SIGN
|
||||||
0x0081: 0x00b7, # MIDDLE DOT
|
0x0081: 0x00b7, # MIDDLE DOT
|
||||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
||||||
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP865.TXT'.
|
""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP866.TXT'.
|
""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00a4, # CURRENCY SIGN
|
0x00fd: 0x00a4, # CURRENCY SIGN
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP869.TXT'.
|
""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: None, # UNDEFINED
|
0x0080: None, # UNDEFINED
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: None, # UNDEFINED
|
0x0082: None, # UNDEFINED
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
0x00fe: 0x25a0, # BLACK SQUARE
|
0x00fe: 0x25a0, # BLACK SQUARE
|
||||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP874.TXT'.
|
""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x20ac, # EURO SIGN
|
0x0080: 0x20ac, # EURO SIGN
|
||||||
0x0081: None, # UNDEFINED
|
0x0081: None, # UNDEFINED
|
||||||
0x0082: None, # UNDEFINED
|
0x0082: None, # UNDEFINED
|
||||||
|
@ -164,7 +164,7 @@ decoding_map = {
|
||||||
0x00fd: None, # UNDEFINED
|
0x00fd: None, # UNDEFINED
|
||||||
0x00fe: None, # UNDEFINED
|
0x00fe: None, # UNDEFINED
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CP875.TXT'.
|
""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0004: 0x009c, # CONTROL
|
0x0004: 0x009c, # CONTROL
|
||||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||||
0x0006: 0x0086, # CONTROL
|
0x0006: 0x0086, # CONTROL
|
||||||
|
@ -274,7 +274,7 @@ decoding_map = {
|
||||||
0x00fd: 0x001a, # SUBSTITUTE
|
0x00fd: 0x001a, # SUBSTITUTE
|
||||||
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||||
0x00ff: 0x009f, # CONTROL
|
0x00ff: 0x009f, # CONTROL
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-1.TXT'.
|
""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -22,10 +22,7 @@ class Codec(codecs.Codec):
|
||||||
return codecs.charmap_decode(input,errors,decoding_map)
|
return codecs.charmap_decode(input,errors,decoding_map)
|
||||||
|
|
||||||
class StreamWriter(Codec,codecs.StreamWriter):
|
class StreamWriter(Codec,codecs.StreamWriter):
|
||||||
|
pass
|
||||||
def __init__(self,stream,errors='strict'):
|
|
||||||
|
|
||||||
codecs.StreamWriter.__init__(self,strict,errors)
|
|
||||||
|
|
||||||
class StreamReader(Codec,codecs.StreamReader):
|
class StreamReader(Codec,codecs.StreamReader):
|
||||||
pass
|
pass
|
||||||
|
@ -38,9 +35,9 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-10.TXT'.
|
""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||||
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
|
@ -83,7 +83,7 @@ decoding_map = {
|
||||||
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||||
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||||
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-13.TXT'.
|
""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||||
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||||
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||||
|
@ -93,7 +93,7 @@ decoding_map = {
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||||
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-14.TXT'.
|
""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||||
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||||
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
|
@ -68,7 +68,7 @@ decoding_map = {
|
||||||
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||||
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||||
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-15.TXT'.
|
""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a4: 0x20ac, # EURO SIGN
|
0x00a4: 0x20ac, # EURO SIGN
|
||||||
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||||
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||||
|
@ -45,7 +45,7 @@ decoding_map = {
|
||||||
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||||
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
||||||
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-2.TXT'.
|
""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x02d8, # BREVE
|
0x00a2: 0x02d8, # BREVE
|
||||||
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||||
|
@ -94,7 +94,7 @@ decoding_map = {
|
||||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-3.TXT'.
|
""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,15 +35,17 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
||||||
0x00a2: 0x02d8, # BREVE
|
0x00a2: 0x02d8, # BREVE
|
||||||
|
0x00a5: None,
|
||||||
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||||
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x00ae: None,
|
||||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||||
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
||||||
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||||
|
@ -51,21 +53,26 @@ decoding_map = {
|
||||||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||||
|
0x00be: None,
|
||||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
|
0x00c3: None,
|
||||||
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x00d0: None,
|
||||||
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||||
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||||
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
||||||
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||||
|
0x00e3: None,
|
||||||
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||||
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||||
|
0x00f0: None,
|
||||||
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||||
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||||
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
||||||
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-4.TXT'.
|
""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
||||||
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||||
|
@ -87,7 +87,7 @@ decoding_map = {
|
||||||
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||||
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||||
0x00ff: 0x02d9, # DOT ABOVE
|
0x00ff: 0x02d9, # DOT ABOVE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-5.TXT'.
|
""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||||
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||||
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||||
|
@ -131,7 +131,7 @@ decoding_map = {
|
||||||
0x00fd: 0x00a7, # SECTION SIGN
|
0x00fd: 0x00a7, # SECTION SIGN
|
||||||
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||||
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-6.TXT'.
|
""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,11 +35,38 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
|
0x00a1: None,
|
||||||
|
0x00a2: None,
|
||||||
|
0x00a3: None,
|
||||||
|
0x00a5: None,
|
||||||
|
0x00a6: None,
|
||||||
|
0x00a7: None,
|
||||||
|
0x00a8: None,
|
||||||
|
0x00a9: None,
|
||||||
|
0x00aa: None,
|
||||||
|
0x00ab: None,
|
||||||
0x00ac: 0x060c, # ARABIC COMMA
|
0x00ac: 0x060c, # ARABIC COMMA
|
||||||
|
0x00ae: None,
|
||||||
|
0x00af: None,
|
||||||
|
0x00b0: None,
|
||||||
|
0x00b1: None,
|
||||||
|
0x00b2: None,
|
||||||
|
0x00b3: None,
|
||||||
|
0x00b4: None,
|
||||||
|
0x00b5: None,
|
||||||
|
0x00b6: None,
|
||||||
|
0x00b7: None,
|
||||||
|
0x00b8: None,
|
||||||
|
0x00b9: None,
|
||||||
|
0x00ba: None,
|
||||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||||
|
0x00bc: None,
|
||||||
|
0x00bd: None,
|
||||||
|
0x00be: None,
|
||||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||||
|
0x00c0: None,
|
||||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||||
|
@ -66,6 +93,11 @@ decoding_map = {
|
||||||
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
||||||
0x00d9: 0x0639, # ARABIC LETTER AIN
|
0x00d9: 0x0639, # ARABIC LETTER AIN
|
||||||
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
||||||
|
0x00db: None,
|
||||||
|
0x00dc: None,
|
||||||
|
0x00dd: None,
|
||||||
|
0x00de: None,
|
||||||
|
0x00df: None,
|
||||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||||
0x00e1: 0x0641, # ARABIC LETTER FEH
|
0x00e1: 0x0641, # ARABIC LETTER FEH
|
||||||
0x00e2: 0x0642, # ARABIC LETTER QAF
|
0x00e2: 0x0642, # ARABIC LETTER QAF
|
||||||
|
@ -85,7 +117,20 @@ decoding_map = {
|
||||||
0x00f0: 0x0650, # ARABIC KASRA
|
0x00f0: 0x0650, # ARABIC KASRA
|
||||||
0x00f1: 0x0651, # ARABIC SHADDA
|
0x00f1: 0x0651, # ARABIC SHADDA
|
||||||
0x00f2: 0x0652, # ARABIC SUKUN
|
0x00f2: 0x0652, # ARABIC SUKUN
|
||||||
}
|
0x00f3: None,
|
||||||
|
0x00f4: None,
|
||||||
|
0x00f5: None,
|
||||||
|
0x00f6: None,
|
||||||
|
0x00f7: None,
|
||||||
|
0x00f8: None,
|
||||||
|
0x00f9: None,
|
||||||
|
0x00fa: None,
|
||||||
|
0x00fb: None,
|
||||||
|
0x00fc: None,
|
||||||
|
0x00fd: None,
|
||||||
|
0x00fe: None,
|
||||||
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-7.TXT'.
|
""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,10 +35,14 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||||
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||||
|
0x00a4: None,
|
||||||
|
0x00a5: None,
|
||||||
|
0x00aa: None,
|
||||||
|
0x00ae: None,
|
||||||
0x00af: 0x2015, # HORIZONTAL BAR
|
0x00af: 0x2015, # HORIZONTAL BAR
|
||||||
0x00b4: 0x0384, # GREEK TONOS
|
0x00b4: 0x0384, # GREEK TONOS
|
||||||
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
||||||
|
@ -67,6 +71,7 @@ decoding_map = {
|
||||||
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||||
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||||
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||||
|
0x00d2: None,
|
||||||
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||||
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||||
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||||
|
@ -111,7 +116,8 @@ decoding_map = {
|
||||||
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||||
}
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-8.TXT'.
|
""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,11 +35,43 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
|
0x00a1: None,
|
||||||
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
||||||
0x00af: 0x203e, # OVERLINE
|
|
||||||
0x00ba: 0x00f7, # DIVISION SIGN
|
0x00ba: 0x00f7, # DIVISION SIGN
|
||||||
|
0x00bf: None,
|
||||||
|
0x00c0: None,
|
||||||
|
0x00c1: None,
|
||||||
|
0x00c2: None,
|
||||||
|
0x00c3: None,
|
||||||
|
0x00c4: None,
|
||||||
|
0x00c5: None,
|
||||||
|
0x00c6: None,
|
||||||
|
0x00c7: None,
|
||||||
|
0x00c8: None,
|
||||||
|
0x00c9: None,
|
||||||
|
0x00ca: None,
|
||||||
|
0x00cb: None,
|
||||||
|
0x00cc: None,
|
||||||
|
0x00cd: None,
|
||||||
|
0x00ce: None,
|
||||||
|
0x00cf: None,
|
||||||
|
0x00d0: None,
|
||||||
|
0x00d1: None,
|
||||||
|
0x00d2: None,
|
||||||
|
0x00d3: None,
|
||||||
|
0x00d4: None,
|
||||||
|
0x00d5: None,
|
||||||
|
0x00d6: None,
|
||||||
|
0x00d7: None,
|
||||||
|
0x00d8: None,
|
||||||
|
0x00d9: None,
|
||||||
|
0x00da: None,
|
||||||
|
0x00db: None,
|
||||||
|
0x00dc: None,
|
||||||
|
0x00dd: None,
|
||||||
|
0x00de: None,
|
||||||
0x00df: 0x2017, # DOUBLE LOW LINE
|
0x00df: 0x2017, # DOUBLE LOW LINE
|
||||||
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
||||||
0x00e1: 0x05d1, # HEBREW LETTER BET
|
0x00e1: 0x05d1, # HEBREW LETTER BET
|
||||||
|
@ -68,7 +100,12 @@ decoding_map = {
|
||||||
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
||||||
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
||||||
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
||||||
}
|
0x00fb: None,
|
||||||
|
0x00fc: None,
|
||||||
|
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||||
|
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||||
|
0x00ff: None,
|
||||||
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from '8859-9.TXT'.
|
""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,15 +35,15 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||||
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
|
""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||||
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||||
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||||
|
@ -165,7 +165,7 @@ decoding_map = {
|
||||||
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||||
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||||
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
|
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
||||||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||||
0x00ff: 0x00a4, # CURRENCY SIGN
|
0x00ff: 0x00a4, # CURRENCY SIGN
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'GREEK.TXT'.
|
""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00b9, # SUPERSCRIPT ONE
|
0x0081: 0x00b9, # SUPERSCRIPT ONE
|
||||||
0x0082: 0x00b2, # SUPERSCRIPT TWO
|
0x0082: 0x00b2, # SUPERSCRIPT TWO
|
||||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
||||||
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||||
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||||
0x00ff: None, # UNDEFINED
|
0x00ff: None, # UNDEFINED
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
|
""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
@ -159,7 +159,7 @@ decoding_map = {
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
|
""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||||
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||||
|
@ -163,7 +163,7 @@ decoding_map = {
|
||||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||||
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
|
""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
|
""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
|
||||||
|
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -35,8 +35,8 @@ def getregentry():
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
decoding_map = codecs.make_identity_dict(range(256))
|
||||||
|
decoding_map.update({
|
||||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
@ -160,7 +160,7 @@ decoding_map = {
|
||||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||||
0x00fe: 0x02db, # OGONEK
|
0x00fe: 0x02db, # OGONEK
|
||||||
0x00ff: 0x02c7, # CARON
|
0x00ff: 0x02c7, # CARON
|
||||||
}
|
})
|
||||||
|
|
||||||
### Encoding Map
|
### Encoding Map
|
||||||
|
|
||||||
|
|
|
@ -494,14 +494,15 @@ for encoding in (
|
||||||
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
|
||||||
'cp863', 'cp865', 'cp866',
|
'cp863', 'cp865', 'cp866',
|
||||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
'iso8859_2', 'iso8859_4', 'iso8859_5',
|
||||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
'iso8859_9', 'koi8_r', 'latin_1',
|
||||||
'mac_cyrillic', 'mac_latin2',
|
'mac_cyrillic', 'mac_latin2',
|
||||||
|
|
||||||
### These have undefined mappings:
|
### These have undefined mappings:
|
||||||
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||||
#'cp1256', 'cp1257', 'cp1258',
|
#'cp1256', 'cp1257', 'cp1258',
|
||||||
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
|
||||||
|
#'iso8859_3', 'iso8859_6', 'iso8859_7',
|
||||||
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
|
||||||
|
|
||||||
### These fail the round-trip:
|
### These fail the round-trip:
|
||||||
|
|
|
@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||||
/* No mapping found: default to Latin-1 mapping */
|
/* No mapping found means: mapping is undefined. */
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
*p++ = (Py_UNICODE)ch;
|
x = Py_None;
|
||||||
continue;
|
Py_INCREF(x);
|
||||||
}
|
} else
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
|
||||||
Py_DECREF(w);
|
Py_DECREF(w);
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
|
||||||
/* No mapping found: default to Latin-1 mapping if possible */
|
/* No mapping found means: mapping is undefined. */
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
if (ch < 256) {
|
x = Py_None;
|
||||||
*s++ = (char)ch;
|
Py_INCREF(x);
|
||||||
continue;
|
} else
|
||||||
}
|
|
||||||
else if (!charmap_encoding_error(&p, &s, errors,
|
|
||||||
"missing character mapping"))
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
""" Unicode Mapping Parser and Codec Generator.
|
""" Unicode Mapping Parser and Codec Generator.
|
||||||
|
|
||||||
This script parses Unicode mapping files as available from the Unicode
|
This script parses Unicode mapping files as available from the Unicode
|
||||||
site (ftp.unicode.org) and creates Python codec modules from them. The
|
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
|
||||||
codecs use the standard character mapping codec to actually apply the
|
modules from them. The codecs use the standard character mapping codec
|
||||||
mapping.
|
to actually apply the mapping.
|
||||||
|
|
||||||
Synopsis: gencodec.py dir codec_prefix
|
Synopsis: gencodec.py dir codec_prefix
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ same location (with .mapping extension).
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright Guido van Rossum, 2000.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -70,6 +71,10 @@ def readmap(filename,
|
||||||
lines = f.readlines()
|
lines = f.readlines()
|
||||||
f.close()
|
f.close()
|
||||||
enc2uni = {}
|
enc2uni = {}
|
||||||
|
identity = []
|
||||||
|
unmapped = range(256)
|
||||||
|
for i in range(256):
|
||||||
|
unmapped[i] = i
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = strip(line)
|
line = strip(line)
|
||||||
if not line or line[0] == '#':
|
if not line or line[0] == '#':
|
||||||
|
@ -85,8 +90,22 @@ def readmap(filename,
|
||||||
comment = ''
|
comment = ''
|
||||||
else:
|
else:
|
||||||
comment = comment[1:]
|
comment = comment[1:]
|
||||||
if enc != uni:
|
if enc < 256:
|
||||||
|
unmapped.remove(enc)
|
||||||
|
if enc == uni:
|
||||||
|
identity.append(enc)
|
||||||
|
else:
|
||||||
|
enc2uni[enc] = (uni,comment)
|
||||||
|
else:
|
||||||
enc2uni[enc] = (uni,comment)
|
enc2uni[enc] = (uni,comment)
|
||||||
|
# If there are more identity-mapped entries than unmapped entries,
|
||||||
|
# it pays to generate an identity dictionary first, and add explicit
|
||||||
|
# mappings to None for the rest
|
||||||
|
if len(identity)>=len(unmapped):
|
||||||
|
for enc in unmapped:
|
||||||
|
enc2uni[enc] = (None, "")
|
||||||
|
enc2uni['IDENTITY'] = 256
|
||||||
|
|
||||||
return enc2uni
|
return enc2uni
|
||||||
|
|
||||||
def hexrepr(t,
|
def hexrepr(t,
|
||||||
|
@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
|
||||||
"""
|
"""
|
||||||
l = [
|
l = [
|
||||||
'''\
|
'''\
|
||||||
""" Python Character Mapping Codec generated from '%s'.
|
""" Python Character Mapping Codec generated from '%s' with gencodec.py.
|
||||||
|
|
||||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||||
|
|
||||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||||
|
(c) Copyright 2000 Guido van Rossum.
|
||||||
|
|
||||||
"""#"
|
"""#"
|
||||||
|
|
||||||
|
@ -178,15 +198,23 @@ def getregentry():
|
||||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||||
|
|
||||||
### Decoding Map
|
### Decoding Map
|
||||||
|
|
||||||
decoding_map = {
|
|
||||||
''' % name,
|
''' % name,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if map.has_key("IDENTITY"):
|
||||||
|
l.append("decoding_map = codecs.make_identity_dict(range(%d))"
|
||||||
|
% map["IDENTITY"])
|
||||||
|
l.append("decoding_map.update({")
|
||||||
|
splits = 1
|
||||||
|
del map["IDENTITY"]
|
||||||
|
else:
|
||||||
|
l.append("decoding_map = {")
|
||||||
|
splits = 0
|
||||||
|
|
||||||
mappings = map.items()
|
mappings = map.items()
|
||||||
mappings.sort()
|
mappings.sort()
|
||||||
append = l.append
|
append = l.append
|
||||||
i = 0
|
i = 0
|
||||||
splits = 0
|
|
||||||
for e,value in mappings:
|
for e,value in mappings:
|
||||||
try:
|
try:
|
||||||
(u,c) = value
|
(u,c) = value
|
||||||
|
@ -198,7 +226,7 @@ decoding_map = {
|
||||||
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
|
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
|
||||||
else:
|
else:
|
||||||
append('\t%s: %s,' % (key,unicoderepr(u)))
|
append('\t%s: %s,' % (key,unicoderepr(u)))
|
||||||
i = i + 1
|
i += 1
|
||||||
if i == 4096:
|
if i == 4096:
|
||||||
# Split the definition into parts so that the Python
|
# Split the definition into parts so that the Python
|
||||||
# parser doesn't dump core
|
# parser doesn't dump core
|
||||||
|
@ -206,7 +234,7 @@ decoding_map = {
|
||||||
append('}')
|
append('}')
|
||||||
else:
|
else:
|
||||||
append('})')
|
append('})')
|
||||||
append('map.update({')
|
append('decoding_map.update({')
|
||||||
i = 0
|
i = 0
|
||||||
splits = splits + 1
|
splits = splits + 1
|
||||||
if splits == 0:
|
if splits == 0:
|
||||||
|
@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
|
||||||
|
|
||||||
mapnames = os.listdir(dir)
|
mapnames = os.listdir(dir)
|
||||||
for mapname in mapnames:
|
for mapname in mapnames:
|
||||||
if mapname[-len('.mapping'):] != '.mapping':
|
if not mapname.endswith('.mapping'):
|
||||||
continue
|
continue
|
||||||
codefile = mapname[:-len('.mapping')] + '.py'
|
codefile = mapname[:-len('.mapping')] + '.py'
|
||||||
print 'converting %s to %s' % (mapname,
|
print 'converting %s to %s' % (mapname,
|
||||||
|
|
Loading…
Reference in New Issue