From a866df806dd0ffd439bbba873ab9f3da7080e0a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Wed, 3 Jan 2001 21:29:14 +0000 Subject: [PATCH] This patch changes the default behaviour of the builtin charmap codec to not apply Latin-1 mappings for keys which are not found in the mapping dictionaries, but instead treat them as undefined mappings. The patch was originally written by Martin v. Loewis with some additional (cosmetic) changes and an updated test script by Marc-Andre Lemburg. The standard codecs were recreated from the most current files available at the Unicode.org site using the Tools/scripts/gencodec.py tool. This patch closes the bugs #116285 and #119960. --- Lib/codecs.py | 15 ++++++++++ Lib/encodings/cp037.py | 10 +++---- Lib/encodings/cp1006.py | 10 +++---- Lib/encodings/cp1026.py | 10 +++---- Lib/encodings/cp1250.py | 10 +++---- Lib/encodings/cp1251.py | 10 +++---- Lib/encodings/cp1252.py | 10 +++---- Lib/encodings/cp1253.py | 10 +++---- Lib/encodings/cp1254.py | 10 +++---- Lib/encodings/cp1255.py | 10 +++---- Lib/encodings/cp1256.py | 10 +++---- Lib/encodings/cp1257.py | 10 +++---- Lib/encodings/cp1258.py | 10 +++---- Lib/encodings/cp424.py | 10 +++---- Lib/encodings/cp437.py | 10 +++---- Lib/encodings/cp500.py | 10 +++---- Lib/encodings/cp737.py | 10 +++---- Lib/encodings/cp775.py | 10 +++---- Lib/encodings/cp850.py | 10 +++---- Lib/encodings/cp852.py | 10 +++---- Lib/encodings/cp855.py | 10 +++---- Lib/encodings/cp856.py | 14 ++++----- Lib/encodings/cp857.py | 10 +++---- Lib/encodings/cp860.py | 10 +++---- Lib/encodings/cp861.py | 10 +++---- Lib/encodings/cp862.py | 10 +++---- Lib/encodings/cp863.py | 10 +++---- Lib/encodings/cp864.py | 10 +++---- Lib/encodings/cp865.py | 10 +++---- Lib/encodings/cp866.py | 10 +++---- Lib/encodings/cp869.py | 10 +++---- Lib/encodings/cp874.py | 10 +++---- Lib/encodings/cp875.py | 10 +++---- Lib/encodings/iso8859_1.py | 15 ++++------ Lib/encodings/iso8859_10.py | 10 +++---- 
Lib/encodings/iso8859_13.py | 10 +++---- Lib/encodings/iso8859_14.py | 10 +++---- Lib/encodings/iso8859_15.py | 10 +++---- Lib/encodings/iso8859_2.py | 10 +++---- Lib/encodings/iso8859_3.py | 17 +++++++---- Lib/encodings/iso8859_4.py | 10 +++---- Lib/encodings/iso8859_5.py | 10 +++---- Lib/encodings/iso8859_6.py | 55 +++++++++++++++++++++++++++++++---- Lib/encodings/iso8859_7.py | 16 ++++++---- Lib/encodings/iso8859_8.py | 49 +++++++++++++++++++++++++++---- Lib/encodings/iso8859_9.py | 10 +++---- Lib/encodings/koi8_r.py | 10 +++---- Lib/encodings/mac_cyrillic.py | 10 +++---- Lib/encodings/mac_greek.py | 10 +++---- Lib/encodings/mac_iceland.py | 10 +++---- Lib/encodings/mac_latin2.py | 10 +++---- Lib/encodings/mac_roman.py | 10 +++---- Lib/encodings/mac_turkish.py | 10 +++---- Lib/test/test_unicode.py | 5 ++-- Objects/unicodeobject.c | 21 +++++-------- Tools/scripts/gencodec.py | 50 ++++++++++++++++++++++++------- 56 files changed, 424 insertions(+), 293 deletions(-) diff --git a/Lib/codecs.py b/Lib/codecs.py index fca0f8e287c..993113752ef 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): sr.file_encoding = file_encoding return sr +### Helpers for charmap-based codecs + +def make_identity_dict(rng): + + """ make_identity_dict(rng) -> dict + + Return a dictionary where elements of the rng sequence are + mapped to themselves. + + """ + res = {} + for i in rng: + res[i]=i + return res + ### Tests if __name__ == '__main__': diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py index d60504ca0fd..5868372b077 100644 --- a/Lib/encodings/cp037.py +++ b/Lib/encodings/cp037.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP037.TXT'. - +""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py index 991feed9c7c..593fbb601cc 100644 --- a/Lib/encodings/cp1006.py +++ b/Lib/encodings/cp1006.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1006.TXT'. - +""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO 0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE 0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO @@ -131,7 +131,7 @@ decoding_map = { 0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM 0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM 0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1026.py b/Lib/encodings/cp1026.py index ae8086fc37b..3796a75c093 100644 --- a/Lib/encodings/cp1026.py +++ b/Lib/encodings/cp1026.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1026.TXT'. - +""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py index d1276c4c6ed..03a3e3177d8 100644 --- a/Lib/encodings/cp1250.py +++ b/Lib/encodings/cp1250.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1250.TXT'. - +""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -116,7 +116,7 @@ decoding_map = { 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py index 42921e46534..e27a122c382 100644 --- a/Lib/encodings/cp1251.py +++ b/Lib/encodings/cp1251.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1251.TXT'. - +""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -150,7 +150,7 @@ decoding_map = { 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py index 07a5358366d..5d7bdd63969 100644 --- a/Lib/encodings/cp1252.py +++ b/Lib/encodings/cp1252.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1252.TXT'. - +""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -69,7 +69,7 @@ decoding_map = { 0x009d: None, # UNDEFINED 0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py index c84808a254a..abc144cc04d 100644 --- a/Lib/encodings/cp1253.py +++ b/Lib/encodings/cp1253.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1253.TXT'. - +""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -144,7 +144,7 @@ decoding_map = { 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py index 9897ecf602f..4a2ab3caf47 100644 --- a/Lib/encodings/cp1254.py +++ b/Lib/encodings/cp1254.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1254.TXT'. - +""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -75,7 +75,7 @@ decoding_map = { 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py index 5404b46e4a7..c987b85f45e 100644 --- a/Lib/encodings/cp1255.py +++ b/Lib/encodings/cp1255.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1255.TXT'. - +""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -136,7 +136,7 @@ decoding_map = { 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py index 6bb02dda214..d72c5bcc9bb 100644 --- a/Lib/encodings/cp1256.py +++ b/Lib/encodings/cp1256.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1256.TXT'. - +""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: 0x067e, # ARABIC LETTER PEH 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -122,7 +122,7 @@ decoding_map = { 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py index ded826c92ca..d17a9042170 100644 --- a/Lib/encodings/cp1257.py +++ b/Lib/encodings/cp1257.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1257.TXT'. - +""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -124,7 +124,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py index 955253cf913..597f12438c9 100644 --- a/Lib/encodings/cp1258.py +++ b/Lib/encodings/cp1258.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP1258.TXT'. - +""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK @@ -83,7 +83,7 @@ decoding_map = { 0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN 0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN 0x00fe: 0x20ab, # DONG SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py index c4abaecdfb0..bc10379808d 100644 --- a/Lib/encodings/cp424.py +++ b/Lib/encodings/cp424.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP424.TXT'. - +""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # SELECT 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # REQUIRED NEW LINE @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: None, # UNDEFINED 0x00fe: None, # UNDEFINED 0x00ff: 0x009f, # EIGHT ONES -} +}) ### Encoding Map diff --git a/Lib/encodings/cp437.py b/Lib/encodings/cp437.py index ca7d90ea52e..db1b88a5679 100644 --- a/Lib/encodings/cp437.py +++ b/Lib/encodings/cp437.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP437.TXT'. - +""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py index 33d6fedb464..1c8fb57e174 100644 --- a/Lib/encodings/cp500.py +++ b/Lib/encodings/cp500.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP500.TXT'. - +""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -273,7 +273,7 @@ decoding_map = { 0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE 0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/cp737.py b/Lib/encodings/cp737.py index e55b3dd6b42..03665aea7d1 100644 --- a/Lib/encodings/cp737.py +++ b/Lib/encodings/cp737.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP737.TXT'. - +""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp775.py b/Lib/encodings/cp775.py index e43ce2d1bab..b38ccb5fe3e 100644 --- a/Lib/encodings/cp775.py +++ b/Lib/encodings/cp775.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP775.TXT'. - +""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp850.py b/Lib/encodings/cp850.py index cb0918c96ba..e26287b7f3b 100644 --- a/Lib/encodings/cp850.py +++ b/Lib/encodings/cp850.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP850.TXT'. - +""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp852.py b/Lib/encodings/cp852.py index ba4f14219af..431d8448f7e 100644 --- a/Lib/encodings/cp852.py +++ b/Lib/encodings/cp852.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP852.TXT'. - +""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp855.py b/Lib/encodings/cp855.py index c967bcf17c6..c9e71687bde 100644 --- a/Lib/encodings/cp855.py +++ b/Lib/encodings/cp855.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP855.TXT'. - +""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00a7, # SECTION SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py index f384acbf2d2..cc2e01f45af 100644 --- a/Lib/encodings/cp856.py +++ b/Lib/encodings/cp856.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP856.TXT'. - +""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL @@ -120,10 +120,10 @@ decoding_map = { 0x00d0: None, # UNDEFINED 0x00d1: None, # UNDEFINED 0x00d2: None, # UNDEFINED - 0x00d3: None, # UNDEFINED + 0x00d3: None, # UNDEFINEDS 0x00d4: None, # UNDEFINED 0x00d5: None, # UNDEFINED - 0x00d6: None, # UNDEFINED + 0x00d6: None, # UNDEFINEDE 0x00d7: None, # UNDEFINED 0x00d8: None, # UNDEFINED 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp857.py b/Lib/encodings/cp857.py index 49cc68529a1..6f4df23a568 100644 --- a/Lib/encodings/cp857.py +++ b/Lib/encodings/cp857.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP857.TXT'. - +""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -164,7 +164,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp860.py b/Lib/encodings/cp860.py index 3b9a15d297c..057d91870f3 100644 --- a/Lib/encodings/cp860.py +++ b/Lib/encodings/cp860.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP860.TXT'. 
- +""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp861.py b/Lib/encodings/cp861.py index 3f07fbac41d..8db3b40b243 100644 --- a/Lib/encodings/cp861.py +++ b/Lib/encodings/cp861.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP861.TXT'. - +""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp862.py b/Lib/encodings/cp862.py index 4bc1cbed336..1cac3e278a5 100644 --- a/Lib/encodings/cp862.py +++ b/Lib/encodings/cp862.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP862.TXT'. - +""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py. 
Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x05d0, # HEBREW LETTER ALEF 0x0081: 0x05d1, # HEBREW LETTER BET 0x0082: 0x05d2, # HEBREW LETTER GIMEL @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp863.py b/Lib/encodings/cp863.py index 3e6103f6ca9..ecdc391f74f 100644 --- a/Lib/encodings/cp863.py +++ b/Lib/encodings/cp863.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP863.TXT'. - +""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp864.py b/Lib/encodings/cp864.py index 819327836e6..861fb00111e 100644 --- a/Lib/encodings/cp864.py +++ b/Lib/encodings/cp864.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP864.TXT'. - +""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0025: 0x066a, # ARABIC PERCENT SIGN 0x0080: 0x00b0, # DEGREE SIGN 0x0081: 0x00b7, # MIDDLE DOT @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp865.py b/Lib/encodings/cp865.py index eaed7a9f9ff..4d9010df8fb 100644 --- a/Lib/encodings/cp865.py +++ b/Lib/encodings/cp865.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP865.TXT'. - +""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00b2, # SUPERSCRIPT TWO 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py index 25e1a50c12f..6a8b0b07520 100644 --- a/Lib/encodings/cp866.py +++ b/Lib/encodings/cp866.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP866.TXT'. - +""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x00a4, # CURRENCY SIGN 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp869.py b/Lib/encodings/cp869.py index 840335f0a8a..65d2b2e1d09 100644 --- a/Lib/encodings/cp869.py +++ b/Lib/encodings/cp869.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP869.TXT'. - +""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: None, # UNDEFINED 0x0081: None, # UNDEFINED 0x0082: None, # UNDEFINED @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS 0x00fe: 0x25a0, # BLACK SQUARE 0x00ff: 0x00a0, # NO-BREAK SPACE -} +}) ### Encoding Map diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py index 0231c7ac5bb..31f4d3d6d4c 100644 --- a/Lib/encodings/cp874.py +++ b/Lib/encodings/cp874.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP874.TXT'. - +""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x20ac, # EURO SIGN 0x0081: None, # UNDEFINED 0x0082: None, # UNDEFINED @@ -164,7 +164,7 @@ decoding_map = { 0x00fd: None, # UNDEFINED 0x00fe: None, # UNDEFINED 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/cp875.py b/Lib/encodings/cp875.py index 924c0a0a269..3500446930a 100644 --- a/Lib/encodings/cp875.py +++ b/Lib/encodings/cp875.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CP875.TXT'. - +""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0004: 0x009c, # CONTROL 0x0005: 0x0009, # HORIZONTAL TABULATION 0x0006: 0x0086, # CONTROL @@ -274,7 +274,7 @@ decoding_map = { 0x00fd: 0x001a, # SUBSTITUTE 0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ff: 0x009f, # CONTROL -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py index 7355853d745..f4c0bf750f2 100644 --- a/Lib/encodings/iso8859_1.py +++ b/Lib/encodings/iso8859_1.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-1.TXT'. - +""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -22,10 +22,7 @@ class Codec(codecs.Codec): return codecs.charmap_decode(input,errors,decoding_map) class StreamWriter(Codec,codecs.StreamWriter): - - def __init__(self,stream,errors='strict'): - - codecs.StreamWriter.__init__(self,strict,errors) + pass class StreamReader(Codec,codecs.StreamReader): pass @@ -38,9 +35,9 @@ def getregentry(): ### Decoding Map -decoding_map = { - -} +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py index 96b435cc299..c43c65306a2 100644 --- a/Lib/encodings/iso8859_10.py +++ b/Lib/encodings/iso8859_10.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-10.TXT'. - +""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON 0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA @@ -83,7 +83,7 @@ decoding_map = { 0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK 0x00ff: 0x0138, # LATIN SMALL LETTER KRA -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py index d8b223005a5..2ab52927ec3 100644 --- a/Lib/encodings/iso8859_13.py +++ b/Lib/encodings/iso8859_13.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-13.TXT'. - +""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK 0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK 0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE @@ -93,7 +93,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON 0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py index 8ee0aa9ded4..5533e9617a9 100644 --- a/Lib/encodings/iso8859_14.py +++ b/Lib/encodings/iso8859_14.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-14.TXT'. - +""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE 0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE 0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE @@ -68,7 +68,7 @@ decoding_map = { 0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX 0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE 0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py index 862ff28cef5..7bffff42b88 100644 --- a/Lib/encodings/iso8859_15.py +++ b/Lib/encodings/iso8859_15.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-15.TXT'. - +""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 
+(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a4: 0x20ac, # EURO SIGN 0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON 0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON @@ -45,7 +45,7 @@ decoding_map = { 0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE 0x00bd: 0x0153, # LATIN SMALL LIGATURE OE 0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py index 034001a0564..481f9a0a6cb 100644 --- a/Lib/encodings/iso8859_2.py +++ b/Lib/encodings/iso8859_2.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-2.TXT'. - +""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x02d8, # BREVE 0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE @@ -94,7 +94,7 @@ decoding_map = { 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE 0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py index f262767c2d3..c2820ad6add 100644 --- a/Lib/encodings/iso8859_3.py +++ b/Lib/encodings/iso8859_3.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-3.TXT'. - +""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,15 +35,17 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE 0x00a2: 0x02d8, # BREVE + 0x00a5: None, 0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x00ae: None, 0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE 0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE 0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX @@ -51,21 +53,26 @@ decoding_map = { 0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA 0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX + 0x00be: None, 0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE + 0x00c3: None, 0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE 0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x00d0: None, 0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE 0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x00e3: None, 0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE 0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX + 0x00f0: None, 0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE 0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX 0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE 0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py index 29f9fd348c4..30d6ca6805d 100644 --- a/Lib/encodings/iso8859_4.py +++ b/Lib/encodings/iso8859_4.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-4.TXT'. 
- +""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK 0x00a2: 0x0138, # LATIN SMALL LETTER KRA 0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA @@ -87,7 +87,7 @@ decoding_map = { 0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE 0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON 0x00ff: 0x02d9, # DOT ABOVE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py index d71c15f3901..2bdaa5003c0 100644 --- a/Lib/encodings/iso8859_5.py +++ b/Lib/encodings/iso8859_5.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-5.TXT'. - +""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE 0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE @@ -131,7 +131,7 @@ decoding_map = { 0x00fd: 0x00a7, # SECTION SIGN 0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U 0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE -} +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py index b4d4315ca43..585fa11e5d3 100644 --- a/Lib/encodings/iso8859_6.py +++ b/Lib/encodings/iso8859_6.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-6.TXT'. 
- +""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,11 +35,38 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x00a1: None, + 0x00a2: None, + 0x00a3: None, + 0x00a5: None, + 0x00a6: None, + 0x00a7: None, + 0x00a8: None, + 0x00a9: None, + 0x00aa: None, + 0x00ab: None, 0x00ac: 0x060c, # ARABIC COMMA + 0x00ae: None, + 0x00af: None, + 0x00b0: None, + 0x00b1: None, + 0x00b2: None, + 0x00b3: None, + 0x00b4: None, + 0x00b5: None, + 0x00b6: None, + 0x00b7: None, + 0x00b8: None, + 0x00b9: None, + 0x00ba: None, 0x00bb: 0x061b, # ARABIC SEMICOLON + 0x00bc: None, + 0x00bd: None, + 0x00be: None, 0x00bf: 0x061f, # ARABIC QUESTION MARK + 0x00c0: None, 0x00c1: 0x0621, # ARABIC LETTER HAMZA 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE @@ -66,6 +93,11 @@ decoding_map = { 0x00d8: 0x0638, # ARABIC LETTER ZAH 0x00d9: 0x0639, # ARABIC LETTER AIN 0x00da: 0x063a, # ARABIC LETTER GHAIN + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, + 0x00df: None, 0x00e0: 0x0640, # ARABIC TATWEEL 0x00e1: 0x0641, # ARABIC LETTER FEH 0x00e2: 0x0642, # ARABIC LETTER QAF @@ -85,7 +117,20 @@ decoding_map = { 0x00f0: 0x0650, # ARABIC KASRA 0x00f1: 0x0651, # ARABIC SHADDA 0x00f2: 0x0652, # ARABIC SUKUN -} + 0x00f3: None, + 0x00f4: None, + 0x00f5: None, + 0x00f6: None, + 0x00f7: None, + 0x00f8: None, + 0x00f9: None, + 0x00fa: None, + 0x00fb: None, + 0x00fc: None, + 0x00fd: None, + 0x00fe: None, + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py index c84761098cb..48f1bd58dbc 100644 --- a/Lib/encodings/iso8859_7.py +++ b/Lib/encodings/iso8859_7.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated 
from '8859-7.TXT'. - +""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,10 +35,14 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK 0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK + 0x00a4: None, + 0x00a5: None, + 0x00aa: None, + 0x00ae: None, 0x00af: 0x2015, # HORIZONTAL BAR 0x00b4: 0x0384, # GREEK TONOS 0x00b5: 0x0385, # GREEK DIALYTIKA TONOS @@ -67,6 +71,7 @@ decoding_map = { 0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON 0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI 0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO + 0x00d2: None, 0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA 0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU 0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON @@ -111,7 +116,8 @@ decoding_map = { 0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS 0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS 0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS -} + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py index 72b783b9409..a19aa671cca 100644 --- a/Lib/encodings/iso8859_8.py +++ b/Lib/encodings/iso8859_8.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-8.TXT'. - +""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,11 +35,43 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ + 0x00a1: None, 0x00aa: 0x00d7, # MULTIPLICATION SIGN - 0x00af: 0x203e, # OVERLINE 0x00ba: 0x00f7, # DIVISION SIGN + 0x00bf: None, + 0x00c0: None, + 0x00c1: None, + 0x00c2: None, + 0x00c3: None, + 0x00c4: None, + 0x00c5: None, + 0x00c6: None, + 0x00c7: None, + 0x00c8: None, + 0x00c9: None, + 0x00ca: None, + 0x00cb: None, + 0x00cc: None, + 0x00cd: None, + 0x00ce: None, + 0x00cf: None, + 0x00d0: None, + 0x00d1: None, + 0x00d2: None, + 0x00d3: None, + 0x00d4: None, + 0x00d5: None, + 0x00d6: None, + 0x00d7: None, + 0x00d8: None, + 0x00d9: None, + 0x00da: None, + 0x00db: None, + 0x00dc: None, + 0x00dd: None, + 0x00de: None, 0x00df: 0x2017, # DOUBLE LOW LINE 0x00e0: 0x05d0, # HEBREW LETTER ALEF 0x00e1: 0x05d1, # HEBREW LETTER BET @@ -68,7 +100,12 @@ decoding_map = { 0x00f8: 0x05e8, # HEBREW LETTER RESH 0x00f9: 0x05e9, # HEBREW LETTER SHIN 0x00fa: 0x05ea, # HEBREW LETTER TAV -} + 0x00fb: None, + 0x00fc: None, + 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK + 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK + 0x00ff: None, +}) ### Encoding Map diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py index 3f91d32fb39..a27890507a3 100644 --- a/Lib/encodings/iso8859_9.py +++ b/Lib/encodings/iso8859_9.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from '8859-9.TXT'. - +""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,15 +35,15 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE 0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE 0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA 0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE 0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I 0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA -} +}) ### Encoding Map diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py index 0e1c15b1715..c28004ef6ec 100644 --- a/Lib/encodings/koi8_r.py +++ b/Lib/encodings/koi8_r.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'KOI8-R.TXT'. - +""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL 0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL 0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT @@ -165,7 +165,7 @@ decoding_map = { 0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA 0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE 0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py index 1314836a5f6..45528319b22 100644 --- a/Lib/encodings/mac_cyrillic.py +++ b/Lib/encodings/mac_cyrillic.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'. - +""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU 0x00ff: 0x00a4, # CURRENCY SIGN -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py index 7673b83e279..b7040c4bc4f 100644 --- a/Lib/encodings/mac_greek.py +++ b/Lib/encodings/mac_greek.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'GREEK.TXT'. - +""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00b9, # SUPERSCRIPT ONE 0x0082: 0x00b2, # SUPERSCRIPT TWO @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 0x00ff: None, # UNDEFINED -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py index 62e1f6330d5..f20e1344fba 100644 --- a/Lib/encodings/mac_iceland.py +++ b/Lib/encodings/mac_iceland.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'ICELAND.TXT'. - +""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -159,7 +159,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_latin2.py b/Lib/encodings/mac_latin2.py index 7e64959fffc..0fba502cd6b 100644 --- a/Lib/encodings/mac_latin2.py +++ b/Lib/encodings/mac_latin2.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'LATIN2.TXT'. - +""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON @@ -163,7 +163,7 @@ decoding_map = { 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py index 9147e93cd60..6d048a3b6ea 100644 --- a/Lib/encodings/mac_roman.py +++ b/Lib/encodings/mac_roman.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'ROMAN.TXT'. - +""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. 
"""#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py index 76b6bbe3f79..c81a8646695 100644 --- a/Lib/encodings/mac_turkish.py +++ b/Lib/encodings/mac_turkish.py @@ -1,9 +1,9 @@ -""" Python Character Mapping Codec generated from 'TURKISH.TXT'. - +""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum. """#" @@ -35,8 +35,8 @@ def getregentry(): ### Decoding Map -decoding_map = { - +decoding_map = codecs.make_identity_dict(range(256)) +decoding_map.update({ 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS 0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA @@ -160,7 +160,7 @@ decoding_map = { 0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT 0x00fe: 0x02db, # OGONEK 0x00ff: 0x02c7, # CARON -} +}) ### Encoding Map diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 05aecd87b67..579bab1fe27 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -494,14 +494,15 @@ for encoding in ( 'cp852', 'cp855', 'cp860', 'cp861', 'cp862', 'cp863', 'cp865', 'cp866', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', - 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', - 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', + 'iso8859_2', 'iso8859_4', 'iso8859_5', + 'iso8859_9', 'koi8_r', 'latin_1', 'mac_cyrillic', 'mac_latin2', ### These 
have undefined mappings: #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', #'cp1256', 'cp1257', 'cp1258', #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', + #'iso8859_3', 'iso8859_6', 'iso8859_7', #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', ### These fail the round-trip: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fe591b52a49..b9e457d6a7b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s, Py_DECREF(w); if (x == NULL) { if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found: default to Latin-1 mapping */ + /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - *p++ = (Py_UNICODE)ch; - continue; - } + x = Py_None; + Py_INCREF(x); + } else goto onError; } @@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, Py_DECREF(w); if (x == NULL) { if (PyErr_ExceptionMatches(PyExc_LookupError)) { - /* No mapping found: default to Latin-1 mapping if possible */ + /* No mapping found means: mapping is undefined. */ PyErr_Clear(); - if (ch < 256) { - *s++ = (char)ch; - continue; - } - else if (!charmap_encoding_error(&p, &s, errors, - "missing character mapping")) - continue; - } + x = Py_None; + Py_INCREF(x); + } else goto onError; } diff --git a/Tools/scripts/gencodec.py b/Tools/scripts/gencodec.py index 45b69b0abd1..39b42ffc05f 100644 --- a/Tools/scripts/gencodec.py +++ b/Tools/scripts/gencodec.py @@ -1,9 +1,9 @@ """ Unicode Mapping Parser and Codec Generator. This script parses Unicode mapping files as available from the Unicode -site (ftp.unicode.org) and creates Python codec modules from them. The -codecs use the standard character mapping codec to actually apply the -mapping. +site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec +modules from them. The codecs use the standard character mapping codec +to actually apply the mapping. 
Synopsis: gencodec.py dir codec_prefix @@ -18,6 +18,7 @@ same location (with .mapping extension). Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright Guido van Rossum, 2000. """#" @@ -70,6 +71,10 @@ def readmap(filename, lines = f.readlines() f.close() enc2uni = {} + identity = [] + unmapped = range(256) + for i in range(256): + unmapped[i] = i for line in lines: line = strip(line) if not line or line[0] == '#': @@ -85,8 +90,22 @@ def readmap(filename, comment = '' else: comment = comment[1:] - if enc != uni: + if enc < 256: + unmapped.remove(enc) + if enc == uni: + identity.append(enc) + else: + enc2uni[enc] = (uni,comment) + else: enc2uni[enc] = (uni,comment) + # If there are at least as many identity-mapped entries as unmapped entries, + # it pays to generate an identity dictionary first, and add explicit + # mappings to None for the rest + if len(identity)>=len(unmapped): + for enc in unmapped: + enc2uni[enc] = (None, "") + enc2uni['IDENTITY'] = 256 + return enc2uni def hexrepr(t, @@ -143,11 +162,12 @@ def codegen(name,map,comments=1): """ l = [ '''\ -""" Python Character Mapping Codec generated from '%s'. +""" Python Character Mapping Codec generated from '%s' with gencodec.py. Written by Marc-Andre Lemburg (mal@lemburg.com). (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. +(c) Copyright 2000 Guido van Rossum.
"""#" @@ -178,15 +198,23 @@ def getregentry(): return (Codec().encode,Codec().decode,StreamReader,StreamWriter) ### Decoding Map - -decoding_map = { ''' % name, ] + + if map.has_key("IDENTITY"): + l.append("decoding_map = codecs.make_identity_dict(range(%d))" + % map["IDENTITY"]) + l.append("decoding_map.update({") + splits = 1 + del map["IDENTITY"] + else: + l.append("decoding_map = {") + splits = 0 + mappings = map.items() mappings.sort() append = l.append i = 0 - splits = 0 for e,value in mappings: try: (u,c) = value @@ -198,7 +226,7 @@ decoding_map = { append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c)) else: append('\t%s: %s,' % (key,unicoderepr(u))) - i = i + 1 + i += 1 if i == 4096: # Split the definition into parts to that the Python # parser doesn't dump core @@ -206,7 +234,7 @@ decoding_map = { append('}') else: append('})') - append('map.update({') + append('decoding_map.update({') i = 0 splits = splits + 1 if splits == 0: @@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1): mapnames = os.listdir(dir) for mapname in mapnames: - if mapname[-len('.mapping'):] != '.mapping': + if not mapname.endswith('.mapping'): continue codefile = mapname[:-len('.mapping')] + '.py' print 'converting %s to %s' % (mapname,