From be0c3250b1bbc16eea41dfe0f93f33ad384e9bd4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 23 Nov 2013 18:52:23 +0200 Subject: [PATCH] Issue #19668: Added support for the cp1125 encoding. --- Doc/library/codecs.rst | 4 +++ Lib/encodings/aliases.py | 6 +++++ Lib/encodings/cp866.py | 52 +++++++++++++++++++------------------- Lib/test/test_codecs.py | 1 + Lib/test/test_unicode.py | 4 +-- Lib/test/test_xml_etree.py | 6 ++--- Misc/NEWS | 2 ++ 7 files changed, 44 insertions(+), 31 deletions(-) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index ef799189805..843de27ae35 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1027,6 +1027,10 @@ particular, the following variants typically exist: +-----------------+--------------------------------+--------------------------------+ | cp1026 | ibm1026 | Turkish | +-----------------+--------------------------------+--------------------------------+ +| cp1125 | 1125, ibm1125, cp866u, ruscii | Ukrainian | +| | | | +| | | .. versionadded:: 3.4 | ++-----------------+--------------------------------+--------------------------------+ | cp1140 | ibm1140 | Western Europe | +-----------------+--------------------------------+--------------------------------+ | cp1250 | windows-1250 | Central and Eastern Europe | diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index 331095b1f16..5461aa053a4 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -63,6 +63,12 @@ aliases = { 'csibm1026' : 'cp1026', 'ibm1026' : 'cp1026', + # cp1125 codec + '1125' : 'cp1125', + 'ibm1125' : 'cp1125', + 'cp866u' : 'cp1125', + 'ruscii' : 'cp1125', + # cp1140 codec '1140' : 'cp1140', 'ibm1140' : 'cp1140', diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py index bec7ae39fba..b1fd69deb07 100644 --- a/Lib/encodings/cp866.py +++ b/Lib/encodings/cp866.py @@ -1,4 +1,4 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. +""" Python Character Mapping Codec for CP1125 """#" @@ -32,7 +32,7 @@ class StreamReader(Codec,codecs.StreamReader): def getregentry(): return codecs.CodecInfo( - name='cp866', + name='cp1125', encode=Codec().encode, decode=Codec().decode, incrementalencoder=IncrementalEncoder, @@ -159,14 +159,14 @@ decoding_map.update({ 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR + 0x00f2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x00f3: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN + 0x00f4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x00f5: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x00f6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00f7: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x00f8: 0x0407, # CYRILLIC CAPITAL LETTER YI + 0x00f9: 0x0457, # CYRILLIC SMALL LETTER YI 0x00fa: 0x00b7, # MIDDLE DOT 0x00fb: 0x221a, # SQUARE ROOT 0x00fc: 0x2116, # NUMERO SIGN @@ -420,14 +420,14 @@ decoding_table = ( '\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA '\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO '\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO - '\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - '\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE - '\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI - '\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI - '\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U - '\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U - '\xb0' # 0x00f8 -> DEGREE SIGN - '\u2219' # 0x00f9 -> BULLET OPERATOR + '\u0490' # 0x00f2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN + '\u0491' # 0x00f3 -> CYRILLIC SMALL LETTER GHE WITH UPTURN + '\u0404' # 0x00f4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE + '\u0454' # 0x00f5 -> CYRILLIC SMALL LETTER UKRAINIAN IE + '\u0406' # 0x00f6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + '\u0456' # 0x00f7 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + '\u0407' # 0x00f8 -> CYRILLIC CAPITAL LETTER YI + '\u0457' # 0x00f9 -> CYRILLIC SMALL LETTER YI '\xb7' # 0x00fa -> MIDDLE DOT '\u221a' # 0x00fb -> SQUARE ROOT '\u2116' # 0x00fc -> NUMERO SIGN @@ -569,12 +569,11 @@ encoding_map = { 0x007f: 0x007f, # DELETE 0x00a0: 0x00ff, # NO-BREAK SPACE 0x00a4: 0x00fd, # CURRENCY SIGN - 0x00b0: 0x00f8, # DEGREE SIGN 0x00b7: 0x00fa, # MIDDLE DOT 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI - 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U + 0x0404: 0x00f4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0406: 0x00f6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407: 0x00f8, # CYRILLIC CAPITAL LETTER YI 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE @@ -640,11 +639,12 @@ encoding_map = { 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO - 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI - 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U + 0x0454: 0x00f5, # CYRILLIC SMALL LETTER UKRAINIAN IE + 0x0456: 0x00f7, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0457: 0x00f9, # CYRILLIC SMALL LETTER YI + 0x0490: 0x00f2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0491: 0x00f3, # CYRILLIC SMALL LETTER GHE WITH UPTURN 0x2116: 0x00fc, # NUMERO SIGN - 0x2219: 0x00f9, # BULLET OPERATOR 0x221a: 0x00fb, # SQUARE ROOT 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 07a6a5e0be2..639ce9fd852 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1602,6 +1602,7 @@ all_unicode_encodings = [ "cp037", "cp1006", "cp1026", + "cp1125", "cp1140", "cp1250", "cp1251", diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 727897e53e6..07f76a8ae0d 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1834,7 +1834,7 @@ class UnicodeTest(string_tests.CommonTest, 'cp037', 'cp1026', 'cp273', 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862', - 'cp863', 'cp865', 'cp866', + 'cp863', 'cp865', 'cp866', 'cp1125', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', @@ -1862,7 +1862,7 @@ class UnicodeTest(string_tests.CommonTest, 'cp037', 'cp1026', 'cp273', 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862', - 'cp863', 'cp865', 'cp866', + 'cp863', 'cp865', 'cp866', 'cp1125', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_2', 'iso8859_4', 'iso8859_5', 'iso8859_9', 'koi8_r', 'latin_1', diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 614e598f6cb..5351b0b82eb 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -692,9 +692,9 @@ class ElementTreeTest(unittest.TestCase): 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', - 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1250', - 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', - 'cp1257', 'cp1258', + 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', + 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', + 'cp1256', 'cp1257', 'cp1258', 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', diff --git a/Misc/NEWS b/Misc/NEWS index 2b804e8cabe..668162f3723 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -68,6 +68,8 @@ Core and Builtins Library ------- +- Issue #19668: Added support for the cp1125 encoding. + - Issue #19689: Add ssl.create_default_context() factory function. It creates a new SSLContext object with secure default settings.