Issue #19668: Added support for the cp1125 encoding.
This commit is contained in:
parent
e50240c504
commit
be0c3250b1
|
@ -1027,6 +1027,10 @@ particular, the following variants typically exist:
|
|||
+-----------------+--------------------------------+--------------------------------+
|
||||
| cp1026 | ibm1026 | Turkish |
|
||||
+-----------------+--------------------------------+--------------------------------+
|
||||
| cp1125 | 1125, ibm1125, cp866u, ruscii | Ukrainian |
|
||||
| | | |
|
||||
| | | .. versionadded:: 3.4 |
|
||||
+-----------------+--------------------------------+--------------------------------+
|
||||
| cp1140 | ibm1140 | Western Europe |
|
||||
+-----------------+--------------------------------+--------------------------------+
|
||||
| cp1250 | windows-1250 | Central and Eastern Europe |
|
||||
|
|
|
@ -63,6 +63,12 @@ aliases = {
|
|||
'csibm1026' : 'cp1026',
|
||||
'ibm1026' : 'cp1026',
|
||||
|
||||
# cp1125 codec
|
||||
'1125' : 'cp1125',
|
||||
'ibm1125' : 'cp1125',
|
||||
'cp866u' : 'cp1125',
|
||||
'ruscii' : 'cp1125',
|
||||
|
||||
# cp1140 codec
|
||||
'1140' : 'cp1140',
|
||||
'ibm1140' : 'cp1140',
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py.
|
||||
""" Python Character Mapping Codec for CP1125
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -32,7 +32,7 @@ class StreamReader(Codec,codecs.StreamReader):
|
|||
|
||||
def getregentry():
|
||||
return codecs.CodecInfo(
|
||||
- name='cp866',
|
||||
+ name='cp1125',
|
||||
encode=Codec().encode,
|
||||
decode=Codec().decode,
|
||||
incrementalencoder=IncrementalEncoder,
|
||||
|
@ -159,14 +159,14 @@ decoding_map.update({
|
|||
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||
0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
|
||||
- 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
- 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
- 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
|
||||
- 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
|
||||
- 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
- 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||
- 0x00f8: 0x00b0, # DEGREE SIGN
|
||||
- 0x00f9: 0x2219, # BULLET OPERATOR
|
||||
+ 0x00f2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
+ 0x00f3: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
+ 0x00f4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
+ 0x00f5: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
+ 0x00f6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ 0x00f7: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ 0x00f8: 0x0407, # CYRILLIC CAPITAL LETTER YI
|
||||
+ 0x00f9: 0x0457, # CYRILLIC SMALL LETTER YI
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x2116, # NUMERO SIGN
|
||||
|
@ -420,14 +420,14 @@ decoding_table = (
|
|||
'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
|
||||
'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
|
||||
'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
|
||||
- '\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
- '\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
- '\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI
|
||||
- '\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI
|
||||
- '\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
|
||||
- '\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U
|
||||
- '\xb0' # 0x00f8 -> DEGREE SIGN
|
||||
- '\u2219' # 0x00f9 -> BULLET OPERATOR
|
||||
+ '\u0490' # 0x00f2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
+ '\u0491' # 0x00f3 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
+ '\u0404' # 0x00f4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
+ '\u0454' # 0x00f5 -> CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
+ '\u0406' # 0x00f6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ '\u0456' # 0x00f7 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ '\u0407' # 0x00f8 -> CYRILLIC CAPITAL LETTER YI
|
||||
+ '\u0457' # 0x00f9 -> CYRILLIC SMALL LETTER YI
|
||||
'\xb7' # 0x00fa -> MIDDLE DOT
|
||||
'\u221a' # 0x00fb -> SQUARE ROOT
|
||||
'\u2116' # 0x00fc -> NUMERO SIGN
|
||||
|
@ -569,12 +569,11 @@ encoding_map = {
|
|||
0x007f: 0x007f, # DELETE
|
||||
0x00a0: 0x00ff, # NO-BREAK SPACE
|
||||
0x00a4: 0x00fd, # CURRENCY SIGN
|
||||
- 0x00b0: 0x00f8, # DEGREE SIGN
|
||||
0x00b7: 0x00fa, # MIDDLE DOT
|
||||
0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
|
||||
- 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
- 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI
|
||||
- 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
+ 0x0404: 0x00f4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
+ 0x0406: 0x00f6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ 0x0407: 0x00f8, # CYRILLIC CAPITAL LETTER YI
|
||||
0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
|
||||
|
@ -640,11 +639,12 @@ encoding_map = {
|
|||
0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
|
||||
0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
|
||||
- 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
- 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI
|
||||
- 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U
|
||||
+ 0x0454: 0x00f5, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
+ 0x0456: 0x00f7, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
+ 0x0457: 0x00f9, # CYRILLIC SMALL LETTER YI
|
||||
+ 0x0490: 0x00f2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
+ 0x0491: 0x00f3, # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
0x2116: 0x00fc, # NUMERO SIGN
|
||||
- 0x2219: 0x00f9, # BULLET OPERATOR
|
||||
0x221a: 0x00fb, # SQUARE ROOT
|
||||
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
|
||||
|
|
|
@ -1602,6 +1602,7 @@ all_unicode_encodings = [
|
|||
"cp037",
|
||||
"cp1006",
|
||||
"cp1026",
|
||||
"cp1125",
|
||||
"cp1140",
|
||||
"cp1250",
|
||||
"cp1251",
|
||||
|
|
|
@ -1834,7 +1834,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
'cp037', 'cp1026', 'cp273',
|
||||
'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
|
||||
'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
|
||||
- 'cp863', 'cp865', 'cp866',
|
||||
+ 'cp863', 'cp865', 'cp866', 'cp1125',
|
||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||
'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
|
||||
'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
|
||||
|
@ -1862,7 +1862,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
'cp037', 'cp1026', 'cp273',
|
||||
'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
|
||||
'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
|
||||
- 'cp863', 'cp865', 'cp866',
|
||||
+ 'cp863', 'cp865', 'cp866', 'cp1125',
|
||||
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
|
||||
'iso8859_2', 'iso8859_4', 'iso8859_5',
|
||||
'iso8859_9', 'koi8_r', 'latin_1',
|
||||
|
|
|
@ -692,9 +692,9 @@ class ElementTreeTest(unittest.TestCase):
|
|||
'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
|
||||
'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
|
||||
'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
|
||||
- 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1250',
|
||||
- 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256',
|
||||
- 'cp1257', 'cp1258',
|
||||
+ 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
|
||||
+ 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
|
||||
+ 'cp1256', 'cp1257', 'cp1258',
|
||||
'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
|
||||
'mac-roman', 'mac-turkish',
|
||||
'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
|
||||
|
|
Loading…
Reference in New Issue