Marc-Andre Lemburg: Unicode encodings.

This commit is contained in:
Guido van Rossum 2000-03-10 23:17:24 +00:00
parent 21288edad0
commit 0229bf6001
64 changed files with 8398 additions and 0 deletions

78
Lib/encodings/__init__.py Normal file
View File

@ -0,0 +1,78 @@
""" Standard "encodings" Package
Standard Python encoding modules are stored in this package
directory.
Codec modules must have names corresponding to standard lower-case
encoding names. Hyphens are automatically converted to
underscores, e.g. 'utf-8' is looked up as module utf_8.
Each codec module must export the following interface:
* getregentry() -> (encoder, decoder, stream_reader, stream_writer)
The getregentry() API must return callable objects which adhere to
the Python Codec Interface Standard.
In addition, a module may optionally also define the following
APIs which are then used by the package's codec search function:
* getaliases() -> sequence of encoding name strings to use as aliases
Alias names returned by getaliases() must be lower-case.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import string,codecs,aliases
_cache = {}
_unkown = '--unkown--'
def search_function(encoding):
# Cache lookup
entry = _cache.get(encoding,_unkown)
if entry is not _unkown:
return entry
# Import the module
modname = string.replace(encoding,'-','_')
modname = aliases.aliases.get(modname,modname)
try:
mod = __import__(modname,globals(),locals(),'*')
except ImportError,why:
_cache[encoding] = None
return None
# Now ask the module for the registry entry
try:
entry = tuple(mod.getregentry())
except AttributeError:
entry = ()
if len(entry) != 4:
raise SystemError,\
'module "%s.%s" failed to register' % \
(__name__,modname)
for obj in entry:
if not callable(obj):
raise SystemError,\
'incompatible codecs in module "%s.%s"' % \
(__name__,modname)
# Cache the encoding and its aliases
_cache[encoding] = entry
try:
codecaliases = mod.getaliases()
except AttributeError:
pass
else:
for alias in codecaliases:
_cache[alias] = entry
return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)

57
Lib/encodings/aliases.py Normal file
View File

@ -0,0 +1,57 @@
""" Encoding Aliases Support
This module is used by the encodings package search function to
map encodings names to module names.
Note that the search function converts the encoding names to lower
case and replaces hyphens with underscores *before* performing the
lookup.
"""
aliases = {
# Latin-1
'latin': 'latin_1',
'latin1': 'latin_1',
# UTF-8
'utf': 'utf_8',
'utf8': 'utf_8',
'u8': 'utf_8',
# UTF-16
'utf16': 'utf_16',
'u16': 'utf_16',
'utf_16be': 'utf_16_be',
'utf_16le': 'utf_16_le',
'UnicodeBigUnmarked': 'utf_16_be',
'UnicodeLittleUnmarked': 'utf_16_le',
# ASCII
'us_ascii': 'ascii',
# ISO
'iso8859_1': 'latin_1',
'iso_8859_1': 'latin_1',
'iso_8859_10': 'iso8859_10',
'iso_8859_13': 'iso8859_13',
'iso_8859_14': 'iso8859_14',
'iso_8859_15': 'iso8859_15',
'iso_8859_2': 'iso8859_2',
'iso_8859_3': 'iso8859_3',
'iso_8859_4': 'iso8859_4',
'iso_8859_5': 'iso8859_5',
'iso_8859_6': 'iso8859_6',
'iso_8859_7': 'iso8859_7',
'iso_8859_8': 'iso8859_8',
'iso_8859_9': 'iso8859_9',
# Mac
'MacCentralEurope': 'mac_latin2',
'MacCyrillic': 'mac_cyrillic',
'MacGreek': 'mac_greek',
'MacIceland': 'mac_iceland',
'MacRoman': 'mac_roman',
'MacTurkish': 'mac_turkish',
}

35
Lib/encodings/ascii.py Normal file
View File

@ -0,0 +1,35 @@
""" Python 'ascii' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.ascii_encode
decode = codecs.ascii_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
class StreamConverter(StreamWriter,StreamReader):
encode = codecs.ascii_decode
decode = codecs.ascii_encode
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

51
Lib/encodings/charmap.py Normal file
View File

@ -0,0 +1,51 @@
""" Generic Python Character Mapping Codec.
Use this codec directly rather than through the automatic
conversion mechanisms supplied by unicode() and .encode().
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.charmap_encode
decode = codecs.charmap_decode
class StreamWriter(Codec,codecs.StreamWriter):
def __init__(self,stream,errors='strict',mapping=None):
codecs.StreamWriter.__init__(self,strict,errors)
self.mapping = mapping
def encode(self,input,errors='strict'):
return Codec.encode(input,errors,self.mapping)
class StreamReader(Codec,codecs.StreamReader):
def __init__(self,stream,errors='strict',mapping=None):
codecs.StreamReader.__init__(self,strict,errors)
self.mapping = mapping
def decode(self,input,errors='strict'):
return Codec.decode(input,errors,self.mapping)
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

282
Lib/encodings/cp037.py Normal file
View File

@ -0,0 +1,282 @@
""" Python Character Mapping Codec generated from 'CP037.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x0007: 0x007f, # DELETE
0x0008: 0x0097, # CONTROL
0x0009: 0x008d, # CONTROL
0x000a: 0x008e, # CONTROL
0x0014: 0x009d, # CONTROL
0x0015: 0x0085, # CONTROL
0x0016: 0x0008, # BACKSPACE
0x0017: 0x0087, # CONTROL
0x001a: 0x0092, # CONTROL
0x001b: 0x008f, # CONTROL
0x0020: 0x0080, # CONTROL
0x0021: 0x0081, # CONTROL
0x0022: 0x0082, # CONTROL
0x0023: 0x0083, # CONTROL
0x0024: 0x0084, # CONTROL
0x0025: 0x000a, # LINE FEED
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
0x0027: 0x001b, # ESCAPE
0x0028: 0x0088, # CONTROL
0x0029: 0x0089, # CONTROL
0x002a: 0x008a, # CONTROL
0x002b: 0x008b, # CONTROL
0x002c: 0x008c, # CONTROL
0x002d: 0x0005, # ENQUIRY
0x002e: 0x0006, # ACKNOWLEDGE
0x002f: 0x0007, # BELL
0x0030: 0x0090, # CONTROL
0x0031: 0x0091, # CONTROL
0x0032: 0x0016, # SYNCHRONOUS IDLE
0x0033: 0x0093, # CONTROL
0x0034: 0x0094, # CONTROL
0x0035: 0x0095, # CONTROL
0x0036: 0x0096, # CONTROL
0x0037: 0x0004, # END OF TRANSMISSION
0x0038: 0x0098, # CONTROL
0x0039: 0x0099, # CONTROL
0x003a: 0x009a, # CONTROL
0x003b: 0x009b, # CONTROL
0x003c: 0x0014, # DEVICE CONTROL FOUR
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
0x003e: 0x009e, # CONTROL
0x003f: 0x001a, # SUBSTITUTE
0x0040: 0x0020, # SPACE
0x0041: 0x00a0, # NO-BREAK SPACE
0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x004a: 0x00a2, # CENT SIGN
0x004b: 0x002e, # FULL STOP
0x004c: 0x003c, # LESS-THAN SIGN
0x004d: 0x0028, # LEFT PARENTHESIS
0x004e: 0x002b, # PLUS SIGN
0x004f: 0x007c, # VERTICAL LINE
0x0050: 0x0026, # AMPERSAND
0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x005a: 0x0021, # EXCLAMATION MARK
0x005b: 0x0024, # DOLLAR SIGN
0x005c: 0x002a, # ASTERISK
0x005d: 0x0029, # RIGHT PARENTHESIS
0x005e: 0x003b, # SEMICOLON
0x005f: 0x00ac, # NOT SIGN
0x0060: 0x002d, # HYPHEN-MINUS
0x0061: 0x002f, # SOLIDUS
0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x006a: 0x00a6, # BROKEN BAR
0x006b: 0x002c, # COMMA
0x006c: 0x0025, # PERCENT SIGN
0x006d: 0x005f, # LOW LINE
0x006e: 0x003e, # GREATER-THAN SIGN
0x006f: 0x003f, # QUESTION MARK
0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x0079: 0x0060, # GRAVE ACCENT
0x007a: 0x003a, # COLON
0x007b: 0x0023, # NUMBER SIGN
0x007c: 0x0040, # COMMERCIAL AT
0x007d: 0x0027, # APOSTROPHE
0x007e: 0x003d, # EQUALS SIGN
0x007f: 0x0022, # QUOTATION MARK
0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x0081: 0x0061, # LATIN SMALL LETTER A
0x0082: 0x0062, # LATIN SMALL LETTER B
0x0083: 0x0063, # LATIN SMALL LETTER C
0x0084: 0x0064, # LATIN SMALL LETTER D
0x0085: 0x0065, # LATIN SMALL LETTER E
0x0086: 0x0066, # LATIN SMALL LETTER F
0x0087: 0x0067, # LATIN SMALL LETTER G
0x0088: 0x0068, # LATIN SMALL LETTER H
0x0089: 0x0069, # LATIN SMALL LETTER I
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC)
0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC)
0x008f: 0x00b1, # PLUS-MINUS SIGN
0x0090: 0x00b0, # DEGREE SIGN
0x0091: 0x006a, # LATIN SMALL LETTER J
0x0092: 0x006b, # LATIN SMALL LETTER K
0x0093: 0x006c, # LATIN SMALL LETTER L
0x0094: 0x006d, # LATIN SMALL LETTER M
0x0095: 0x006e, # LATIN SMALL LETTER N
0x0096: 0x006f, # LATIN SMALL LETTER O
0x0097: 0x0070, # LATIN SMALL LETTER P
0x0098: 0x0071, # LATIN SMALL LETTER Q
0x0099: 0x0072, # LATIN SMALL LETTER R
0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
0x009d: 0x00b8, # CEDILLA
0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x00b5, # MICRO SIGN
0x00a1: 0x007e, # TILDE
0x00a2: 0x0073, # LATIN SMALL LETTER S
0x00a3: 0x0074, # LATIN SMALL LETTER T
0x00a4: 0x0075, # LATIN SMALL LETTER U
0x00a5: 0x0076, # LATIN SMALL LETTER V
0x00a6: 0x0077, # LATIN SMALL LETTER W
0x00a7: 0x0078, # LATIN SMALL LETTER X
0x00a8: 0x0079, # LATIN SMALL LETTER Y
0x00a9: 0x007a, # LATIN SMALL LETTER Z
0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ab: 0x00bf, # INVERTED QUESTION MARK
0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC)
0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC)
0x00af: 0x00ae, # REGISTERED SIGN
0x00b0: 0x005e, # CIRCUMFLEX ACCENT
0x00b1: 0x00a3, # POUND SIGN
0x00b2: 0x00a5, # YEN SIGN
0x00b3: 0x00b7, # MIDDLE DOT
0x00b4: 0x00a9, # COPYRIGHT SIGN
0x00b5: 0x00a7, # SECTION SIGN
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ba: 0x005b, # LEFT SQUARE BRACKET
0x00bb: 0x005d, # RIGHT SQUARE BRACKET
0x00bc: 0x00af, # MACRON
0x00bd: 0x00a8, # DIAERESIS
0x00be: 0x00b4, # ACUTE ACCENT
0x00bf: 0x00d7, # MULTIPLICATION SIGN
0x00c0: 0x007b, # LEFT CURLY BRACKET
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
0x00ca: 0x00ad, # SOFT HYPHEN
0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00d0: 0x007d, # RIGHT CURLY BRACKET
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
0x00da: 0x00b9, # SUPERSCRIPT ONE
0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00e0: 0x005c, # REVERSE SOLIDUS
0x00e1: 0x00f7, # DIVISION SIGN
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
0x00ea: 0x00b2, # SUPERSCRIPT TWO
0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00f0: 0x0030, # DIGIT ZERO
0x00f1: 0x0031, # DIGIT ONE
0x00f2: 0x0032, # DIGIT TWO
0x00f3: 0x0033, # DIGIT THREE
0x00f4: 0x0034, # DIGIT FOUR
0x00f5: 0x0035, # DIGIT FIVE
0x00f6: 0x0036, # DIGIT SIX
0x00f7: 0x0037, # DIGIT SEVEN
0x00f8: 0x0038, # DIGIT EIGHT
0x00f9: 0x0039, # DIGIT NINE
0x00fa: 0x00b3, # SUPERSCRIPT THREE
0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

140
Lib/encodings/cp1006.py Normal file
View File

@ -0,0 +1,140 @@
""" Python Character Mapping Codec generated from 'CP1006.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE
0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR
0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE
0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX
0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN
0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT
0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE
0x00ab: 0x060c, # ARABIC COMMA
0x00ac: 0x061b, # ARABIC SEMICOLON
0x00ae: 0x061f, # ARABIC QUESTION MARK
0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM
0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM
0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM
0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM
0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM
0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM
0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORMN
0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM
0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM
0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM
0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM
0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM
0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM
0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM
0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM
0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM
0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM
0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM
0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM
0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

282
Lib/encodings/cp1026.py Normal file
View File

@ -0,0 +1,282 @@
""" Python Character Mapping Codec generated from 'CP1026.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x0007: 0x007f, # DELETE
0x0008: 0x0097, # CONTROL
0x0009: 0x008d, # CONTROL
0x000a: 0x008e, # CONTROL
0x0014: 0x009d, # CONTROL
0x0015: 0x0085, # CONTROL
0x0016: 0x0008, # BACKSPACE
0x0017: 0x0087, # CONTROL
0x001a: 0x0092, # CONTROL
0x001b: 0x008f, # CONTROL
0x0020: 0x0080, # CONTROL
0x0021: 0x0081, # CONTROL
0x0022: 0x0082, # CONTROL
0x0023: 0x0083, # CONTROL
0x0024: 0x0084, # CONTROL
0x0025: 0x000a, # LINE FEED
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
0x0027: 0x001b, # ESCAPE
0x0028: 0x0088, # CONTROL
0x0029: 0x0089, # CONTROL
0x002a: 0x008a, # CONTROL
0x002b: 0x008b, # CONTROL
0x002c: 0x008c, # CONTROL
0x002d: 0x0005, # ENQUIRY
0x002e: 0x0006, # ACKNOWLEDGE
0x002f: 0x0007, # BELL
0x0030: 0x0090, # CONTROL
0x0031: 0x0091, # CONTROL
0x0032: 0x0016, # SYNCHRONOUS IDLE
0x0033: 0x0093, # CONTROL
0x0034: 0x0094, # CONTROL
0x0035: 0x0095, # CONTROL
0x0036: 0x0096, # CONTROL
0x0037: 0x0004, # END OF TRANSMISSION
0x0038: 0x0098, # CONTROL
0x0039: 0x0099, # CONTROL
0x003a: 0x009a, # CONTROL
0x003b: 0x009b, # CONTROL
0x003c: 0x0014, # DEVICE CONTROL FOUR
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
0x003e: 0x009e, # CONTROL
0x003f: 0x001a, # SUBSTITUTE
0x0040: 0x0020, # SPACE
0x0041: 0x00a0, # NO-BREAK SPACE
0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0048: 0x007b, # LEFT CURLY BRACKET
0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x004a: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x004b: 0x002e, # FULL STOP
0x004c: 0x003c, # LESS-THAN SIGN
0x004d: 0x0028, # LEFT PARENTHESIS
0x004e: 0x002b, # PLUS SIGN
0x004f: 0x0021, # EXCLAMATION MARK
0x0050: 0x0026, # AMPERSAND
0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x005a: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x005b: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x005c: 0x002a, # ASTERISK
0x005d: 0x0029, # RIGHT PARENTHESIS
0x005e: 0x003b, # SEMICOLON
0x005f: 0x005e, # CIRCUMFLEX ACCENT
0x0060: 0x002d, # HYPHEN-MINUS
0x0061: 0x002f, # SOLIDUS
0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0068: 0x005b, # LEFT SQUARE BRACKET
0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x006a: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x006b: 0x002c, # COMMA
0x006c: 0x0025, # PERCENT SIGN
0x006d: 0x005f, # LOW LINE
0x006e: 0x003e, # GREATER-THAN SIGN
0x006f: 0x003f, # QUESTION MARK
0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x0079: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x007a: 0x003a, # COLON
0x007b: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x007c: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x007d: 0x0027, # APOSTROPHE
0x007e: 0x003d, # EQUALS SIGN
0x007f: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x0081: 0x0061, # LATIN SMALL LETTER A
0x0082: 0x0062, # LATIN SMALL LETTER B
0x0083: 0x0063, # LATIN SMALL LETTER C
0x0084: 0x0064, # LATIN SMALL LETTER D
0x0085: 0x0065, # LATIN SMALL LETTER E
0x0086: 0x0066, # LATIN SMALL LETTER F
0x0087: 0x0067, # LATIN SMALL LETTER G
0x0088: 0x0068, # LATIN SMALL LETTER H
0x0089: 0x0069, # LATIN SMALL LETTER I
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008c: 0x007d, # RIGHT CURLY BRACKET
0x008d: 0x0060, # GRAVE ACCENT
0x008e: 0x00a6, # BROKEN BAR
0x008f: 0x00b1, # PLUS-MINUS SIGN
0x0090: 0x00b0, # DEGREE SIGN
0x0091: 0x006a, # LATIN SMALL LETTER J
0x0092: 0x006b, # LATIN SMALL LETTER K
0x0093: 0x006c, # LATIN SMALL LETTER L
0x0094: 0x006d, # LATIN SMALL LETTER M
0x0095: 0x006e, # LATIN SMALL LETTER N
0x0096: 0x006f, # LATIN SMALL LETTER O
0x0097: 0x0070, # LATIN SMALL LETTER P
0x0098: 0x0071, # LATIN SMALL LETTER Q
0x0099: 0x0072, # LATIN SMALL LETTER R
0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
0x009d: 0x00b8, # CEDILLA
0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x00b5, # MICRO SIGN
0x00a1: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x00a2: 0x0073, # LATIN SMALL LETTER S
0x00a3: 0x0074, # LATIN SMALL LETTER T
0x00a4: 0x0075, # LATIN SMALL LETTER U
0x00a5: 0x0076, # LATIN SMALL LETTER V
0x00a6: 0x0077, # LATIN SMALL LETTER W
0x00a7: 0x0078, # LATIN SMALL LETTER X
0x00a8: 0x0079, # LATIN SMALL LETTER Y
0x00a9: 0x007a, # LATIN SMALL LETTER Z
0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ab: 0x00bf, # INVERTED QUESTION MARK
0x00ac: 0x005d, # RIGHT SQUARE BRACKET
0x00ad: 0x0024, # DOLLAR SIGN
0x00ae: 0x0040, # COMMERCIAL AT
0x00af: 0x00ae, # REGISTERED SIGN
0x00b0: 0x00a2, # CENT SIGN
0x00b1: 0x00a3, # POUND SIGN
0x00b2: 0x00a5, # YEN SIGN
0x00b3: 0x00b7, # MIDDLE DOT
0x00b4: 0x00a9, # COPYRIGHT SIGN
0x00b5: 0x00a7, # SECTION SIGN
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ba: 0x00ac, # NOT SIGN
0x00bb: 0x007c, # VERTICAL LINE
0x00bc: 0x00af, # MACRON
0x00bd: 0x00a8, # DIAERESIS
0x00be: 0x00b4, # ACUTE ACCENT
0x00bf: 0x00d7, # MULTIPLICATION SIGN
0x00c0: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
0x00ca: 0x00ad, # SOFT HYPHEN
0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00cc: 0x007e, # TILDE
0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00d0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
0x00da: 0x00b9, # SUPERSCRIPT ONE
0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00dc: 0x005c, # REVERSE SOLIDUS
0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00e0: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00e1: 0x00f7, # DIVISION SIGN
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
0x00ea: 0x00b2, # SUPERSCRIPT TWO
0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00ec: 0x0023, # NUMBER SIGN
0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00f0: 0x0030, # DIGIT ZERO
0x00f1: 0x0031, # DIGIT ONE
0x00f2: 0x0032, # DIGIT TWO
0x00f3: 0x0033, # DIGIT THREE
0x00f4: 0x0034, # DIGIT FOUR
0x00f5: 0x0035, # DIGIT FIVE
0x00f6: 0x0036, # DIGIT SIX
0x00f7: 0x0037, # DIGIT SEVEN
0x00f8: 0x0038, # DIGIT EIGHT
0x00f9: 0x0039, # DIGIT NINE
0x00fa: 0x00b3, # SUPERSCRIPT THREE
0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0022, # QUOTATION MARK
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

125
Lib/encodings/cp1250.py Normal file
View File

@ -0,0 +1,125 @@
""" Python Character Mapping Codec generated from 'CP1250.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: None, # UNDEFINED
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: None, # UNDEFINED
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: None, # UNDEFINED
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00a1: 0x02c7, # CARON
0x00a2: 0x02d8, # BREVE
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00b2: 0x02db, # OGONEK
0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON
0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON
0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

159
Lib/encodings/cp1251.py Normal file
View File

@ -0,0 +1,159 @@
""" Python Character Mapping Codec generated from 'CP1251.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x20ac, # EURO SIGN
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE
0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE
0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: None, # UNDEFINED
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE
0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE
0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE
0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE
0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE
0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN
0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO
0x00b9: 0x2116, # NUMERO SIGN
0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE
0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE
0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE
0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI
0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A
0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I
0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O
0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U
0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E
0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00e0: 0x0430, # CYRILLIC SMALL LETTER A
0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE
0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE
0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00e8: 0x0438, # CYRILLIC SMALL LETTER I
0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA
0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL
0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM
0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN
0x00ee: 0x043e, # CYRILLIC SMALL LETTER O
0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE
0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER
0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES
0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE
0x00f3: 0x0443, # CYRILLIC SMALL LETTER U
0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF
0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

78
Lib/encodings/cp1252.py Normal file
View File

@ -0,0 +1,78 @@
""" Python Character Mapping Codec generated from 'CP1252.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
0x008d: None, # UNDEFINED
0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x02dc, # SMALL TILDE
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
0x009d: None, # UNDEFINED
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

153
Lib/encodings/cp1253.py Normal file
View File

@ -0,0 +1,153 @@
""" Python Character Mapping Codec generated from 'CP1253.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: None, # UNDEFINED
0x0089: 0x2030, # PER MILLE SIGN
0x008a: None, # UNDEFINED
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: None, # UNDEFINED
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: None, # UNDEFINED
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: None, # UNDEFINED
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: None, # UNDEFINED
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: None, # UNDEFINED
0x00a1: 0x0385, # GREEK DIALYTIKA TONOS
0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x00aa: None, # UNDEFINED
0x00af: 0x2015, # HORIZONTAL BAR
0x00b4: 0x0384, # GREEK TONOS
0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA
0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA
0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA
0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA
0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA
0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA
0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x00cc: 0x039c, # GREEK CAPITAL LETTER MU
0x00cd: 0x039d, # GREEK CAPITAL LETTER NU
0x00ce: 0x039e, # GREEK CAPITAL LETTER XI
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00d2: None, # UNDEFINED
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI
0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI
0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA
0x00e7: 0x03b7, # GREEK SMALL LETTER ETA
0x00e8: 0x03b8, # GREEK SMALL LETTER THETA
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00ec: 0x03bc, # GREEK SMALL LETTER MU
0x00ed: 0x03bd, # GREEK SMALL LETTER NU
0x00ee: 0x03be, # GREEK SMALL LETTER XI
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
0x00f1: 0x03c1, # GREEK SMALL LETTER RHO
0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00f6: 0x03c6, # GREEK SMALL LETTER PHI
0x00f7: 0x03c7, # GREEK SMALL LETTER CHI
0x00f8: 0x03c8, # GREEK SMALL LETTER PSI
0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ff: None, # UNDEFINED
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

84
Lib/encodings/cp1254.py Normal file
View File

@ -0,0 +1,84 @@
""" Python Character Mapping Codec generated from 'CP1254.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x02dc, # SMALL TILDE
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

145
Lib/encodings/cp1255.py Normal file
View File

@ -0,0 +1,145 @@
""" Python Character Mapping Codec generated from 'CP1255.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: None, # UNDEFINED
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: None, # UNDEFINED
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x02dc, # SMALL TILDE
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: None, # UNDEFINED
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: None, # UNDEFINED
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: None, # UNDEFINED
0x00a4: 0x20aa, # NEW SHEQEL SIGN
0x00aa: 0x00d7, # MULTIPLICATION SIGN
0x00ba: 0x00f7, # DIVISION SIGN
0x00c0: 0x05b0, # HEBREW POINT SHEVA
0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL
0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH
0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS
0x00c4: 0x05b4, # HEBREW POINT HIRIQ
0x00c5: 0x05b5, # HEBREW POINT TSERE
0x00c6: 0x05b6, # HEBREW POINT SEGOL
0x00c7: 0x05b7, # HEBREW POINT PATAH
0x00c8: 0x05b8, # HEBREW POINT QAMATS
0x00c9: 0x05b9, # HEBREW POINT HOLAM
0x00ca: None, # UNDEFINED
0x00cb: 0x05bb, # HEBREW POINT QUBUTS
0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ
0x00cd: 0x05bd, # HEBREW POINT METEG
0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF
0x00cf: 0x05bf, # HEBREW POINT RAFE
0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ
0x00d1: 0x05c1, # HEBREW POINT SHIN DOT
0x00d2: 0x05c2, # HEBREW POINT SIN DOT
0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ
0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV
0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD
0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD
0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH
0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM
0x00d9: None, # UNDEFINED
0x00da: None, # UNDEFINED
0x00db: None, # UNDEFINED
0x00dc: None, # UNDEFINED
0x00dd: None, # UNDEFINED
0x00de: None, # UNDEFINED
0x00df: None, # UNDEFINED
0x00e0: 0x05d0, # HEBREW LETTER ALEF
0x00e1: 0x05d1, # HEBREW LETTER BET
0x00e2: 0x05d2, # HEBREW LETTER GIMEL
0x00e3: 0x05d3, # HEBREW LETTER DALET
0x00e4: 0x05d4, # HEBREW LETTER HE
0x00e5: 0x05d5, # HEBREW LETTER VAV
0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
0x00e7: 0x05d7, # HEBREW LETTER HET
0x00e8: 0x05d8, # HEBREW LETTER TET
0x00e9: 0x05d9, # HEBREW LETTER YOD
0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
0x00eb: 0x05db, # HEBREW LETTER KAF
0x00ec: 0x05dc, # HEBREW LETTER LAMED
0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
0x00ee: 0x05de, # HEBREW LETTER MEM
0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
0x00f0: 0x05e0, # HEBREW LETTER NUN
0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
0x00f2: 0x05e2, # HEBREW LETTER AYIN
0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
0x00f4: 0x05e4, # HEBREW LETTER PE
0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
0x00f6: 0x05e6, # HEBREW LETTER TSADI
0x00f7: 0x05e7, # HEBREW LETTER QOF
0x00f8: 0x05e8, # HEBREW LETTER RESH
0x00f9: 0x05e9, # HEBREW LETTER SHIN
0x00fa: 0x05ea, # HEBREW LETTER TAV
0x00fb: None, # UNDEFINED
0x00fc: None, # UNDEFINED
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: None, # UNDEFINED
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

131
Lib/encodings/cp1256.py Normal file
View File

@ -0,0 +1,131 @@
""" Python Character Mapping Codec generated from 'CP1256.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: 0x067e, # ARABIC LETTER PEH
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: 0x0679, # ARABIC LETTER TTEH
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
0x008d: 0x0686, # ARABIC LETTER TCHEH
0x008e: 0x0698, # ARABIC LETTER JEH
0x008f: 0x0688, # ARABIC LETTER DDAL
0x0090: 0x06af, # ARABIC LETTER GAF
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x06a9, # ARABIC LETTER KEHEH
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: 0x0691, # ARABIC LETTER RREH
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
0x009d: 0x200c, # ZERO WIDTH NON-JOINER
0x009e: 0x200d, # ZERO WIDTH JOINER
0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA
0x00a1: 0x060c, # ARABIC COMMA
0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE
0x00ba: 0x061b, # ARABIC SEMICOLON
0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL
0x00c1: 0x0621, # ARABIC LETTER HAMZA
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
0x00c7: 0x0627, # ARABIC LETTER ALEF
0x00c8: 0x0628, # ARABIC LETTER BEH
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
0x00ca: 0x062a, # ARABIC LETTER TEH
0x00cb: 0x062b, # ARABIC LETTER THEH
0x00cc: 0x062c, # ARABIC LETTER JEEM
0x00cd: 0x062d, # ARABIC LETTER HAH
0x00ce: 0x062e, # ARABIC LETTER KHAH
0x00cf: 0x062f, # ARABIC LETTER DAL
0x00d0: 0x0630, # ARABIC LETTER THAL
0x00d1: 0x0631, # ARABIC LETTER REH
0x00d2: 0x0632, # ARABIC LETTER ZAIN
0x00d3: 0x0633, # ARABIC LETTER SEEN
0x00d4: 0x0634, # ARABIC LETTER SHEEN
0x00d5: 0x0635, # ARABIC LETTER SAD
0x00d6: 0x0636, # ARABIC LETTER DAD
0x00d8: 0x0637, # ARABIC LETTER TAH
0x00d9: 0x0638, # ARABIC LETTER ZAH
0x00da: 0x0639, # ARABIC LETTER AIN
0x00db: 0x063a, # ARABIC LETTER GHAIN
0x00dc: 0x0640, # ARABIC TATWEEL
0x00dd: 0x0641, # ARABIC LETTER FEH
0x00de: 0x0642, # ARABIC LETTER QAF
0x00df: 0x0643, # ARABIC LETTER KAF
0x00e1: 0x0644, # ARABIC LETTER LAM
0x00e3: 0x0645, # ARABIC LETTER MEEM
0x00e4: 0x0646, # ARABIC LETTER NOON
0x00e5: 0x0647, # ARABIC LETTER HEH
0x00e6: 0x0648, # ARABIC LETTER WAW
0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA
0x00ed: 0x064a, # ARABIC LETTER YEH
0x00f0: 0x064b, # ARABIC FATHATAN
0x00f1: 0x064c, # ARABIC DAMMATAN
0x00f2: 0x064d, # ARABIC KASRATAN
0x00f3: 0x064e, # ARABIC FATHA
0x00f5: 0x064f, # ARABIC DAMMA
0x00f6: 0x0650, # ARABIC KASRA
0x00f8: 0x0651, # ARABIC SHADDA
0x00fa: 0x0652, # ARABIC SUKUN
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

133
Lib/encodings/cp1257.py Normal file
View File

@ -0,0 +1,133 @@
""" Python Character Mapping Codec generated from 'CP1257.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: None, # UNDEFINED
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: None, # UNDEFINED
0x0089: 0x2030, # PER MILLE SIGN
0x008a: None, # UNDEFINED
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: None, # UNDEFINED
0x008d: 0x00a8, # DIAERESIS
0x008e: 0x02c7, # CARON
0x008f: 0x00b8, # CEDILLA
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: None, # UNDEFINED
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: None, # UNDEFINED
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: None, # UNDEFINED
0x009d: 0x00af, # MACRON
0x009e: 0x02db, # OGONEK
0x009f: None, # UNDEFINED
0x00a1: None, # UNDEFINED
0x00a5: None, # UNDEFINED
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x00af: 0x00c6, # LATIN CAPITAL LETTER AE
0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x00bf: 0x00e6, # LATIN SMALL LETTER AE
0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x02d9, # DOT ABOVE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

92
Lib/encodings/cp1258.py Normal file
View File

@ -0,0 +1,92 @@
""" Python Character Mapping Codec generated from 'CP1258.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: 0x2020, # DAGGER
0x0087: 0x2021, # DOUBLE DAGGER
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x0089: 0x2030, # PER MILLE SIGN
0x008a: None, # UNDEFINED
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: 0x02dc, # SMALL TILDE
0x0099: 0x2122, # TRADE MARK SIGN
0x009a: None, # UNDEFINED
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
0x00cc: 0x0300, # COMBINING GRAVE ACCENT
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d2: 0x0309, # COMBINING HOOK ABOVE
0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN
0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN
0x00de: 0x0303, # COMBINING TILDE
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
0x00ec: 0x0301, # COMBINING ACUTE ACCENT
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00f2: 0x0323, # COMBINING DOT BELOW
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
0x00fe: 0x20ab, # DONG SIGN
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

282
Lib/encodings/cp424.py Normal file
View File

@ -0,0 +1,282 @@
""" Python Character Mapping Codec generated from 'CP424.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0004: 0x009c, # SELECT
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # REQUIRED NEW LINE
0x0007: 0x007f, # DELETE
0x0008: 0x0097, # GRAPHIC ESCAPE
0x0009: 0x008d, # SUPERSCRIPT
0x000a: 0x008e, # REPEAT
0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION
0x0015: 0x0085, # NEW LINE
0x0016: 0x0008, # BACKSPACE
0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION
0x001a: 0x0092, # UNIT BACK SPACE
0x001b: 0x008f, # CUSTOMER USE ONE
0x0020: 0x0080, # DIGIT SELECT
0x0021: 0x0081, # START OF SIGNIFICANCE
0x0022: 0x0082, # FIELD SEPARATOR
0x0023: 0x0083, # WORD UNDERSCORE
0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION
0x0025: 0x000a, # LINE FEED
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
0x0027: 0x001b, # ESCAPE
0x0028: 0x0088, # SET ATTRIBUTE
0x0029: 0x0089, # START FIELD EXTENDED
0x002a: 0x008a, # SET MODE OR SWITCH
0x002b: 0x008b, # CONTROL SEQUENCE PREFIX
0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE
0x002d: 0x0005, # ENQUIRY
0x002e: 0x0006, # ACKNOWLEDGE
0x002f: 0x0007, # BELL
0x0030: 0x0090, # <reserved>
0x0031: 0x0091, # <reserved>
0x0032: 0x0016, # SYNCHRONOUS IDLE
0x0033: 0x0093, # INDEX RETURN
0x0034: 0x0094, # PRESENTATION POSITION
0x0035: 0x0095, # TRANSPARENT
0x0036: 0x0096, # NUMERIC BACKSPACE
0x0037: 0x0004, # END OF TRANSMISSION
0x0038: 0x0098, # SUBSCRIPT
0x0039: 0x0099, # INDENT TABULATION
0x003a: 0x009a, # REVERSE FORM FEED
0x003b: 0x009b, # CUSTOMER USE THREE
0x003c: 0x0014, # DEVICE CONTROL FOUR
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
0x003e: 0x009e, # <reserved>
0x003f: 0x001a, # SUBSTITUTE
0x0040: 0x0020, # SPACE
0x0041: 0x05d0, # HEBREW LETTER ALEF
0x0042: 0x05d1, # HEBREW LETTER BET
0x0043: 0x05d2, # HEBREW LETTER GIMEL
0x0044: 0x05d3, # HEBREW LETTER DALET
0x0045: 0x05d4, # HEBREW LETTER HE
0x0046: 0x05d5, # HEBREW LETTER VAV
0x0047: 0x05d6, # HEBREW LETTER ZAYIN
0x0048: 0x05d7, # HEBREW LETTER HET
0x0049: 0x05d8, # HEBREW LETTER TET
0x004a: 0x00a2, # CENT SIGN
0x004b: 0x002e, # FULL STOP
0x004c: 0x003c, # LESS-THAN SIGN
0x004d: 0x0028, # LEFT PARENTHESIS
0x004e: 0x002b, # PLUS SIGN
0x004f: 0x007c, # VERTICAL LINE
0x0050: 0x0026, # AMPERSAND
0x0051: 0x05d9, # HEBREW LETTER YOD
0x0052: 0x05da, # HEBREW LETTER FINAL KAF
0x0053: 0x05db, # HEBREW LETTER KAF
0x0054: 0x05dc, # HEBREW LETTER LAMED
0x0055: 0x05dd, # HEBREW LETTER FINAL MEM
0x0056: 0x05de, # HEBREW LETTER MEM
0x0057: 0x05df, # HEBREW LETTER FINAL NUN
0x0058: 0x05e0, # HEBREW LETTER NUN
0x0059: 0x05e1, # HEBREW LETTER SAMEKH
0x005a: 0x0021, # EXCLAMATION MARK
0x005b: 0x0024, # DOLLAR SIGN
0x005c: 0x002a, # ASTERISK
0x005d: 0x0029, # RIGHT PARENTHESIS
0x005e: 0x003b, # SEMICOLON
0x005f: 0x00ac, # NOT SIGN
0x0060: 0x002d, # HYPHEN-MINUS
0x0061: 0x002f, # SOLIDUS
0x0062: 0x05e2, # HEBREW LETTER AYIN
0x0063: 0x05e3, # HEBREW LETTER FINAL PE
0x0064: 0x05e4, # HEBREW LETTER PE
0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI
0x0066: 0x05e6, # HEBREW LETTER TSADI
0x0067: 0x05e7, # HEBREW LETTER QOF
0x0068: 0x05e8, # HEBREW LETTER RESH
0x0069: 0x05e9, # HEBREW LETTER SHIN
0x006a: 0x00a6, # BROKEN BAR
0x006b: 0x002c, # COMMA
0x006c: 0x0025, # PERCENT SIGN
0x006d: 0x005f, # LOW LINE
0x006e: 0x003e, # GREATER-THAN SIGN
0x006f: 0x003f, # QUESTION MARK
0x0070: None, # UNDEFINED
0x0071: 0x05ea, # HEBREW LETTER TAV
0x0072: None, # UNDEFINED
0x0073: None, # UNDEFINED
0x0074: 0x00a0, # NO-BREAK SPACE
0x0075: None, # UNDEFINED
0x0076: None, # UNDEFINED
0x0077: None, # UNDEFINED
0x0078: 0x2017, # DOUBLE LOW LINE
0x0079: 0x0060, # GRAVE ACCENT
0x007a: 0x003a, # COLON
0x007b: 0x0023, # NUMBER SIGN
0x007c: 0x0040, # COMMERCIAL AT
0x007d: 0x0027, # APOSTROPHE
0x007e: 0x003d, # EQUALS SIGN
0x007f: 0x0022, # QUOTATION MARK
0x0080: None, # UNDEFINED
0x0081: 0x0061, # LATIN SMALL LETTER A
0x0082: 0x0062, # LATIN SMALL LETTER B
0x0083: 0x0063, # LATIN SMALL LETTER C
0x0084: 0x0064, # LATIN SMALL LETTER D
0x0085: 0x0065, # LATIN SMALL LETTER E
0x0086: 0x0066, # LATIN SMALL LETTER F
0x0087: 0x0067, # LATIN SMALL LETTER G
0x0088: 0x0068, # LATIN SMALL LETTER H
0x0089: 0x0069, # LATIN SMALL LETTER I
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008c: None, # UNDEFINED
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: 0x00b1, # PLUS-MINUS SIGN
0x0090: 0x00b0, # DEGREE SIGN
0x0091: 0x006a, # LATIN SMALL LETTER J
0x0092: 0x006b, # LATIN SMALL LETTER K
0x0093: 0x006c, # LATIN SMALL LETTER L
0x0094: 0x006d, # LATIN SMALL LETTER M
0x0095: 0x006e, # LATIN SMALL LETTER N
0x0096: 0x006f, # LATIN SMALL LETTER O
0x0097: 0x0070, # LATIN SMALL LETTER P
0x0098: 0x0071, # LATIN SMALL LETTER Q
0x0099: 0x0072, # LATIN SMALL LETTER R
0x009a: None, # UNDEFINED
0x009b: None, # UNDEFINED
0x009c: None, # UNDEFINED
0x009d: 0x00b8, # CEDILLA
0x009e: None, # UNDEFINED
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x00b5, # MICRO SIGN
0x00a1: 0x007e, # TILDE
0x00a2: 0x0073, # LATIN SMALL LETTER S
0x00a3: 0x0074, # LATIN SMALL LETTER T
0x00a4: 0x0075, # LATIN SMALL LETTER U
0x00a5: 0x0076, # LATIN SMALL LETTER V
0x00a6: 0x0077, # LATIN SMALL LETTER W
0x00a7: 0x0078, # LATIN SMALL LETTER X
0x00a8: 0x0079, # LATIN SMALL LETTER Y
0x00a9: 0x007a, # LATIN SMALL LETTER Z
0x00aa: None, # UNDEFINED
0x00ab: None, # UNDEFINED
0x00ac: None, # UNDEFINED
0x00ad: None, # UNDEFINED
0x00ae: None, # UNDEFINED
0x00af: 0x00ae, # REGISTERED SIGN
0x00b0: 0x005e, # CIRCUMFLEX ACCENT
0x00b1: 0x00a3, # POUND SIGN
0x00b2: 0x00a5, # YEN SIGN
0x00b3: 0x00b7, # MIDDLE DOT
0x00b4: 0x00a9, # COPYRIGHT SIGN
0x00b5: 0x00a7, # SECTION SIGN
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ba: 0x005b, # LEFT SQUARE BRACKET
0x00bb: 0x005d, # RIGHT SQUARE BRACKET
0x00bc: 0x00af, # MACRON
0x00bd: 0x00a8, # DIAERESIS
0x00be: 0x00b4, # ACUTE ACCENT
0x00bf: 0x00d7, # MULTIPLICATION SIGN
0x00c0: 0x007b, # LEFT CURLY BRACKET
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
0x00ca: 0x00ad, # SOFT HYPHEN
0x00cb: None, # UNDEFINED
0x00cc: None, # UNDEFINED
0x00cd: None, # UNDEFINED
0x00ce: None, # UNDEFINED
0x00cf: None, # UNDEFINED
0x00d0: 0x007d, # RIGHT CURLY BRACKET
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
0x00da: 0x00b9, # SUPERSCRIPT ONE
0x00db: None, # UNDEFINED
0x00dc: None, # UNDEFINED
0x00dd: None, # UNDEFINED
0x00de: None, # UNDEFINED
0x00df: None, # UNDEFINED
0x00e0: 0x005c, # REVERSE SOLIDUS
0x00e1: 0x00f7, # DIVISION SIGN
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
0x00ea: 0x00b2, # SUPERSCRIPT TWO
0x00eb: None, # UNDEFINED
0x00ec: None, # UNDEFINED
0x00ed: None, # UNDEFINED
0x00ee: None, # UNDEFINED
0x00ef: None, # UNDEFINED
0x00f0: 0x0030, # DIGIT ZERO
0x00f1: 0x0031, # DIGIT ONE
0x00f2: 0x0032, # DIGIT TWO
0x00f3: 0x0033, # DIGIT THREE
0x00f4: 0x0034, # DIGIT FOUR
0x00f5: 0x0035, # DIGIT FIVE
0x00f6: 0x0036, # DIGIT SIX
0x00f7: 0x0037, # DIGIT SEVEN
0x00f8: 0x0038, # DIGIT EIGHT
0x00f9: 0x0039, # DIGIT NINE
0x00fa: 0x00b3, # SUPERSCRIPT THREE
0x00fb: None, # UNDEFINED
0x00fc: None, # UNDEFINED
0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED
0x00ff: 0x009f, # EIGHT ONES
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp437.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP437.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00a5, # YEN SIGN
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

282
Lib/encodings/cp500.py Normal file
View File

@ -0,0 +1,282 @@
""" Python Character Mapping Codec generated from 'CP500.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x0007: 0x007f, # DELETE
0x0008: 0x0097, # CONTROL
0x0009: 0x008d, # CONTROL
0x000a: 0x008e, # CONTROL
0x0014: 0x009d, # CONTROL
0x0015: 0x0085, # CONTROL
0x0016: 0x0008, # BACKSPACE
0x0017: 0x0087, # CONTROL
0x001a: 0x0092, # CONTROL
0x001b: 0x008f, # CONTROL
0x0020: 0x0080, # CONTROL
0x0021: 0x0081, # CONTROL
0x0022: 0x0082, # CONTROL
0x0023: 0x0083, # CONTROL
0x0024: 0x0084, # CONTROL
0x0025: 0x000a, # LINE FEED
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
0x0027: 0x001b, # ESCAPE
0x0028: 0x0088, # CONTROL
0x0029: 0x0089, # CONTROL
0x002a: 0x008a, # CONTROL
0x002b: 0x008b, # CONTROL
0x002c: 0x008c, # CONTROL
0x002d: 0x0005, # ENQUIRY
0x002e: 0x0006, # ACKNOWLEDGE
0x002f: 0x0007, # BELL
0x0030: 0x0090, # CONTROL
0x0031: 0x0091, # CONTROL
0x0032: 0x0016, # SYNCHRONOUS IDLE
0x0033: 0x0093, # CONTROL
0x0034: 0x0094, # CONTROL
0x0035: 0x0095, # CONTROL
0x0036: 0x0096, # CONTROL
0x0037: 0x0004, # END OF TRANSMISSION
0x0038: 0x0098, # CONTROL
0x0039: 0x0099, # CONTROL
0x003a: 0x009a, # CONTROL
0x003b: 0x009b, # CONTROL
0x003c: 0x0014, # DEVICE CONTROL FOUR
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
0x003e: 0x009e, # CONTROL
0x003f: 0x001a, # SUBSTITUTE
0x0040: 0x0020, # SPACE
0x0041: 0x00a0, # NO-BREAK SPACE
0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x004a: 0x005b, # LEFT SQUARE BRACKET
0x004b: 0x002e, # FULL STOP
0x004c: 0x003c, # LESS-THAN SIGN
0x004d: 0x0028, # LEFT PARENTHESIS
0x004e: 0x002b, # PLUS SIGN
0x004f: 0x0021, # EXCLAMATION MARK
0x0050: 0x0026, # AMPERSAND
0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x005a: 0x005d, # RIGHT SQUARE BRACKET
0x005b: 0x0024, # DOLLAR SIGN
0x005c: 0x002a, # ASTERISK
0x005d: 0x0029, # RIGHT PARENTHESIS
0x005e: 0x003b, # SEMICOLON
0x005f: 0x005e, # CIRCUMFLEX ACCENT
0x0060: 0x002d, # HYPHEN-MINUS
0x0061: 0x002f, # SOLIDUS
0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x006a: 0x00a6, # BROKEN BAR
0x006b: 0x002c, # COMMA
0x006c: 0x0025, # PERCENT SIGN
0x006d: 0x005f, # LOW LINE
0x006e: 0x003e, # GREATER-THAN SIGN
0x006f: 0x003f, # QUESTION MARK
0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x0079: 0x0060, # GRAVE ACCENT
0x007a: 0x003a, # COLON
0x007b: 0x0023, # NUMBER SIGN
0x007c: 0x0040, # COMMERCIAL AT
0x007d: 0x0027, # APOSTROPHE
0x007e: 0x003d, # EQUALS SIGN
0x007f: 0x0022, # QUOTATION MARK
0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x0081: 0x0061, # LATIN SMALL LETTER A
0x0082: 0x0062, # LATIN SMALL LETTER B
0x0083: 0x0063, # LATIN SMALL LETTER C
0x0084: 0x0064, # LATIN SMALL LETTER D
0x0085: 0x0065, # LATIN SMALL LETTER E
0x0086: 0x0066, # LATIN SMALL LETTER F
0x0087: 0x0067, # LATIN SMALL LETTER G
0x0088: 0x0068, # LATIN SMALL LETTER H
0x0089: 0x0069, # LATIN SMALL LETTER I
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC)
0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC)
0x008f: 0x00b1, # PLUS-MINUS SIGN
0x0090: 0x00b0, # DEGREE SIGN
0x0091: 0x006a, # LATIN SMALL LETTER J
0x0092: 0x006b, # LATIN SMALL LETTER K
0x0093: 0x006c, # LATIN SMALL LETTER L
0x0094: 0x006d, # LATIN SMALL LETTER M
0x0095: 0x006e, # LATIN SMALL LETTER N
0x0096: 0x006f, # LATIN SMALL LETTER O
0x0097: 0x0070, # LATIN SMALL LETTER P
0x0098: 0x0071, # LATIN SMALL LETTER Q
0x0099: 0x0072, # LATIN SMALL LETTER R
0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
0x009d: 0x00b8, # CEDILLA
0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x00b5, # MICRO SIGN
0x00a1: 0x007e, # TILDE
0x00a2: 0x0073, # LATIN SMALL LETTER S
0x00a3: 0x0074, # LATIN SMALL LETTER T
0x00a4: 0x0075, # LATIN SMALL LETTER U
0x00a5: 0x0076, # LATIN SMALL LETTER V
0x00a6: 0x0077, # LATIN SMALL LETTER W
0x00a7: 0x0078, # LATIN SMALL LETTER X
0x00a8: 0x0079, # LATIN SMALL LETTER Y
0x00a9: 0x007a, # LATIN SMALL LETTER Z
0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ab: 0x00bf, # INVERTED QUESTION MARK
0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC)
0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC)
0x00af: 0x00ae, # REGISTERED SIGN
0x00b0: 0x00a2, # CENT SIGN
0x00b1: 0x00a3, # POUND SIGN
0x00b2: 0x00a5, # YEN SIGN
0x00b3: 0x00b7, # MIDDLE DOT
0x00b4: 0x00a9, # COPYRIGHT SIGN
0x00b5: 0x00a7, # SECTION SIGN
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ba: 0x00ac, # NOT SIGN
0x00bb: 0x007c, # VERTICAL LINE
0x00bc: 0x00af, # MACRON
0x00bd: 0x00a8, # DIAERESIS
0x00be: 0x00b4, # ACUTE ACCENT
0x00bf: 0x00d7, # MULTIPLICATION SIGN
0x00c0: 0x007b, # LEFT CURLY BRACKET
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
0x00ca: 0x00ad, # SOFT HYPHEN
0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00d0: 0x007d, # RIGHT CURLY BRACKET
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
0x00da: 0x00b9, # SUPERSCRIPT ONE
0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00e0: 0x005c, # REVERSE SOLIDUS
0x00e1: 0x00f7, # DIVISION SIGN
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
0x00ea: 0x00b2, # SUPERSCRIPT TWO
0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00f0: 0x0030, # DIGIT ZERO
0x00f1: 0x0031, # DIGIT ONE
0x00f2: 0x0032, # DIGIT TWO
0x00f3: 0x0033, # DIGIT THREE
0x00f4: 0x0034, # DIGIT FOUR
0x00f5: 0x0035, # DIGIT FIVE
0x00f6: 0x0036, # DIGIT SIX
0x00f7: 0x0037, # DIGIT SEVEN
0x00f8: 0x0038, # DIGIT EIGHT
0x00f9: 0x0039, # DIGIT NINE
0x00fa: 0x00b3, # SUPERSCRIPT THREE
0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp737.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP737.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA
0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA
0x0086: 0x0397, # GREEK CAPITAL LETTER ETA
0x0087: 0x0398, # GREEK CAPITAL LETTER THETA
0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA
0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x008b: 0x039c, # GREEK CAPITAL LETTER MU
0x008c: 0x039d, # GREEK CAPITAL LETTER NU
0x008d: 0x039e, # GREEK CAPITAL LETTER XI
0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x008f: 0x03a0, # GREEK CAPITAL LETTER PI
0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO
0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU
0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI
0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI
0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI
0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA
0x0099: 0x03b2, # GREEK SMALL LETTER BETA
0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA
0x009b: 0x03b4, # GREEK SMALL LETTER DELTA
0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON
0x009d: 0x03b6, # GREEK SMALL LETTER ZETA
0x009e: 0x03b7, # GREEK SMALL LETTER ETA
0x009f: 0x03b8, # GREEK SMALL LETTER THETA
0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA
0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00a3: 0x03bc, # GREEK SMALL LETTER MU
0x00a4: 0x03bd, # GREEK SMALL LETTER NU
0x00a5: 0x03be, # GREEK SMALL LETTER XI
0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00a7: 0x03c0, # GREEK SMALL LETTER PI
0x00a8: 0x03c1, # GREEK SMALL LETTER RHO
0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00ab: 0x03c4, # GREEK SMALL LETTER TAU
0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00ad: 0x03c6, # GREEK SMALL LETTER PHI
0x00ae: 0x03c7, # GREEK SMALL LETTER CHI
0x00af: 0x03c8, # GREEK SMALL LETTER PSI
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp775.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP775.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x0096: 0x00a2, # CENT SIGN
0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00a7: 0x00a6, # BROKEN BAR
0x00a8: 0x00a9, # COPYRIGHT SIGN
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp850.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP850.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2017, # DOUBLE LOW LINE
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp852.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP852.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON
0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON
0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON
0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON
0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT
0x00f2: 0x02db, # OGONEK
0x00f3: 0x02c7, # CARON
0x00f4: 0x02d8, # BREVE
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x02d9, # DOT ABOVE
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp855.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP855.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x0084: 0x0451, # CYRILLIC SMALL LETTER IO
0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE
0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE
0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x008c: 0x0457, # CYRILLIC SMALL LETTER YI
0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x008e: 0x0458, # CYRILLIC SMALL LETTER JE
0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE
0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE
0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE
0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE
0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE
0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE
0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE
0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE
0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE
0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
0x009c: 0x044e, # CYRILLIC SMALL LETTER YU
0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A
0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE
0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE
0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE
0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF
0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x00b7: 0x0438, # CYRILLIC SMALL LETTER I
0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA
0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL
0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM
0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN
0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x00d6: 0x043e, # CYRILLIC SMALL LETTER O
0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O
0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x00de: 0x044f, # CYRILLIC SMALL LETTER YA
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER
0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES
0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE
0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x00e7: 0x0443, # CYRILLIC SMALL LETTER U
0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U
0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE
0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x00ef: 0x2116, # NUMERO SIGN
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x00f7: 0x044d, # CYRILLIC SMALL LETTER E
0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E
0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp856.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP856.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL
0x0083: 0x05d3, # HEBREW LETTER DALET
0x0084: 0x05d4, # HEBREW LETTER HE
0x0085: 0x05d5, # HEBREW LETTER VAV
0x0086: 0x05d6, # HEBREW LETTER ZAYIN
0x0087: 0x05d7, # HEBREW LETTER HET
0x0088: 0x05d8, # HEBREW LETTER TET
0x0089: 0x05d9, # HEBREW LETTER YOD
0x008a: 0x05da, # HEBREW LETTER FINAL KAF
0x008b: 0x05db, # HEBREW LETTER KAF
0x008c: 0x05dc, # HEBREW LETTER LAMED
0x008d: 0x05dd, # HEBREW LETTER FINAL MEM
0x008e: 0x05de, # HEBREW LETTER MEM
0x008f: 0x05df, # HEBREW LETTER FINAL NUN
0x0090: 0x05e0, # HEBREW LETTER NUN
0x0091: 0x05e1, # HEBREW LETTER SAMEKH
0x0092: 0x05e2, # HEBREW LETTER AYIN
0x0093: 0x05e3, # HEBREW LETTER FINAL PE
0x0094: 0x05e4, # HEBREW LETTER PE
0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI
0x0096: 0x05e6, # HEBREW LETTER TSADI
0x0097: 0x05e7, # HEBREW LETTER QOF
0x0098: 0x05e8, # HEBREW LETTER RESH
0x0099: 0x05e9, # HEBREW LETTER SHIN
0x009a: 0x05ea, # HEBREW LETTER TAV
0x009b: None, # UNDEFINED
0x009c: 0x00a3, # POUND SIGN
0x009d: None, # UNDEFINED
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: None, # UNDEFINED
0x00a0: None, # UNDEFINED
0x00a1: None, # UNDEFINED
0x00a2: None, # UNDEFINED
0x00a3: None, # UNDEFINED
0x00a4: None, # UNDEFINED
0x00a5: None, # UNDEFINED
0x00a6: None, # UNDEFINED
0x00a7: None, # UNDEFINED
0x00a8: None, # UNDEFINED
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: None, # UNDEFINED
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: None, # UNDEFINED
0x00b6: None, # UNDEFINED
0x00b7: None, # UNDEFINED
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: None, # UNDEFINED
0x00c7: None, # UNDEFINED
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: None, # UNDEFINED
0x00d1: None, # UNDEFINED
0x00d2: None, # UNDEFINED
0x00d3: None, # UNDEFINEDS
0x00d4: None, # UNDEFINED
0x00d5: None, # UNDEFINED
0x00d6: None, # UNDEFINEDE
0x00d7: None, # UNDEFINED
0x00d8: None, # UNDEFINED
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: None, # UNDEFINED
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: None, # UNDEFINED
0x00e1: None, # UNDEFINED
0x00e2: None, # UNDEFINED
0x00e3: None, # UNDEFINED
0x00e4: None, # UNDEFINED
0x00e5: None, # UNDEFINED
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: None, # UNDEFINED
0x00e8: None, # UNDEFINED
0x00e9: None, # UNDEFINED
0x00ea: None, # UNDEFINED
0x00eb: None, # UNDEFINED
0x00ec: None, # UNDEFINED
0x00ed: None, # UNDEFINED
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2017, # DOUBLE LOW LINE
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

173
Lib/encodings/cp857.py Normal file
View File

@ -0,0 +1,173 @@
""" Python Character Mapping Codec generated from 'CP857.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00d5: None, # UNDEFINED
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: None, # UNDEFINED
0x00e8: 0x00d7, # MULTIPLICATION SIGN
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: None, # UNDEFINED
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp860.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP860.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp861.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP861.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH
0x008c: 0x00f0, # LATIN SMALL LETTER ETH
0x008d: 0x00de, # LATIN CAPITAL LETTER THORN
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00fe, # LATIN SMALL LETTER THORN
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp862.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP862.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL
0x0083: 0x05d3, # HEBREW LETTER DALET
0x0084: 0x05d4, # HEBREW LETTER HE
0x0085: 0x05d5, # HEBREW LETTER VAV
0x0086: 0x05d6, # HEBREW LETTER ZAYIN
0x0087: 0x05d7, # HEBREW LETTER HET
0x0088: 0x05d8, # HEBREW LETTER TET
0x0089: 0x05d9, # HEBREW LETTER YOD
0x008a: 0x05da, # HEBREW LETTER FINAL KAF
0x008b: 0x05db, # HEBREW LETTER KAF
0x008c: 0x05dc, # HEBREW LETTER LAMED
0x008d: 0x05dd, # HEBREW LETTER FINAL MEM
0x008e: 0x05de, # HEBREW LETTER MEM
0x008f: 0x05df, # HEBREW LETTER FINAL NUN
0x0090: 0x05e0, # HEBREW LETTER NUN
0x0091: 0x05e1, # HEBREW LETTER SAMEKH
0x0092: 0x05e2, # HEBREW LETTER AYIN
0x0093: 0x05e3, # HEBREW LETTER FINAL PE
0x0094: 0x05e4, # HEBREW LETTER PE
0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI
0x0096: 0x05e6, # HEBREW LETTER TSADI
0x0097: 0x05e7, # HEBREW LETTER QOF
0x0098: 0x05e8, # HEBREW LETTER RESH
0x0099: 0x05e9, # HEBREW LETTER SHIN
0x009a: 0x05ea, # HEBREW LETTER TAV
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00a5, # YEN SIGN
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp863.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP863.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00b6, # PILCROW SIGN
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x2017, # DOUBLE LOW LINE
0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x008f: 0x00a7, # SECTION SIGN
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00a4, # CURRENCY SIGN
0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00a6, # BROKEN BAR
0x00a1: 0x00b4, # ACUTE ACCENT
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00a8, # DIAERESIS
0x00a5: 0x00b8, # CEDILLA
0x00a6: 0x00b3, # SUPERSCRIPT THREE
0x00a7: 0x00af, # MACRON
0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

172
Lib/encodings/cp864.py Normal file
View File

@ -0,0 +1,172 @@
""" Python Character Mapping Codec generated from 'CP864.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0025: 0x066a, # ARABIC PERCENT SIGN
0x0080: 0x00b0, # DEGREE SIGN
0x0081: 0x00b7, # MIDDLE DOT
0x0082: 0x2219, # BULLET OPERATOR
0x0083: 0x221a, # SQUARE ROOT
0x0084: 0x2592, # MEDIUM SHADE
0x0085: 0x2500, # FORMS LIGHT HORIZONTAL
0x0086: 0x2502, # FORMS LIGHT VERTICAL
0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL
0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT
0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL
0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT
0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL
0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT
0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT
0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT
0x008f: 0x2518, # FORMS LIGHT UP AND LEFT
0x0090: 0x03b2, # GREEK SMALL BETA
0x0091: 0x221e, # INFINITY
0x0092: 0x03c6, # GREEK SMALL PHI
0x0093: 0x00b1, # PLUS-OR-MINUS SIGN
0x0094: 0x00bd, # FRACTION 1/2
0x0095: 0x00bc, # FRACTION 1/4
0x0096: 0x2248, # ALMOST EQUAL TO
0x0097: 0x00ab, # LEFT POINTING GUILLEMET
0x0098: 0x00bb, # RIGHT POINTING GUILLEMET
0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
0x009b: None, # UNDEFINED
0x009c: None, # UNDEFINED
0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
0x009f: None, # UNDEFINED
0x00a1: 0x00ad, # SOFT HYPHEN
0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
0x00a6: None, # UNDEFINED
0x00a7: None, # UNDEFINED
0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
0x00ac: 0x060c, # ARABIC COMMA
0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO
0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE
0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO
0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE
0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR
0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE
0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX
0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN
0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT
0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE
0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
0x00bb: 0x061b, # ARABIC SEMICOLON
0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c0: 0x00a2, # CENT SIGN
0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM
0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM
0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
0x00db: 0x00a6, # BROKEN VERTICAL BAR
0x00dc: 0x00ac, # NOT SIGN
0x00dd: 0x00f7, # DIVISION SIGN
0x00de: 0x00d7, # MULTIPLICATION SIGN
0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM
0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
0x00f1: 0x0651, # ARABIC SHADDAH
0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM
0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM
0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM
0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM
0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: None, # UNDEFINED
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp865.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP865.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00a4, # CURRENCY SIGN
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp866.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP866.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x2116, # NUMERO SIGN
0x00fd: 0x00a4, # CURRENCY SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/cp869.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'CP869.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: None, # UNDEFINED
0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED
0x0083: None, # UNDEFINED
0x0084: None, # UNDEFINED
0x0085: None, # UNDEFINED
0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x0087: None, # UNDEFINED
0x0088: 0x00b7, # MIDDLE DOT
0x0089: 0x00ac, # NOT SIGN
0x008a: 0x00a6, # BROKEN BAR
0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK
0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x008e: 0x2015, # HORIZONTAL BAR
0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x0093: None, # UNDEFINED
0x0094: None, # UNDEFINED
0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x0097: 0x00a9, # COPYRIGHT SIGN
0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x0099: 0x00b2, # SUPERSCRIPT TWO
0x009a: 0x00b3, # SUPERSCRIPT THREE
0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA
0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA
0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA
0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x00b7: 0x039c, # GREEK CAPITAL LETTER MU
0x00b8: 0x039d, # GREEK CAPITAL LETTER NU
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x039e, # GREEK CAPITAL LETTER XI
0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI
0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI
0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI
0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00d7: 0x03b2, # GREEK SMALL LETTER BETA
0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA
0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA
0x00e1: 0x03b7, # GREEK SMALL LETTER ETA
0x00e2: 0x03b8, # GREEK SMALL LETTER THETA
0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA
0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00e6: 0x03bc, # GREEK SMALL LETTER MU
0x00e7: 0x03bd, # GREEK SMALL LETTER NU
0x00e8: 0x03be, # GREEK SMALL LETTER XI
0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00ea: 0x03c0, # GREEK SMALL LETTER PI
0x00eb: 0x03c1, # GREEK SMALL LETTER RHO
0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00ee: 0x03c4, # GREEK SMALL LETTER TAU
0x00ef: 0x0384, # GREEK TONOS
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00f3: 0x03c6, # GREEK SMALL LETTER PHI
0x00f4: 0x03c7, # GREEK SMALL LETTER CHI
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x03c8, # GREEK SMALL LETTER PSI
0x00f7: 0x0385, # GREEK DIALYTIKA TONOS
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

173
Lib/encodings/cp874.py Normal file
View File

@ -0,0 +1,173 @@
""" Python Character Mapping Codec generated from 'CP874.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED
0x0083: None, # UNDEFINED
0x0084: None, # UNDEFINED
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
0x0086: None, # UNDEFINED
0x0087: None, # UNDEFINED
0x0088: None, # UNDEFINED
0x0089: None, # UNDEFINED
0x008a: None, # UNDEFINED
0x008b: None, # UNDEFINED
0x008c: None, # UNDEFINED
0x008d: None, # UNDEFINED
0x008e: None, # UNDEFINED
0x008f: None, # UNDEFINED
0x0090: None, # UNDEFINED
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x0095: 0x2022, # BULLET
0x0096: 0x2013, # EN DASH
0x0097: 0x2014, # EM DASH
0x0098: None, # UNDEFINED
0x0099: None, # UNDEFINED
0x009a: None, # UNDEFINED
0x009b: None, # UNDEFINED
0x009c: None, # UNDEFINED
0x009d: None, # UNDEFINED
0x009e: None, # UNDEFINED
0x009f: None, # UNDEFINED
0x00a1: 0x0e01, # THAI CHARACTER KO KAI
0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI
0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT
0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI
0x00a5: 0x0e05, # THAI CHARACTER KHO KHON
0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG
0x00a7: 0x0e07, # THAI CHARACTER NGO NGU
0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN
0x00a9: 0x0e09, # THAI CHARACTER CHO CHING
0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG
0x00ab: 0x0e0b, # THAI CHARACTER SO SO
0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE
0x00ad: 0x0e0d, # THAI CHARACTER YO YING
0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA
0x00af: 0x0e0f, # THAI CHARACTER TO PATAK
0x00b0: 0x0e10, # THAI CHARACTER THO THAN
0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO
0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO
0x00b3: 0x0e13, # THAI CHARACTER NO NEN
0x00b4: 0x0e14, # THAI CHARACTER DO DEK
0x00b5: 0x0e15, # THAI CHARACTER TO TAO
0x00b6: 0x0e16, # THAI CHARACTER THO THUNG
0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN
0x00b8: 0x0e18, # THAI CHARACTER THO THONG
0x00b9: 0x0e19, # THAI CHARACTER NO NU
0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI
0x00bb: 0x0e1b, # THAI CHARACTER PO PLA
0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG
0x00bd: 0x0e1d, # THAI CHARACTER FO FA
0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN
0x00bf: 0x0e1f, # THAI CHARACTER FO FAN
0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO
0x00c1: 0x0e21, # THAI CHARACTER MO MA
0x00c2: 0x0e22, # THAI CHARACTER YO YAK
0x00c3: 0x0e23, # THAI CHARACTER RO RUA
0x00c4: 0x0e24, # THAI CHARACTER RU
0x00c5: 0x0e25, # THAI CHARACTER LO LING
0x00c6: 0x0e26, # THAI CHARACTER LU
0x00c7: 0x0e27, # THAI CHARACTER WO WAEN
0x00c8: 0x0e28, # THAI CHARACTER SO SALA
0x00c9: 0x0e29, # THAI CHARACTER SO RUSI
0x00ca: 0x0e2a, # THAI CHARACTER SO SUA
0x00cb: 0x0e2b, # THAI CHARACTER HO HIP
0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA
0x00cd: 0x0e2d, # THAI CHARACTER O ANG
0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK
0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI
0x00d0: 0x0e30, # THAI CHARACTER SARA A
0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT
0x00d2: 0x0e32, # THAI CHARACTER SARA AA
0x00d3: 0x0e33, # THAI CHARACTER SARA AM
0x00d4: 0x0e34, # THAI CHARACTER SARA I
0x00d5: 0x0e35, # THAI CHARACTER SARA II
0x00d6: 0x0e36, # THAI CHARACTER SARA UE
0x00d7: 0x0e37, # THAI CHARACTER SARA UEE
0x00d8: 0x0e38, # THAI CHARACTER SARA U
0x00d9: 0x0e39, # THAI CHARACTER SARA UU
0x00da: 0x0e3a, # THAI CHARACTER PHINTHU
0x00db: None, # UNDEFINED
0x00dc: None, # UNDEFINED
0x00dd: None, # UNDEFINED
0x00de: None, # UNDEFINED
0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT
0x00e0: 0x0e40, # THAI CHARACTER SARA E
0x00e1: 0x0e41, # THAI CHARACTER SARA AE
0x00e2: 0x0e42, # THAI CHARACTER SARA O
0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN
0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI
0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO
0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK
0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU
0x00e8: 0x0e48, # THAI CHARACTER MAI EK
0x00e9: 0x0e49, # THAI CHARACTER MAI THO
0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI
0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA
0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT
0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT
0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN
0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN
0x00f0: 0x0e50, # THAI DIGIT ZERO
0x00f1: 0x0e51, # THAI DIGIT ONE
0x00f2: 0x0e52, # THAI DIGIT TWO
0x00f3: 0x0e53, # THAI DIGIT THREE
0x00f4: 0x0e54, # THAI DIGIT FOUR
0x00f5: 0x0e55, # THAI DIGIT FIVE
0x00f6: 0x0e56, # THAI DIGIT SIX
0x00f7: 0x0e57, # THAI DIGIT SEVEN
0x00f8: 0x0e58, # THAI DIGIT EIGHT
0x00f9: 0x0e59, # THAI DIGIT NINE
0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU
0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT
0x00fc: None, # UNDEFINED
0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED
0x00ff: None, # UNDEFINED
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

283
Lib/encodings/cp875.py Normal file
View File

@ -0,0 +1,283 @@
""" Python Character Mapping Codec generated from 'CP875.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x0007: 0x007f, # DELETE
0x0008: 0x0097, # CONTROL
0x0009: 0x008d, # CONTROL
0x000a: 0x008e, # CONTROL
0x0014: 0x009d, # CONTROL
0x0015: 0x0085, # CONTROL
0x0016: 0x0008, # BACKSPACE
0x0017: 0x0087, # CONTROL
0x001a: 0x0092, # CONTROL
0x001b: 0x008f, # CONTROL
0x0020: 0x0080, # CONTROL
0x0021: 0x0081, # CONTROL
0x0022: 0x0082, # CONTROL
0x0023: 0x0083, # CONTROL
0x0024: 0x0084, # CONTROL
0x0025: 0x000a, # LINE FEED
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
0x0027: 0x001b, # ESCAPE
0x0028: 0x0088, # CONTROL
0x0029: 0x0089, # CONTROL
0x002a: 0x008a, # CONTROL
0x002b: 0x008b, # CONTROL
0x002c: 0x008c, # CONTROL
0x002d: 0x0005, # ENQUIRY
0x002e: 0x0006, # ACKNOWLEDGE
0x002f: 0x0007, # BELL
0x0030: 0x0090, # CONTROL
0x0031: 0x0091, # CONTROL
0x0032: 0x0016, # SYNCHRONOUS IDLE
0x0033: 0x0093, # CONTROL
0x0034: 0x0094, # CONTROL
0x0035: 0x0095, # CONTROL
0x0036: 0x0096, # CONTROL
0x0037: 0x0004, # END OF TRANSMISSION
0x0038: 0x0098, # CONTROL
0x0039: 0x0099, # CONTROL
0x003a: 0x009a, # CONTROL
0x003b: 0x009b, # CONTROL
0x003c: 0x0014, # DEVICE CONTROL FOUR
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
0x003e: 0x009e, # CONTROL
0x003f: 0x001a, # SUBSTITUTE
0x0040: 0x0020, # SPACE
0x0041: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x0042: 0x0392, # GREEK CAPITAL LETTER BETA
0x0043: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x0044: 0x0394, # GREEK CAPITAL LETTER DELTA
0x0045: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x0046: 0x0396, # GREEK CAPITAL LETTER ZETA
0x0047: 0x0397, # GREEK CAPITAL LETTER ETA
0x0048: 0x0398, # GREEK CAPITAL LETTER THETA
0x0049: 0x0399, # GREEK CAPITAL LETTER IOTA
0x004a: 0x005b, # LEFT SQUARE BRACKET
0x004b: 0x002e, # FULL STOP
0x004c: 0x003c, # LESS-THAN SIGN
0x004d: 0x0028, # LEFT PARENTHESIS
0x004e: 0x002b, # PLUS SIGN
0x004f: 0x0021, # EXCLAMATION MARK
0x0050: 0x0026, # AMPERSAND
0x0051: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x0052: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x0053: 0x039c, # GREEK CAPITAL LETTER MU
0x0054: 0x039d, # GREEK CAPITAL LETTER NU
0x0055: 0x039e, # GREEK CAPITAL LETTER XI
0x0056: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x0057: 0x03a0, # GREEK CAPITAL LETTER PI
0x0058: 0x03a1, # GREEK CAPITAL LETTER RHO
0x0059: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x005a: 0x005d, # RIGHT SQUARE BRACKET
0x005b: 0x0024, # DOLLAR SIGN
0x005c: 0x002a, # ASTERISK
0x005d: 0x0029, # RIGHT PARENTHESIS
0x005e: 0x003b, # SEMICOLON
0x005f: 0x005e, # CIRCUMFLEX ACCENT
0x0060: 0x002d, # HYPHEN-MINUS
0x0061: 0x002f, # SOLIDUS
0x0062: 0x03a4, # GREEK CAPITAL LETTER TAU
0x0063: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x0064: 0x03a6, # GREEK CAPITAL LETTER PHI
0x0065: 0x03a7, # GREEK CAPITAL LETTER CHI
0x0066: 0x03a8, # GREEK CAPITAL LETTER PSI
0x0067: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x0068: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x0069: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x006a: 0x007c, # VERTICAL LINE
0x006b: 0x002c, # COMMA
0x006c: 0x0025, # PERCENT SIGN
0x006d: 0x005f, # LOW LINE
0x006e: 0x003e, # GREATER-THAN SIGN
0x006f: 0x003f, # QUESTION MARK
0x0070: 0x00a8, # DIAERESIS
0x0071: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x0072: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x0073: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x0074: 0x00a0, # NO-BREAK SPACE
0x0075: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x0076: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x0077: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x0078: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x0079: 0x0060, # GRAVE ACCENT
0x007a: 0x003a, # COLON
0x007b: 0x0023, # NUMBER SIGN
0x007c: 0x0040, # COMMERCIAL AT
0x007d: 0x0027, # APOSTROPHE
0x007e: 0x003d, # EQUALS SIGN
0x007f: 0x0022, # QUOTATION MARK
0x0080: 0x0385, # GREEK DIALYTIKA TONOS
0x0081: 0x0061, # LATIN SMALL LETTER A
0x0082: 0x0062, # LATIN SMALL LETTER B
0x0083: 0x0063, # LATIN SMALL LETTER C
0x0084: 0x0064, # LATIN SMALL LETTER D
0x0085: 0x0065, # LATIN SMALL LETTER E
0x0086: 0x0066, # LATIN SMALL LETTER F
0x0087: 0x0067, # LATIN SMALL LETTER G
0x0088: 0x0068, # LATIN SMALL LETTER H
0x0089: 0x0069, # LATIN SMALL LETTER I
0x008a: 0x03b1, # GREEK SMALL LETTER ALPHA
0x008b: 0x03b2, # GREEK SMALL LETTER BETA
0x008c: 0x03b3, # GREEK SMALL LETTER GAMMA
0x008d: 0x03b4, # GREEK SMALL LETTER DELTA
0x008e: 0x03b5, # GREEK SMALL LETTER EPSILON
0x008f: 0x03b6, # GREEK SMALL LETTER ZETA
0x0090: 0x00b0, # DEGREE SIGN
0x0091: 0x006a, # LATIN SMALL LETTER J
0x0092: 0x006b, # LATIN SMALL LETTER K
0x0093: 0x006c, # LATIN SMALL LETTER L
0x0094: 0x006d, # LATIN SMALL LETTER M
0x0095: 0x006e, # LATIN SMALL LETTER N
0x0096: 0x006f, # LATIN SMALL LETTER O
0x0097: 0x0070, # LATIN SMALL LETTER P
0x0098: 0x0071, # LATIN SMALL LETTER Q
0x0099: 0x0072, # LATIN SMALL LETTER R
0x009a: 0x03b7, # GREEK SMALL LETTER ETA
0x009b: 0x03b8, # GREEK SMALL LETTER THETA
0x009c: 0x03b9, # GREEK SMALL LETTER IOTA
0x009d: 0x03ba, # GREEK SMALL LETTER KAPPA
0x009e: 0x03bb, # GREEK SMALL LETTER LAMDA
0x009f: 0x03bc, # GREEK SMALL LETTER MU
0x00a0: 0x00b4, # ACUTE ACCENT
0x00a1: 0x007e, # TILDE
0x00a2: 0x0073, # LATIN SMALL LETTER S
0x00a3: 0x0074, # LATIN SMALL LETTER T
0x00a4: 0x0075, # LATIN SMALL LETTER U
0x00a5: 0x0076, # LATIN SMALL LETTER V
0x00a6: 0x0077, # LATIN SMALL LETTER W
0x00a7: 0x0078, # LATIN SMALL LETTER X
0x00a8: 0x0079, # LATIN SMALL LETTER Y
0x00a9: 0x007a, # LATIN SMALL LETTER Z
0x00aa: 0x03bd, # GREEK SMALL LETTER NU
0x00ab: 0x03be, # GREEK SMALL LETTER XI
0x00ac: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00ad: 0x03c0, # GREEK SMALL LETTER PI
0x00ae: 0x03c1, # GREEK SMALL LETTER RHO
0x00af: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00b0: 0x00a3, # POUND SIGN
0x00b1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00b2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00b3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00b4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00b5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00b6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00b7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00b8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00b9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ba: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00bb: 0x03c4, # GREEK SMALL LETTER TAU
0x00bc: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00bd: 0x03c6, # GREEK SMALL LETTER PHI
0x00be: 0x03c7, # GREEK SMALL LETTER CHI
0x00bf: 0x03c8, # GREEK SMALL LETTER PSI
0x00c0: 0x007b, # LEFT CURLY BRACKET
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
0x00ca: 0x00ad, # SOFT HYPHEN
0x00cb: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00cc: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00cd: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00ce: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00cf: 0x2015, # HORIZONTAL BAR
0x00d0: 0x007d, # RIGHT CURLY BRACKET
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
0x00da: 0x00b1, # PLUS-MINUS SIGN
0x00db: 0x00bd, # VULGAR FRACTION ONE HALF
0x00dc: 0x001a, # SUBSTITUTE
0x00dd: 0x0387, # GREEK ANO TELEIA
0x00de: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00df: 0x00a6, # BROKEN BAR
0x00e0: 0x005c, # REVERSE SOLIDUS
0x00e1: 0x001a, # SUBSTITUTE
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
0x00ea: 0x00b2, # SUPERSCRIPT TWO
0x00eb: 0x00a7, # SECTION SIGN
0x00ec: 0x001a, # SUBSTITUTE
0x00ed: 0x001a, # SUBSTITUTE
0x00ee: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ef: 0x00ac, # NOT SIGN
0x00f0: 0x0030, # DIGIT ZERO
0x00f1: 0x0031, # DIGIT ONE
0x00f2: 0x0032, # DIGIT TWO
0x00f3: 0x0033, # DIGIT THREE
0x00f4: 0x0034, # DIGIT FOUR
0x00f5: 0x0035, # DIGIT FIVE
0x00f6: 0x0036, # DIGIT SIX
0x00f7: 0x0037, # DIGIT SEVEN
0x00f8: 0x0038, # DIGIT EIGHT
0x00f9: 0x0039, # DIGIT NINE
0x00fa: 0x00b3, # SUPERSCRIPT THREE
0x00fb: 0x00a9, # COPYRIGHT SIGN
0x00fc: 0x001a, # SUBSTITUTE
0x00fd: 0x001a, # SUBSTITUTE
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ff: 0x009f, # CONTROL
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,49 @@
""" Python Character Mapping Codec generated from '8859-1.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
def __init__(self,stream,errors='strict'):
codecs.StreamWriter.__init__(self,strict,errors)
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,92 @@
""" Python Character Mapping Codec generated from '8859-10.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE
0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE
0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00af: 0x014a, # LATIN CAPITAL LETTER ENG
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE
0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE
0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00bd: 0x2015, # HORIZONTAL BAR
0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00bf: 0x014b, # LATIN SMALL LETTER ENG
0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE
0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

102
Lib/encodings/iso8859_13.py Normal file
View File

@ -0,0 +1,102 @@
""" Python Character Mapping Codec generated from '8859-13.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x00af: 0x00c6, # LATIN CAPITAL LETTER AE
0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x00bf: 0x00e6, # LATIN SMALL LETTER AE
0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,77 @@
""" Python Character Mapping Codec generated from '8859-14.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE
0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE
0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE
0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE
0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE
0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE
0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE
0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE
0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE
0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE
0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE
0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE
0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE
0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE
0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE
0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS
0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS
0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE
0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE
0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,54 @@
""" Python Character Mapping Codec generated from '8859-15.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a4: 0x20ac, # EURO SIGN
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

103
Lib/encodings/iso8859_2.py Normal file
View File

@ -0,0 +1,103 @@
""" Python Character Mapping Codec generated from '8859-2.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x02d8, # BREVE
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00b2: 0x02db, # OGONEK
0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON
0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x00b7: 0x02c7, # CARON
0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON
0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON
0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON
0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,74 @@
""" Python Character Mapping Codec generated from '8859-3.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
0x00a2: 0x02d8, # BREVE
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
0x00ff: 0x02d9, # DOT ABOVE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,96 @@
""" Python Character Mapping Codec generated from '8859-4.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE
0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE
0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00b2: 0x02db, # OGONEK
0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE
0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00b7: 0x02c7, # CARON
0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE
0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG
0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00bf: 0x014b, # LATIN SMALL LETTER ENG
0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE
0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00ff: 0x02d9, # DOT ABOVE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

140
Lib/encodings/iso8859_5.py Normal file
View File

@ -0,0 +1,140 @@
""" Python Character Mapping Codec generated from '8859-5.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE
0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE
0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE
0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE
0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE
0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A
0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O
0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U
0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E
0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00d0: 0x0430, # CYRILLIC SMALL LETTER A
0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE
0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE
0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00d8: 0x0438, # CYRILLIC SMALL LETTER I
0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00da: 0x043a, # CYRILLIC SMALL LETTER KA
0x00db: 0x043b, # CYRILLIC SMALL LETTER EL
0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM
0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN
0x00de: 0x043e, # CYRILLIC SMALL LETTER O
0x00df: 0x043f, # CYRILLIC SMALL LETTER PE
0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
0x00f0: 0x2116, # NUMERO SIGN
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE
0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE
0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE
0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI
0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE
0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE
0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE
0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE
0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE
0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,94 @@
""" Python Character Mapping Codec generated from '8859-6.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00ac: 0x060c, # ARABIC COMMA
0x00bb: 0x061b, # ARABIC SEMICOLON
0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c1: 0x0621, # ARABIC LETTER HAMZA
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
0x00c7: 0x0627, # ARABIC LETTER ALEF
0x00c8: 0x0628, # ARABIC LETTER BEH
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
0x00ca: 0x062a, # ARABIC LETTER TEH
0x00cb: 0x062b, # ARABIC LETTER THEH
0x00cc: 0x062c, # ARABIC LETTER JEEM
0x00cd: 0x062d, # ARABIC LETTER HAH
0x00ce: 0x062e, # ARABIC LETTER KHAH
0x00cf: 0x062f, # ARABIC LETTER DAL
0x00d0: 0x0630, # ARABIC LETTER THAL
0x00d1: 0x0631, # ARABIC LETTER REH
0x00d2: 0x0632, # ARABIC LETTER ZAIN
0x00d3: 0x0633, # ARABIC LETTER SEEN
0x00d4: 0x0634, # ARABIC LETTER SHEEN
0x00d5: 0x0635, # ARABIC LETTER SAD
0x00d6: 0x0636, # ARABIC LETTER DAD
0x00d7: 0x0637, # ARABIC LETTER TAH
0x00d8: 0x0638, # ARABIC LETTER ZAH
0x00d9: 0x0639, # ARABIC LETTER AIN
0x00da: 0x063a, # ARABIC LETTER GHAIN
0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0x0641, # ARABIC LETTER FEH
0x00e2: 0x0642, # ARABIC LETTER QAF
0x00e3: 0x0643, # ARABIC LETTER KAF
0x00e4: 0x0644, # ARABIC LETTER LAM
0x00e5: 0x0645, # ARABIC LETTER MEEM
0x00e6: 0x0646, # ARABIC LETTER NOON
0x00e7: 0x0647, # ARABIC LETTER HEH
0x00e8: 0x0648, # ARABIC LETTER WAW
0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA
0x00ea: 0x064a, # ARABIC LETTER YEH
0x00eb: 0x064b, # ARABIC FATHATAN
0x00ec: 0x064c, # ARABIC DAMMATAN
0x00ed: 0x064d, # ARABIC KASRATAN
0x00ee: 0x064e, # ARABIC FATHA
0x00ef: 0x064f, # ARABIC DAMMA
0x00f0: 0x0650, # ARABIC KASRA
0x00f1: 0x0651, # ARABIC SHADDA
0x00f2: 0x0652, # ARABIC SUKUN
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

120
Lib/encodings/iso8859_7.py Normal file
View File

@ -0,0 +1,120 @@
""" Python Character Mapping Codec generated from '8859-7.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00af: 0x2015, # HORIZONTAL BAR
0x00b4: 0x0384, # GREEK TONOS
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA
0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA
0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA
0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA
0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA
0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA
0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x00cc: 0x039c, # GREEK CAPITAL LETTER MU
0x00cd: 0x039d, # GREEK CAPITAL LETTER NU
0x00ce: 0x039e, # GREEK CAPITAL LETTER XI
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI
0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI
0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA
0x00e7: 0x03b7, # GREEK SMALL LETTER ETA
0x00e8: 0x03b8, # GREEK SMALL LETTER THETA
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00ec: 0x03bc, # GREEK SMALL LETTER MU
0x00ed: 0x03bd, # GREEK SMALL LETTER NU
0x00ee: 0x03be, # GREEK SMALL LETTER XI
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
0x00f1: 0x03c1, # GREEK SMALL LETTER RHO
0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00f6: 0x03c6, # GREEK SMALL LETTER PHI
0x00f7: 0x03c7, # GREEK SMALL LETTER CHI
0x00f8: 0x03c8, # GREEK SMALL LETTER PSI
0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,77 @@
""" Python Character Mapping Codec generated from '8859-8.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00aa: 0x00d7, # MULTIPLICATION SIGN
0x00af: 0x203e, # OVERLINE
0x00ba: 0x00f7, # DIVISION SIGN
0x00df: 0x2017, # DOUBLE LOW LINE
0x00e0: 0x05d0, # HEBREW LETTER ALEF
0x00e1: 0x05d1, # HEBREW LETTER BET
0x00e2: 0x05d2, # HEBREW LETTER GIMEL
0x00e3: 0x05d3, # HEBREW LETTER DALET
0x00e4: 0x05d4, # HEBREW LETTER HE
0x00e5: 0x05d5, # HEBREW LETTER VAV
0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
0x00e7: 0x05d7, # HEBREW LETTER HET
0x00e8: 0x05d8, # HEBREW LETTER TET
0x00e9: 0x05d9, # HEBREW LETTER YOD
0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
0x00eb: 0x05db, # HEBREW LETTER KAF
0x00ec: 0x05dc, # HEBREW LETTER LAMED
0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
0x00ee: 0x05de, # HEBREW LETTER MEM
0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
0x00f0: 0x05e0, # HEBREW LETTER NUN
0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
0x00f2: 0x05e2, # HEBREW LETTER AYIN
0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
0x00f4: 0x05e4, # HEBREW LETTER PE
0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
0x00f6: 0x05e6, # HEBREW LETTER TSADI
0x00f7: 0x05e7, # HEBREW LETTER QOF
0x00f8: 0x05e8, # HEBREW LETTER RESH
0x00f9: 0x05e9, # HEBREW LETTER SHIN
0x00fa: 0x05ea, # HEBREW LETTER TAV
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,52 @@
""" Python Character Mapping Codec generated from '8859-9.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

174
Lib/encodings/koi8_r.py Normal file
View File

@ -0,0 +1,174 @@
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x008b: 0x2580, # UPPER HALF BLOCK
0x008c: 0x2584, # LOWER HALF BLOCK
0x008d: 0x2588, # FULL BLOCK
0x008e: 0x258c, # LEFT HALF BLOCK
0x008f: 0x2590, # RIGHT HALF BLOCK
0x0090: 0x2591, # LIGHT SHADE
0x0091: 0x2592, # MEDIUM SHADE
0x0092: 0x2593, # DARK SHADE
0x0093: 0x2320, # TOP HALF INTEGRAL
0x0094: 0x25a0, # BLACK SQUARE
0x0095: 0x2219, # BULLET OPERATOR
0x0096: 0x221a, # SQUARE ROOT
0x0097: 0x2248, # ALMOST EQUAL TO
0x0098: 0x2264, # LESS-THAN OR EQUAL TO
0x0099: 0x2265, # GREATER-THAN OR EQUAL TO
0x009a: 0x00a0, # NO-BREAK SPACE
0x009b: 0x2321, # BOTTOM HALF INTEGRAL
0x009c: 0x00b0, # DEGREE SIGN
0x009d: 0x00b2, # SUPERSCRIPT TWO
0x009e: 0x00b7, # MIDDLE DOT
0x009f: 0x00f7, # DIVISION SIGN
0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO
0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00bf: 0x00a9, # COPYRIGHT SIGN
0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU
0x00c1: 0x0430, # CYRILLIC SMALL LETTER A
0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE
0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF
0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA
0x00c9: 0x0438, # CYRILLIC SMALL LETTER I
0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA
0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL
0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM
0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN
0x00cf: 0x043e, # CYRILLIC SMALL LETTER O
0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE
0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA
0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER
0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES
0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE
0x00d5: 0x0443, # CYRILLIC SMALL LETTER U
0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE
0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00dc: 0x044d, # CYRILLIC SMALL LETTER E
0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A
0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I
0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O
0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U
0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

35
Lib/encodings/latin_1.py Normal file
View File

@ -0,0 +1,35 @@
""" Python 'latin-1' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.latin_1_encode
decode = codecs.latin_1_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
class StreamConverter(StreamWriter,StreamReader):
encode = codecs.latin_1_decode
decode = codecs.latin_1_encode
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -0,0 +1,169 @@
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00a0: 0x2020, # DAGGER
0x00a1: 0x00b0, # DEGREE SIGN
0x00a4: 0x00a7, # SECTION SIGN
0x00a5: 0x2022, # BULLET
0x00a6: 0x00b6, # PILCROW SIGN
0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x2122, # TRADE MARK SIGN
0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE
0x00b0: 0x221e, # INFINITY
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE
0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI
0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE
0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE
0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE
0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE
0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE
0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x221a, # SQUARE ROOT
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00c5: 0x2248, # ALMOST EQUAL TO
0x00c6: 0x2206, # INCREMENT
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE
0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE
0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE
0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2014, # EM DASH
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE
0x00dc: 0x2116, # NUMERO SIGN
0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00de: 0x0451, # CYRILLIC SMALL LETTER IO
0x00df: 0x044f, # CYRILLIC SMALL LETTER YA
0x00e0: 0x0430, # CYRILLIC SMALL LETTER A
0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE
0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE
0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00e8: 0x0438, # CYRILLIC SMALL LETTER I
0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA
0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL
0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM
0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN
0x00ee: 0x043e, # CYRILLIC SMALL LETTER O
0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE
0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER
0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES
0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE
0x00f3: 0x0443, # CYRILLIC SMALL LETTER U
0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF
0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x00a4, # CURRENCY SIGN
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

172
Lib/encodings/mac_greek.py Normal file
View File

@ -0,0 +1,172 @@
""" Python Character Mapping Codec generated from 'GREEK.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00b9, # SUPERSCRIPT ONE
0x0082: 0x00b2, # SUPERSCRIPT TWO
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0084: 0x00b3, # SUPERSCRIPT THREE
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0087: 0x0385, # GREEK DIALYTIKA TONOS
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x008b: 0x0384, # GREEK TONOS
0x008c: 0x00a8, # DIAERESIS
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0092: 0x00a3, # POUND SIGN
0x0093: 0x2122, # TRADE MARK SIGN
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0096: 0x2022, # BULLET
0x0097: 0x00bd, # VULGAR FRACTION ONE HALF
0x0098: 0x2030, # PER MILLE SIGN
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x009b: 0x00a6, # BROKEN BAR
0x009c: 0x00ad, # SOFT HYPHEN
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00a0: 0x2020, # DAGGER
0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA
0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA
0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA
0x00a5: 0x039e, # GREEK CAPITAL LETTER XI
0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00ac: 0x00a7, # SECTION SIGN
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x00b0, # DEGREE SIGN
0x00af: 0x0387, # GREEK ANO TELEIA
0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x00a5, # YEN SIGN
0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA
0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA
0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA
0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA
0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x00bb: 0x039c, # GREEK CAPITAL LETTER MU
0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI
0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00c1: 0x039d, # GREEK CAPITAL LETTER NU
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00c5: 0x2248, # ALMOST EQUAL TO
0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI
0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2015, # HORIZONTAL BAR
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
0x00e3: 0x03c8, # GREEK SMALL LETTER PSI
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00e6: 0x03c6, # GREEK SMALL LETTER PHI
0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00e8: 0x03b7, # GREEK SMALL LETTER ETA
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
0x00ea: 0x03be, # GREEK SMALL LETTER XI
0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA
0x00ed: 0x03bc, # GREEK SMALL LETTER MU
0x00ee: 0x03bd, # GREEK SMALL LETTER NU
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00f2: 0x03c1, # GREEK SMALL LETTER RHO
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
0x00f5: 0x03b8, # GREEK SMALL LETTER THETA
0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00f8: 0x03c7, # GREEK SMALL LETTER CHI
0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA
0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00ff: None, # UNDEFINED
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,168 @@
""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00a1: 0x00b0, # DEGREE SIGN
0x00a4: 0x00a7, # SECTION SIGN
0x00a5: 0x2022, # BULLET
0x00a6: 0x00b6, # PILCROW SIGN
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x2122, # TRADE MARK SIGN
0x00ab: 0x00b4, # ACUTE ACCENT
0x00ac: 0x00a8, # DIAERESIS
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00b0: 0x221e, # INFINITY
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x00a5, # YEN SIGN
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
0x00b7: 0x2211, # N-ARY SUMMATION
0x00b8: 0x220f, # N-ARY PRODUCT
0x00b9: 0x03c0, # GREEK SMALL LETTER PI
0x00ba: 0x222b, # INTEGRAL
0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00bd: 0x2126, # OHM SIGN
0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00c0: 0x00bf, # INVERTED QUESTION MARK
0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x221a, # SQUARE ROOT
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00c5: 0x2248, # ALMOST EQUAL TO
0x00c6: 0x2206, # INCREMENT
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2014, # EM DASH
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x25ca, # LOZENGE
0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00da: 0x2044, # FRACTION SLASH
0x00db: 0x00a4, # CURRENCY SIGN
0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH
0x00dd: 0x00f0, # LATIN SMALL LETTER ETH
0x00df: 0x00fe, # LATIN SMALL LETTER THORN
0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00e1: 0x00b7, # MIDDLE DOT
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00e4: 0x2030, # PER MILLE SIGN
0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00f0: None, # UNDEFINED
0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x00f7: 0x02dc, # SMALL TILDE
0x00f8: 0x00af, # MACRON
0x00f9: 0x02d8, # BREVE
0x00fa: 0x02d9, # DOT ABOVE
0x00fb: 0x02da, # RING ABOVE
0x00fc: 0x00b8, # CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

172
Lib/encodings/mac_latin2.py Normal file
View File

@ -0,0 +1,172 @@
""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON
0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00a0: 0x2020, # DAGGER
0x00a1: 0x00b0, # DEGREE SIGN
0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00a4: 0x00a7, # SECTION SIGN
0x00a5: 0x2022, # BULLET
0x00a6: 0x00b6, # PILCROW SIGN
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x2122, # TRADE MARK SIGN
0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00ac: 0x00a8, # DIAERESIS
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
0x00b7: 0x2211, # N-ARY SUMMATION
0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON
0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x221a, # SQUARE ROOT
0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
0x00c6: 0x2206, # INCREMENT
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON
0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2014, # EM DASH
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x25ca, # LOZENGE
0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00ff: 0x02c7, # CARON
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

169
Lib/encodings/mac_roman.py Normal file
View File

@ -0,0 +1,169 @@
""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00a0: 0x2020, # DAGGER
0x00a1: 0x00b0, # DEGREE SIGN
0x00a4: 0x00a7, # SECTION SIGN
0x00a5: 0x2022, # BULLET
0x00a6: 0x00b6, # PILCROW SIGN
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x2122, # TRADE MARK SIGN
0x00ab: 0x00b4, # ACUTE ACCENT
0x00ac: 0x00a8, # DIAERESIS
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00b0: 0x221e, # INFINITY
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x00a5, # YEN SIGN
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
0x00b7: 0x2211, # N-ARY SUMMATION
0x00b8: 0x220f, # N-ARY PRODUCT
0x00b9: 0x03c0, # GREEK SMALL LETTER PI
0x00ba: 0x222b, # INTEGRAL
0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00bd: 0x2126, # OHM SIGN
0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00c0: 0x00bf, # INVERTED QUESTION MARK
0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x221a, # SQUARE ROOT
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00c5: 0x2248, # ALMOST EQUAL TO
0x00c6: 0x2206, # INCREMENT
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2014, # EM DASH
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x25ca, # LOZENGE
0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00da: 0x2044, # FRACTION SLASH
0x00db: 0x00a4, # CURRENCY SIGN
0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
0x00de: 0xfb01, # LATIN SMALL LIGATURE FI
0x00df: 0xfb02, # LATIN SMALL LIGATURE FL
0x00e0: 0x2021, # DOUBLE DAGGER
0x00e1: 0x00b7, # MIDDLE DOT
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00e4: 0x2030, # PER MILLE SIGN
0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00f0: None, # UNDEFINED
0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x00f7: 0x02dc, # SMALL TILDE
0x00f8: 0x00af, # MACRON
0x00f9: 0x02d8, # BREVE
0x00fa: 0x02d9, # DOT ABOVE
0x00fb: 0x02da, # RING ABOVE
0x00fc: 0x00b8, # CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,169 @@
""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = {
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00a0: 0x2020, # DAGGER
0x00a1: 0x00b0, # DEGREE SIGN
0x00a4: 0x00a7, # SECTION SIGN
0x00a5: 0x2022, # BULLET
0x00a6: 0x00b6, # PILCROW SIGN
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
0x00a8: 0x00ae, # REGISTERED SIGN
0x00aa: 0x2122, # TRADE MARK SIGN
0x00ab: 0x00b4, # ACUTE ACCENT
0x00ac: 0x00a8, # DIAERESIS
0x00ad: 0x2260, # NOT EQUAL TO
0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00b0: 0x221e, # INFINITY
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
0x00b4: 0x00a5, # YEN SIGN
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
0x00b7: 0x2211, # N-ARY SUMMATION
0x00b8: 0x220f, # N-ARY PRODUCT
0x00b9: 0x03c0, # GREEK SMALL LETTER PI
0x00ba: 0x222b, # INTEGRAL
0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00bd: 0x2126, # OHM SIGN
0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00c0: 0x00bf, # INVERTED QUESTION MARK
0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
0x00c2: 0x00ac, # NOT SIGN
0x00c3: 0x221a, # SQUARE ROOT
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00c5: 0x2248, # ALMOST EQUAL TO
0x00c6: 0x2206, # INCREMENT
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
0x00ca: 0x00a0, # NO-BREAK SPACE
0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
0x00d0: 0x2013, # EN DASH
0x00d1: 0x2014, # EM DASH
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00d6: 0x00f7, # DIVISION SIGN
0x00d7: 0x25ca, # LOZENGE
0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00e0: 0x2021, # DOUBLE DAGGER
0x00e1: 0x00b7, # MIDDLE DOT
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00e4: 0x2030, # PER MILLE SIGN
0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00f0: None, # UNDEFINED
0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00f5: None, # UNDEFINED
0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x00f7: 0x02dc, # SMALL TILDE
0x00f8: 0x00af, # MACRON
0x00f9: 0x02d8, # BREVE
0x00fa: 0x02d9, # DOT ABOVE
0x00fb: 0x02da, # RING ABOVE
0x00fc: 0x00b8, # CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
}
### Encoding Map
encoding_map = {}
for k,v in decoding_map.items():
encoding_map[v] = k

View File

@ -0,0 +1,30 @@
""" Python 'raw-unicode-escape' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.raw_unicode_escape_encode
decode = codecs.raw_unicode_escape_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -0,0 +1,30 @@
""" Python 'unicode-escape' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.unicode_escape_encode
decode = codecs.unicode_escape_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -0,0 +1,30 @@
""" Python 'unicode-internal' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.unicode_internal_encode
decode = codecs.unicode_internal_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

31
Lib/encodings/utf_16.py Normal file
View File

@ -0,0 +1,31 @@
""" Python 'utf-16' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.utf_16_encode
decode = codecs.utf_16_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -0,0 +1,31 @@
""" Python 'utf-16-be' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.utf_16_be_encode
decode = codecs.utf_16_be_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -0,0 +1,31 @@
""" Python 'utf-16-le' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.utf_16_le_encode
decode = codecs.utf_16_le_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

31
Lib/encodings/utf_8.py Normal file
View File

@ -0,0 +1,31 @@
""" Python 'utf-8' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.utf_8_encode
decode = codecs.utf_8_decode
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)