Marc-Andre Lemburg: Unicode encodings.
This commit is contained in:
parent
21288edad0
commit
0229bf6001
|
@ -0,0 +1,78 @@
|
|||
""" Standard "encodings" Package
|
||||
|
||||
Standard Python encoding modules are stored in this package
|
||||
directory.
|
||||
|
||||
Codec modules must have names corresponding to standard lower-case
|
||||
encoding names. Hyphens are automatically converted to
|
||||
underscores, e.g. 'utf-8' is looked up as module utf_8.
|
||||
|
||||
Each codec module must export the following interface:
|
||||
|
||||
* getregentry() -> (encoder, decoder, stream_reader, stream_writer)
|
||||
The getregentry() API must return callable objects which adhere to
|
||||
the Python Codec Interface Standard.
|
||||
|
||||
In addition, a module may optionally also define the following
|
||||
APIs which are then used by the package's codec search function:
|
||||
|
||||
* getaliases() -> sequence of encoding name strings to use as aliases
|
||||
|
||||
Alias names returned by getaliases() must be lower-case.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import string,codecs,aliases
|
||||
|
||||
# Lookup cache used by search_function(): maps an encoding name (or
# a codec-supplied alias) to its registry entry tuple, or to None
# when the module for that name could not be imported.
_cache = {}

# Sentinel passed as the default to _cache.get(), so that a cached
# None (failed lookup) can be told apart from "not cached yet".
# The misspelling is part of the name search_function() refers to.
_unkown = '--unkown--'
|
||||
|
||||
def search_function(encoding):

    """ Codec search function of the encodings package.

        Maps encoding to a module name (hyphens become underscores,
        then the aliases table is consulted), imports that module and
        returns its registry entry: a 4-tuple of callables (encoder,
        decoder, stream_reader, stream_writer).

        Returns None if no such module can be imported, or if the
        imported module has no getregentry() and therefore is not a
        codec module (e.g. the aliases helper module of this
        package).

        Raises SystemError if getregentry() does not return exactly
        four callables.

        Results, including failed lookups, are stored in _cache; the
        names returned by the module's optional getaliases() are
        cached for the same entry.

    """
    # Cache lookup; a cached None records an earlier failed lookup
    entry = _cache.get(encoding,_unkown)
    if entry is not _unkown:
        return entry

    # Import the module
    modname = string.replace(encoding,'-','_')
    modname = aliases.aliases.get(modname,modname)
    try:
        mod = __import__(modname,globals(),locals(),'*')
    except ImportError:
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry.  Only the attribute
    # fetch is guarded: a module without getregentry() is not a codec
    # module and is reported as "not found" instead of an internal
    # error, while an AttributeError raised inside getregentry()
    # itself is no longer hidden.
    try:
        getregentry = mod.getregentry
    except AttributeError:
        _cache[encoding] = None
        return None
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise SystemError(
            'module "%s.%s" failed to register' % (__name__,modname))
    for obj in entry:
        if not callable(obj):
            raise SystemError(
                'incompatible codecs in module "%s.%s"' % (__name__,modname))

    # Cache the encoding and its aliases
    _cache[encoding] = entry
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional
        pass
    else:
        for alias in codecaliases:
            _cache[alias] = entry
    return entry
|
||||
|
||||
# Register the search_function in the Python codec registry.  This
# is a module-level statement, so it takes effect when the package
# is imported.
codecs.register(search_function)
|
|
@ -0,0 +1,57 @@
|
|||
""" Encoding Aliases Support
|
||||
|
||||
This module is used by the encodings package search function to
|
||||
    map encoding names to module names.
|
||||
|
||||
Note that the search function converts the encoding names to lower
|
||||
case and replaces hyphens with underscores *before* performing the
|
||||
lookup.
|
||||
|
||||
"""
|
||||
# Alias name -> codec module name.
#
# Lookups are made with lower-cased names in which hyphens have been
# replaced by underscores, so keys have to be spelled that way to be
# reachable.  The mixed-case names at the end are the original
# spellings; they cannot match such a lookup and are kept only for
# code that reads this dictionary directly.
aliases = {

    # Latin-1
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # UTF-8
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'u8': 'utf_8',

    # UTF-16
    'utf16': 'utf_16',
    'u16': 'utf_16',
    'utf_16be': 'utf_16_be',
    'utf_16le': 'utf_16_le',
    'unicodebigunmarked': 'utf_16_be',
    'unicodelittleunmarked': 'utf_16_le',

    # ASCII
    'us_ascii': 'ascii',

    # ISO
    'iso8859_1': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_13': 'iso8859_13',
    'iso_8859_14': 'iso8859_14',
    'iso_8859_15': 'iso8859_15',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_9': 'iso8859_9',

    # Mac
    'maccentraleurope': 'mac_latin2',
    'maccyrillic': 'mac_cyrillic',
    'macgreek': 'mac_greek',
    'maciceland': 'mac_iceland',
    'macroman': 'mac_roman',
    'macturkish': 'mac_turkish',

    # Original mixed-case spellings (see note above)
    'UnicodeBigUnmarked': 'utf_16_be',
    'UnicodeLittleUnmarked': 'utf_16_le',
    'MacCentralEurope': 'mac_latin2',
    'MacCyrillic': 'mac_cyrillic',
    'MacGreek': 'mac_greek',
    'MacIceland': 'mac_iceland',
    'MacRoman': 'mac_roman',
    'MacTurkish': 'mac_turkish',

}
|
|
@ -0,0 +1,35 @@
|
|||
""" Python 'ascii' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ ASCII codec whose encode and decode are the C functions
        codecs.ascii_encode and codecs.ascii_decode.
    """

    # The C functions are stored as plain class attributes.  The
    # class does not turn them into methods, so they are called
    # without an instance argument.  This is intended.
    decode = codecs.ascii_decode
    encode = codecs.ascii_encode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ Stream writer for the ASCII codec.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamWriter.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ Stream reader for the ASCII codec.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamReader.
    """
    pass
|
||||
|
||||
class StreamConverter(StreamWriter,StreamReader):

    """ Combined stream writer/reader with the two codec directions
        exchanged: encode is the ASCII decoder and decode is the
        ASCII encoder.
    """

    # NOTE(review): the swap looks deliberate for a converter class;
    # confirm against the intended use before changing it.
    decode = codecs.ascii_encode
    encode = codecs.ascii_decode
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry of this codec module: the tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    encoder = Codec.encode
    decoder = Codec.decode
    return (encoder,decoder,StreamReader,StreamWriter)
|
|
@ -0,0 +1,51 @@
|
|||
""" Generic Python Character Mapping Codec.
|
||||
|
||||
Use this codec directly rather than through the automatic
|
||||
conversion mechanisms supplied by unicode() and .encode().
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Generic character mapping codec; encode and decode are the C
        functions codecs.charmap_encode and codecs.charmap_decode.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.charmap_encode
    decode = codecs.charmap_decode

class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer that encodes through a caller-supplied
        character mapping.
    """

    def __init__(self,stream,errors='strict',mapping=None):

        """ stream and errors are handed to codecs.StreamWriter;
            mapping is stored and passed to every encode() call.
        """
        # The base class must receive the stream argument.  The
        # previous code passed the undefined name 'strict' here,
        # which raised a NameError on every instantiation.
        codecs.StreamWriter.__init__(self,stream,errors)
        self.mapping = mapping

    def encode(self,input,errors='strict'):

        """ Encode input with the mapping given to the constructor;
            returns the result of Codec.encode().
        """
        return Codec.encode(input,errors,self.mapping)

class StreamReader(Codec,codecs.StreamReader):

    """ Stream reader that decodes through a caller-supplied
        character mapping.
    """

    def __init__(self,stream,errors='strict',mapping=None):

        """ stream and errors are handed to codecs.StreamReader;
            mapping is stored and passed to every decode() call.
        """
        # Same fix as in StreamWriter: pass stream, not the
        # undefined name 'strict'.
        codecs.StreamReader.__init__(self,stream,errors)
        self.mapping = mapping

    def decode(self,input,errors='strict'):

        """ Decode input with the mapping given to the constructor;
            returns the result of Codec.decode().
        """
        return Codec.decode(input,errors,self.mapping)
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry of this codec module: the tuple
        (encoder, decoder, stream reader class, stream writer class).
    """
    encoder = Codec.encode
    decoder = Codec.decode
    return (encoder,decoder,StreamReader,StreamWriter)
|
||||
|
|
@ -0,0 +1,282 @@
|
|||
""" Python Character Mapping Codec generated from 'CP037.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec for this code page.  Both directions
        go through the module-level mapping dictionaries.
    """

    def decode(self,input,errors='strict'):

        """ Return codecs.charmap_decode() of input using
            decoding_map.
        """
        table = decoding_map
        return codecs.charmap_decode(input,errors,table)

    def encode(self,input,errors='strict'):

        """ Return codecs.charmap_encode() of input using
            encoding_map.
        """
        table = encoding_map
        return codecs.charmap_encode(input,errors,table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ Stream writer for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamWriter.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ Stream reader for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamReader.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry of this codec module: the tuple
        (encoder, decoder, stream reader class, stream writer class).
        Encoder and decoder are bound methods of two separate Codec
        instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder,decoder,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Mapping handed to codecs.charmap_decode() by Codec.decode().  Each
# key is a code point of the code page (this module is generated
# from 'CP037.TXT'), each value the Unicode ordinal named in the
# trailing comment.  Code points without an entry (e.g. 0x0000 to
# 0x0003) are simply absent from this dictionary.
decoding_map = {

    0x0004: 0x009c, # CONTROL
    0x0005: 0x0009, # HORIZONTAL TABULATION
    0x0006: 0x0086, # CONTROL
    0x0007: 0x007f, # DELETE
    0x0008: 0x0097, # CONTROL
    0x0009: 0x008d, # CONTROL
    0x000a: 0x008e, # CONTROL
    0x0014: 0x009d, # CONTROL
    0x0015: 0x0085, # CONTROL
    0x0016: 0x0008, # BACKSPACE
    0x0017: 0x0087, # CONTROL
    0x001a: 0x0092, # CONTROL
    0x001b: 0x008f, # CONTROL
    0x0020: 0x0080, # CONTROL
    0x0021: 0x0081, # CONTROL
    0x0022: 0x0082, # CONTROL
    0x0023: 0x0083, # CONTROL
    0x0024: 0x0084, # CONTROL
    0x0025: 0x000a, # LINE FEED
    0x0026: 0x0017, # END OF TRANSMISSION BLOCK
    0x0027: 0x001b, # ESCAPE
    0x0028: 0x0088, # CONTROL
    0x0029: 0x0089, # CONTROL
    0x002a: 0x008a, # CONTROL
    0x002b: 0x008b, # CONTROL
    0x002c: 0x008c, # CONTROL
    0x002d: 0x0005, # ENQUIRY
    0x002e: 0x0006, # ACKNOWLEDGE
    0x002f: 0x0007, # BELL
    0x0030: 0x0090, # CONTROL
    0x0031: 0x0091, # CONTROL
    0x0032: 0x0016, # SYNCHRONOUS IDLE
    0x0033: 0x0093, # CONTROL
    0x0034: 0x0094, # CONTROL
    0x0035: 0x0095, # CONTROL
    0x0036: 0x0096, # CONTROL
    0x0037: 0x0004, # END OF TRANSMISSION
    0x0038: 0x0098, # CONTROL
    0x0039: 0x0099, # CONTROL
    0x003a: 0x009a, # CONTROL
    0x003b: 0x009b, # CONTROL
    0x003c: 0x0014, # DEVICE CONTROL FOUR
    0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
    0x003e: 0x009e, # CONTROL
    0x003f: 0x001a, # SUBSTITUTE
    0x0040: 0x0020, # SPACE
    0x0041: 0x00a0, # NO-BREAK SPACE
    0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
    0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
    0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
    0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
    0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
    0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
    0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
    0x004a: 0x00a2, # CENT SIGN
    0x004b: 0x002e, # FULL STOP
    0x004c: 0x003c, # LESS-THAN SIGN
    0x004d: 0x0028, # LEFT PARENTHESIS
    0x004e: 0x002b, # PLUS SIGN
    0x004f: 0x007c, # VERTICAL LINE
    0x0050: 0x0026, # AMPERSAND
    0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
    0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
    0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
    0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
    0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
    0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
    0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
    0x005a: 0x0021, # EXCLAMATION MARK
    0x005b: 0x0024, # DOLLAR SIGN
    0x005c: 0x002a, # ASTERISK
    0x005d: 0x0029, # RIGHT PARENTHESIS
    0x005e: 0x003b, # SEMICOLON
    0x005f: 0x00ac, # NOT SIGN
    0x0060: 0x002d, # HYPHEN-MINUS
    0x0061: 0x002f, # SOLIDUS
    0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
    0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
    0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
    0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
    0x006a: 0x00a6, # BROKEN BAR
    0x006b: 0x002c, # COMMA
    0x006c: 0x0025, # PERCENT SIGN
    0x006d: 0x005f, # LOW LINE
    0x006e: 0x003e, # GREATER-THAN SIGN
    0x006f: 0x003f, # QUESTION MARK
    0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
    0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
    0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
    0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
    0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
    0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
    0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
    0x0079: 0x0060, # GRAVE ACCENT
    0x007a: 0x003a, # COLON
    0x007b: 0x0023, # NUMBER SIGN
    0x007c: 0x0040, # COMMERCIAL AT
    0x007d: 0x0027, # APOSTROPHE
    0x007e: 0x003d, # EQUALS SIGN
    0x007f: 0x0022, # QUOTATION MARK
    0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
    0x0081: 0x0061, # LATIN SMALL LETTER A
    0x0082: 0x0062, # LATIN SMALL LETTER B
    0x0083: 0x0063, # LATIN SMALL LETTER C
    0x0084: 0x0064, # LATIN SMALL LETTER D
    0x0085: 0x0065, # LATIN SMALL LETTER E
    0x0086: 0x0066, # LATIN SMALL LETTER F
    0x0087: 0x0067, # LATIN SMALL LETTER G
    0x0088: 0x0068, # LATIN SMALL LETTER H
    0x0089: 0x0069, # LATIN SMALL LETTER I
    0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC)
    0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
    0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC)
    0x008f: 0x00b1, # PLUS-MINUS SIGN
    0x0090: 0x00b0, # DEGREE SIGN
    0x0091: 0x006a, # LATIN SMALL LETTER J
    0x0092: 0x006b, # LATIN SMALL LETTER K
    0x0093: 0x006c, # LATIN SMALL LETTER L
    0x0094: 0x006d, # LATIN SMALL LETTER M
    0x0095: 0x006e, # LATIN SMALL LETTER N
    0x0096: 0x006f, # LATIN SMALL LETTER O
    0x0097: 0x0070, # LATIN SMALL LETTER P
    0x0098: 0x0071, # LATIN SMALL LETTER Q
    0x0099: 0x0072, # LATIN SMALL LETTER R
    0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
    0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
    0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
    0x009d: 0x00b8, # CEDILLA
    0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
    0x009f: 0x00a4, # CURRENCY SIGN
    0x00a0: 0x00b5, # MICRO SIGN
    0x00a1: 0x007e, # TILDE
    0x00a2: 0x0073, # LATIN SMALL LETTER S
    0x00a3: 0x0074, # LATIN SMALL LETTER T
    0x00a4: 0x0075, # LATIN SMALL LETTER U
    0x00a5: 0x0076, # LATIN SMALL LETTER V
    0x00a6: 0x0077, # LATIN SMALL LETTER W
    0x00a7: 0x0078, # LATIN SMALL LETTER X
    0x00a8: 0x0079, # LATIN SMALL LETTER Y
    0x00a9: 0x007a, # LATIN SMALL LETTER Z
    0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
    0x00ab: 0x00bf, # INVERTED QUESTION MARK
    0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC)
    0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
    0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC)
    0x00af: 0x00ae, # REGISTERED SIGN
    0x00b0: 0x005e, # CIRCUMFLEX ACCENT
    0x00b1: 0x00a3, # POUND SIGN
    0x00b2: 0x00a5, # YEN SIGN
    0x00b3: 0x00b7, # MIDDLE DOT
    0x00b4: 0x00a9, # COPYRIGHT SIGN
    0x00b5: 0x00a7, # SECTION SIGN
    0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
    0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
    0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
    0x00ba: 0x005b, # LEFT SQUARE BRACKET
    0x00bb: 0x005d, # RIGHT SQUARE BRACKET
    0x00bc: 0x00af, # MACRON
    0x00bd: 0x00a8, # DIAERESIS
    0x00be: 0x00b4, # ACUTE ACCENT
    0x00bf: 0x00d7, # MULTIPLICATION SIGN
    0x00c0: 0x007b, # LEFT CURLY BRACKET
    0x00c1: 0x0041, # LATIN CAPITAL LETTER A
    0x00c2: 0x0042, # LATIN CAPITAL LETTER B
    0x00c3: 0x0043, # LATIN CAPITAL LETTER C
    0x00c4: 0x0044, # LATIN CAPITAL LETTER D
    0x00c5: 0x0045, # LATIN CAPITAL LETTER E
    0x00c6: 0x0046, # LATIN CAPITAL LETTER F
    0x00c7: 0x0047, # LATIN CAPITAL LETTER G
    0x00c8: 0x0048, # LATIN CAPITAL LETTER H
    0x00c9: 0x0049, # LATIN CAPITAL LETTER I
    0x00ca: 0x00ad, # SOFT HYPHEN
    0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
    0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
    0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
    0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
    0x00d0: 0x007d, # RIGHT CURLY BRACKET
    0x00d1: 0x004a, # LATIN CAPITAL LETTER J
    0x00d2: 0x004b, # LATIN CAPITAL LETTER K
    0x00d3: 0x004c, # LATIN CAPITAL LETTER L
    0x00d4: 0x004d, # LATIN CAPITAL LETTER M
    0x00d5: 0x004e, # LATIN CAPITAL LETTER N
    0x00d6: 0x004f, # LATIN CAPITAL LETTER O
    0x00d7: 0x0050, # LATIN CAPITAL LETTER P
    0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
    0x00d9: 0x0052, # LATIN CAPITAL LETTER R
    0x00da: 0x00b9, # SUPERSCRIPT ONE
    0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
    0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
    0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
    0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
    0x00e0: 0x005c, # REVERSE SOLIDUS
    0x00e1: 0x00f7, # DIVISION SIGN
    0x00e2: 0x0053, # LATIN CAPITAL LETTER S
    0x00e3: 0x0054, # LATIN CAPITAL LETTER T
    0x00e4: 0x0055, # LATIN CAPITAL LETTER U
    0x00e5: 0x0056, # LATIN CAPITAL LETTER V
    0x00e6: 0x0057, # LATIN CAPITAL LETTER W
    0x00e7: 0x0058, # LATIN CAPITAL LETTER X
    0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
    0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
    0x00ea: 0x00b2, # SUPERSCRIPT TWO
    0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
    0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
    0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
    0x00f0: 0x0030, # DIGIT ZERO
    0x00f1: 0x0031, # DIGIT ONE
    0x00f2: 0x0032, # DIGIT TWO
    0x00f3: 0x0033, # DIGIT THREE
    0x00f4: 0x0034, # DIGIT FOUR
    0x00f5: 0x0035, # DIGIT FIVE
    0x00f6: 0x0036, # DIGIT SIX
    0x00f7: 0x0037, # DIGIT SEVEN
    0x00f8: 0x0038, # DIGIT EIGHT
    0x00f9: 0x0039, # DIGIT NINE
    0x00fa: 0x00b3, # SUPERSCRIPT THREE
    0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
    0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
    0x00ff: 0x009f, # CONTROL
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Inverse of decoding_map (decoded value -> code point), built once
# when the module is loaded.  If two code points decode to the same
# value, the one visited last is the one that stays in the map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,140 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1006.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec for this code page.  Both directions
        go through the module-level mapping dictionaries.
    """

    def decode(self,input,errors='strict'):

        """ Return codecs.charmap_decode() of input using
            decoding_map.
        """
        table = decoding_map
        return codecs.charmap_decode(input,errors,table)

    def encode(self,input,errors='strict'):

        """ Return codecs.charmap_encode() of input using
            encoding_map.
        """
        table = encoding_map
        return codecs.charmap_encode(input,errors,table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ Stream writer for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamWriter.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ Stream reader for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamReader.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry of this codec module: the tuple
        (encoder, decoder, stream reader class, stream writer class).
        Encoder and decoder are bound methods of two separate Codec
        instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder,decoder,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Mapping handed to codecs.charmap_decode() by Codec.decode().  Each
# key is a code point of the code page (this module is generated
# from 'CP1006.TXT'), each value the Unicode ordinal named in the
# trailing comment.  Only the code points listed are present.
# Note that 0x00b1 and 0x00b2 both decode to 0xfe8e.
decoding_map = {

    0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
    0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
    0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
    0x00a4: 0x06f3, # EXTENDED ARABIC-INDIC DIGIT THREE
    0x00a5: 0x06f4, # EXTENDED ARABIC-INDIC DIGIT FOUR
    0x00a6: 0x06f5, # EXTENDED ARABIC-INDIC DIGIT FIVE
    0x00a7: 0x06f6, # EXTENDED ARABIC-INDIC DIGIT SIX
    0x00a8: 0x06f7, # EXTENDED ARABIC-INDIC DIGIT SEVEN
    0x00a9: 0x06f8, # EXTENDED ARABIC-INDIC DIGIT EIGHT
    0x00aa: 0x06f9, # EXTENDED ARABIC-INDIC DIGIT NINE
    0x00ab: 0x060c, # ARABIC COMMA
    0x00ac: 0x061b, # ARABIC SEMICOLON
    0x00ae: 0x061f, # ARABIC QUESTION MARK
    0x00af: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
    0x00b0: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
    0x00b1: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
    0x00b2: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
    0x00b3: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
    0x00b4: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
    0x00b5: 0xfb56, # ARABIC LETTER PEH ISOLATED FORM
    0x00b6: 0xfb58, # ARABIC LETTER PEH INITIAL FORM
    0x00b7: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
    0x00b8: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
    0x00b9: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
    0x00ba: 0xfb66, # ARABIC LETTER TTEH ISOLATED FORM
    0x00bb: 0xfb68, # ARABIC LETTER TTEH INITIAL FORM
    0x00bc: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
    0x00bd: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
    0x00be: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
    0x00bf: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
    0x00c0: 0xfb7a, # ARABIC LETTER TCHEH ISOLATED FORM
    0x00c1: 0xfb7c, # ARABIC LETTER TCHEH INITIAL FORM
    0x00c2: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
    0x00c3: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
    0x00c4: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
    0x00c5: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
    0x00c6: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
    0x00c7: 0xfb84, # ARABIC LETTER DAHAL ISOLATED FORM
    0x00c8: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
    0x00c9: 0xfead, # ARABIC LETTER REH ISOLATED FORM
    0x00ca: 0xfb8c, # ARABIC LETTER RREH ISOLATED FORM
    0x00cb: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
    0x00cc: 0xfb8a, # ARABIC LETTER JEH ISOLATED FORM
    0x00cd: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
    0x00ce: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
    0x00cf: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
    0x00d0: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
    0x00d1: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
    0x00d2: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
    0x00d3: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
    0x00d4: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
    0x00d5: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
    0x00d6: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
    0x00d7: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
    0x00d8: 0xfeca, # ARABIC LETTER AIN FINAL FORM
    0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
    0x00da: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
    0x00db: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
    0x00dc: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
    0x00dd: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
    0x00de: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
    0x00df: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
    0x00e0: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
    0x00e1: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
    0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
    0x00e3: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
    0x00e4: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
    0x00e5: 0xfb92, # ARABIC LETTER GAF ISOLATED FORM
    0x00e6: 0xfb94, # ARABIC LETTER GAF INITIAL FORM
    0x00e7: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
    0x00e8: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
    0x00e9: 0xfee0, # ARABIC LETTER LAM MEDIAL FORM
    0x00ea: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
    0x00eb: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
    0x00ec: 0xfb9e, # ARABIC LETTER NOON GHUNNA ISOLATED FORM
    0x00ed: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
    0x00ee: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
    0x00ef: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
    0x00f0: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
    0x00f1: 0xfba6, # ARABIC LETTER HEH GOAL ISOLATED FORM
    0x00f2: 0xfba8, # ARABIC LETTER HEH GOAL INITIAL FORM
    0x00f3: 0xfba9, # ARABIC LETTER HEH GOAL MEDIAL FORM
    0x00f4: 0xfbaa, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
    0x00f5: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
    0x00f6: 0xfe89, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
    0x00f7: 0xfe8a, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
    0x00f8: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
    0x00f9: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
    0x00fa: 0xfef2, # ARABIC LETTER YEH FINAL FORM
    0x00fb: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
    0x00fc: 0xfbb0, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
    0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
    0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
    0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Inverse of decoding_map (decoded value -> code point), built once
# when the module is loaded.  If two code points decode to the same
# value, the one visited last is the one that stays in the map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,282 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1026.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec for this code page.  Both directions
        go through the module-level mapping dictionaries.
    """

    def decode(self,input,errors='strict'):

        """ Return codecs.charmap_decode() of input using
            decoding_map.
        """
        table = decoding_map
        return codecs.charmap_decode(input,errors,table)

    def encode(self,input,errors='strict'):

        """ Return codecs.charmap_encode() of input using
            encoding_map.
        """
        table = encoding_map
        return codecs.charmap_encode(input,errors,table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ Stream writer for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamWriter.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ Stream reader for this code page.

        Defines nothing of its own; it only combines Codec (listed
        first, so its encode/decode are found first) with
        codecs.StreamReader.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry of this codec module: the tuple
        (encoder, decoder, stream reader class, stream writer class).
        Encoder and decoder are bound methods of two separate Codec
        instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder,decoder,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
0x0007: 0x007f, # DELETE
|
||||
0x0008: 0x0097, # CONTROL
|
||||
0x0009: 0x008d, # CONTROL
|
||||
0x000a: 0x008e, # CONTROL
|
||||
0x0014: 0x009d, # CONTROL
|
||||
0x0015: 0x0085, # CONTROL
|
||||
0x0016: 0x0008, # BACKSPACE
|
||||
0x0017: 0x0087, # CONTROL
|
||||
0x001a: 0x0092, # CONTROL
|
||||
0x001b: 0x008f, # CONTROL
|
||||
0x0020: 0x0080, # CONTROL
|
||||
0x0021: 0x0081, # CONTROL
|
||||
0x0022: 0x0082, # CONTROL
|
||||
0x0023: 0x0083, # CONTROL
|
||||
0x0024: 0x0084, # CONTROL
|
||||
0x0025: 0x000a, # LINE FEED
|
||||
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0027: 0x001b, # ESCAPE
|
||||
0x0028: 0x0088, # CONTROL
|
||||
0x0029: 0x0089, # CONTROL
|
||||
0x002a: 0x008a, # CONTROL
|
||||
0x002b: 0x008b, # CONTROL
|
||||
0x002c: 0x008c, # CONTROL
|
||||
0x002d: 0x0005, # ENQUIRY
|
||||
0x002e: 0x0006, # ACKNOWLEDGE
|
||||
0x002f: 0x0007, # BELL
|
||||
0x0030: 0x0090, # CONTROL
|
||||
0x0031: 0x0091, # CONTROL
|
||||
0x0032: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0033: 0x0093, # CONTROL
|
||||
0x0034: 0x0094, # CONTROL
|
||||
0x0035: 0x0095, # CONTROL
|
||||
0x0036: 0x0096, # CONTROL
|
||||
0x0037: 0x0004, # END OF TRANSMISSION
|
||||
0x0038: 0x0098, # CONTROL
|
||||
0x0039: 0x0099, # CONTROL
|
||||
0x003a: 0x009a, # CONTROL
|
||||
0x003b: 0x009b, # CONTROL
|
||||
0x003c: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x003e: 0x009e, # CONTROL
|
||||
0x003f: 0x001a, # SUBSTITUTE
|
||||
0x0040: 0x0020, # SPACE
|
||||
0x0041: 0x00a0, # NO-BREAK SPACE
|
||||
0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x0048: 0x007b, # LEFT CURLY BRACKET
|
||||
0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x004a: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x004b: 0x002e, # FULL STOP
|
||||
0x004c: 0x003c, # LESS-THAN SIGN
|
||||
0x004d: 0x0028, # LEFT PARENTHESIS
|
||||
0x004e: 0x002b, # PLUS SIGN
|
||||
0x004f: 0x0021, # EXCLAMATION MARK
|
||||
0x0050: 0x0026, # AMPERSAND
|
||||
0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x005a: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x005b: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x005c: 0x002a, # ASTERISK
|
||||
0x005d: 0x0029, # RIGHT PARENTHESIS
|
||||
0x005e: 0x003b, # SEMICOLON
|
||||
0x005f: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x0060: 0x002d, # HYPHEN-MINUS
|
||||
0x0061: 0x002f, # SOLIDUS
|
||||
0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0068: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x006a: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x006b: 0x002c, # COMMA
|
||||
0x006c: 0x0025, # PERCENT SIGN
|
||||
0x006d: 0x005f, # LOW LINE
|
||||
0x006e: 0x003e, # GREATER-THAN SIGN
|
||||
0x006f: 0x003f, # QUESTION MARK
|
||||
0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x0079: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x007a: 0x003a, # COLON
|
||||
0x007b: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x007c: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x007d: 0x0027, # APOSTROPHE
|
||||
0x007e: 0x003d, # EQUALS SIGN
|
||||
0x007f: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x0081: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0082: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0083: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0084: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0085: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0086: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0087: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0088: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0089: 0x0069, # LATIN SMALL LETTER I
|
||||
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008c: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x008d: 0x0060, # GRAVE ACCENT
|
||||
0x008e: 0x00a6, # BROKEN BAR
|
||||
0x008f: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x0090: 0x00b0, # DEGREE SIGN
|
||||
0x0091: 0x006a, # LATIN SMALL LETTER J
|
||||
0x0092: 0x006b, # LATIN SMALL LETTER K
|
||||
0x0093: 0x006c, # LATIN SMALL LETTER L
|
||||
0x0094: 0x006d, # LATIN SMALL LETTER M
|
||||
0x0095: 0x006e, # LATIN SMALL LETTER N
|
||||
0x0096: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0097: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0098: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0099: 0x0072, # LATIN SMALL LETTER R
|
||||
0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x009d: 0x00b8, # CEDILLA
|
||||
0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x009f: 0x00a4, # CURRENCY SIGN
|
||||
0x00a0: 0x00b5, # MICRO SIGN
|
||||
0x00a1: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00a2: 0x0073, # LATIN SMALL LETTER S
|
||||
0x00a3: 0x0074, # LATIN SMALL LETTER T
|
||||
0x00a4: 0x0075, # LATIN SMALL LETTER U
|
||||
0x00a5: 0x0076, # LATIN SMALL LETTER V
|
||||
0x00a6: 0x0077, # LATIN SMALL LETTER W
|
||||
0x00a7: 0x0078, # LATIN SMALL LETTER X
|
||||
0x00a8: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x00a9: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00ab: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00ac: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x00ad: 0x0024, # DOLLAR SIGN
|
||||
0x00ae: 0x0040, # COMMERCIAL AT
|
||||
0x00af: 0x00ae, # REGISTERED SIGN
|
||||
0x00b0: 0x00a2, # CENT SIGN
|
||||
0x00b1: 0x00a3, # POUND SIGN
|
||||
0x00b2: 0x00a5, # YEN SIGN
|
||||
0x00b3: 0x00b7, # MIDDLE DOT
|
||||
0x00b4: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00b5: 0x00a7, # SECTION SIGN
|
||||
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00ba: 0x00ac, # NOT SIGN
|
||||
0x00bb: 0x007c, # VERTICAL LINE
|
||||
0x00bc: 0x00af, # MACRON
|
||||
0x00bd: 0x00a8, # DIAERESIS
|
||||
0x00be: 0x00b4, # ACUTE ACCENT
|
||||
0x00bf: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00c0: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x00ca: 0x00ad, # SOFT HYPHEN
|
||||
0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00cc: 0x007e, # TILDE
|
||||
0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00d0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x00da: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00dc: 0x005c, # REVERSE SOLIDUS
|
||||
0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x00e0: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00e1: 0x00f7, # DIVISION SIGN
|
||||
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x00ea: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00ec: 0x0023, # NUMBER SIGN
|
||||
0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00f0: 0x0030, # DIGIT ZERO
|
||||
0x00f1: 0x0031, # DIGIT ONE
|
||||
0x00f2: 0x0032, # DIGIT TWO
|
||||
0x00f3: 0x0033, # DIGIT THREE
|
||||
0x00f4: 0x0034, # DIGIT FOUR
|
||||
0x00f5: 0x0035, # DIGIT FIVE
|
||||
0x00f6: 0x0036, # DIGIT SIX
|
||||
0x00f7: 0x0037, # DIGIT SEVEN
|
||||
0x00f8: 0x0038, # DIGIT EIGHT
|
||||
0x00f9: 0x0039, # DIGIT NINE
|
||||
0x00fa: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00fc: 0x0022, # QUOTATION MARK
|
||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table: every value stored in decoding_map becomes a
# key of encoding_map that points back at its original decoding_map key.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,125 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1250.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1250 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, StreamReader,
    StreamWriter).  The encoder and decoder are bound methods taken from
    two separate, freshly created Codec instances; the last two items
    are the classes themselves, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: None, # UNDEFINED
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: None, # UNDEFINED
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x008d: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: None, # UNDEFINED
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x009d: 0x0165, # LATIN SMALL LETTER T WITH CARON
|
||||
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x009f: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x00a1: 0x02c7, # CARON
|
||||
0x00a2: 0x02d8, # BREVE
|
||||
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00a5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00b2: 0x02db, # OGONEK
|
||||
0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x00b9: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x00bc: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00be: 0x013e, # LATIN SMALL LETTER L WITH CARON
|
||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON
|
||||
0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON
|
||||
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON
|
||||
0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||
0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map into encoding_map: each value of decoding_map
# becomes a key that points back at its original decoding_map key.
#
# decoding_map uses None to mark undefined entries (e.g. 0x0081).  Those
# have no character to encode, so they are skipped; inverting them
# blindly would collapse all of them onto a single meaningless None key
# whose value is whichever undefined entry was visited last.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,159 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1251.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1251 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, StreamReader,
    StreamWriter).  The encoder and decoder are bound methods taken from
    two separate, freshly created Codec instances; the last two items
    are the classes themselves, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0453, # CYRILLIC SMALL LETTER GJE
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x20ac, # EURO SIGN
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: 0x0409, # CYRILLIC CAPITAL LETTER LJE
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x040a, # CYRILLIC CAPITAL LETTER NJE
|
||||
0x008d: 0x040c, # CYRILLIC CAPITAL LETTER KJE
|
||||
0x008e: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
|
||||
0x008f: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
|
||||
0x0090: 0x0452, # CYRILLIC SMALL LETTER DJE
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: None, # UNDEFINED
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: 0x0459, # CYRILLIC SMALL LETTER LJE
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x045a, # CYRILLIC SMALL LETTER NJE
|
||||
0x009d: 0x045c, # CYRILLIC SMALL LETTER KJE
|
||||
0x009e: 0x045b, # CYRILLIC SMALL LETTER TSHE
|
||||
0x009f: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
||||
0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE
|
||||
0x00a5: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||
0x00aa: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x00af: 0x0407, # CYRILLIC CAPITAL LETTER YI
|
||||
0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00b4: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO
|
||||
0x00b9: 0x2116, # NUMERO SIGN
|
||||
0x00ba: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE
|
||||
0x00bd: 0x0405, # CYRILLIC CAPITAL LETTER DZE
|
||||
0x00be: 0x0455, # CYRILLIC SMALL LETTER DZE
|
||||
0x00bf: 0x0457, # CYRILLIC SMALL LETTER YI
|
||||
0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||
0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||
0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||
0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE
|
||||
0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE
|
||||
0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I
|
||||
0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA
|
||||
0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL
|
||||
0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM
|
||||
0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN
|
||||
0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O
|
||||
0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE
|
||||
0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER
|
||||
0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES
|
||||
0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE
|
||||
0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U
|
||||
0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF
|
||||
0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA
|
||||
0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E
|
||||
0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU
|
||||
0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA
|
||||
0x00e0: 0x0430, # CYRILLIC SMALL LETTER A
|
||||
0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE
|
||||
0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE
|
||||
0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE
|
||||
0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE
|
||||
0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE
|
||||
0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE
|
||||
0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE
|
||||
0x00e8: 0x0438, # CYRILLIC SMALL LETTER I
|
||||
0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA
|
||||
0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL
|
||||
0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM
|
||||
0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN
|
||||
0x00ee: 0x043e, # CYRILLIC SMALL LETTER O
|
||||
0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE
|
||||
0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER
|
||||
0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES
|
||||
0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE
|
||||
0x00f3: 0x0443, # CYRILLIC SMALL LETTER U
|
||||
0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF
|
||||
0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA
|
||||
0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE
|
||||
0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE
|
||||
0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA
|
||||
0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU
|
||||
0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
|
||||
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map into encoding_map: each value of decoding_map
# becomes a key that points back at its original decoding_map key.
#
# decoding_map uses None to mark undefined entries (0x0098 in this
# table).  Such an entry has no character to encode, so it is skipped
# instead of being stored under a meaningless None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,78 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1252.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1252 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, StreamReader,
    StreamWriter).  The encoder and decoder are bound methods taken from
    two separate, freshly created Codec instances; the last two items
    are the classes themselves, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: 0x02dc, # SMALL TILDE
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map into encoding_map: each value of decoding_map
# becomes a key that points back at its original decoding_map key.
#
# decoding_map uses None to mark undefined entries (e.g. 0x0081).  Those
# have no character to encode, so they are skipped; inverting them
# blindly would collapse all of them onto a single meaningless None key
# whose value is whichever undefined entry was visited last.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,153 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1253.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1253 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, StreamReader,
    StreamWriter).  The encoder and decoder are bound methods taken from
    two separate, freshly created Codec instances; the last two items
    are the classes themselves, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: None, # UNDEFINED
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: None, # UNDEFINED
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: None, # UNDEFINED
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: None, # UNDEFINED
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: None, # UNDEFINED
|
||||
0x00a1: 0x0385, # GREEK DIALYTIKA TONOS
|
||||
0x00a2: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x00aa: None, # UNDEFINED
|
||||
0x00af: 0x2015, # HORIZONTAL BAR
|
||||
0x00b4: 0x0384, # GREEK TONOS
|
||||
0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA
|
||||
0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON
|
||||
0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA
|
||||
0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA
|
||||
0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA
|
||||
0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA
|
||||
0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA
|
||||
0x00cc: 0x039c, # GREEK CAPITAL LETTER MU
|
||||
0x00cd: 0x039d, # GREEK CAPITAL LETTER NU
|
||||
0x00ce: 0x039e, # GREEK CAPITAL LETTER XI
|
||||
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x00d2: None, # UNDEFINED
|
||||
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI
|
||||
0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI
|
||||
0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
|
||||
0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA
|
||||
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA
|
||||
0x00e7: 0x03b7, # GREEK SMALL LETTER ETA
|
||||
0x00e8: 0x03b8, # GREEK SMALL LETTER THETA
|
||||
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
|
||||
0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA
|
||||
0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA
|
||||
0x00ec: 0x03bc, # GREEK SMALL LETTER MU
|
||||
0x00ed: 0x03bd, # GREEK SMALL LETTER NU
|
||||
0x00ee: 0x03be, # GREEK SMALL LETTER XI
|
||||
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
|
||||
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00f1: 0x03c1, # GREEK SMALL LETTER RHO
|
||||
0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON
|
||||
0x00f6: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00f7: 0x03c7, # GREEK SMALL LETTER CHI
|
||||
0x00f8: 0x03c8, # GREEK SMALL LETTER PSI
|
||||
0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA
|
||||
0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map into encoding_map: each value of decoding_map
# becomes a key that points back at its original decoding_map key.
#
# decoding_map uses None to mark undefined entries (e.g. 0x0081, 0x00d2,
# 0x00ff).  Those have no character to encode, so they are skipped;
# inverting them blindly would collapse all of them onto a single
# meaningless None key whose value is whichever undefined entry was
# visited last.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,84 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1254.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1254 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, StreamReader,
    StreamWriter).  The encoder and decoder are bound methods taken from
    two separate, freshly created Codec instances; the last two items
    are the classes themselves, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: 0x02dc, # SMALL TILDE
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map into encoding_map: each value of decoding_map
# becomes a key that points back at its original decoding_map key.
#
# decoding_map uses None to mark undefined entries (e.g. 0x0081).  Those
# have no character to encode, so they are skipped; inverting them
# blindly would collapse all of them onto a single meaningless None key
# whose value is whichever undefined entry was visited last.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,145 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1255.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec driven by this module's CP1255 mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode input through the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode(),
        whose result is returned as-is.
        """
        table = encoding_map
        return codecs.charmap_encode(input, errors, table)

    def decode(self, input, errors='strict'):
        """Decode input through the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode(),
        whose result is returned as-is.
        """
        table = decoding_map
        return codecs.charmap_decode(input, errors, table)
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer that mixes this module's Codec into codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader that mixes this module's Codec into codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer) that the encodings package expects from every codec
    module: two bound methods followed by the two stream classes.
    """
    # Codec.encode/decode do not touch instance state, so one instance
    # can supply both bound methods.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: None, # UNDEFINED
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: None, # UNDEFINED
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: 0x02dc, # SMALL TILDE
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: None, # UNDEFINED
|
||||
0x00a4: 0x20aa, # NEW SHEQEL SIGN
|
||||
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00ba: 0x00f7, # DIVISION SIGN
|
||||
0x00c0: 0x05b0, # HEBREW POINT SHEVA
|
||||
0x00c1: 0x05b1, # HEBREW POINT HATAF SEGOL
|
||||
0x00c2: 0x05b2, # HEBREW POINT HATAF PATAH
|
||||
0x00c3: 0x05b3, # HEBREW POINT HATAF QAMATS
|
||||
0x00c4: 0x05b4, # HEBREW POINT HIRIQ
|
||||
0x00c5: 0x05b5, # HEBREW POINT TSERE
|
||||
0x00c6: 0x05b6, # HEBREW POINT SEGOL
|
||||
0x00c7: 0x05b7, # HEBREW POINT PATAH
|
||||
0x00c8: 0x05b8, # HEBREW POINT QAMATS
|
||||
0x00c9: 0x05b9, # HEBREW POINT HOLAM
|
||||
0x00ca: None, # UNDEFINED
|
||||
0x00cb: 0x05bb, # HEBREW POINT QUBUTS
|
||||
0x00cc: 0x05bc, # HEBREW POINT DAGESH OR MAPIQ
|
||||
0x00cd: 0x05bd, # HEBREW POINT METEG
|
||||
0x00ce: 0x05be, # HEBREW PUNCTUATION MAQAF
|
||||
0x00cf: 0x05bf, # HEBREW POINT RAFE
|
||||
0x00d0: 0x05c0, # HEBREW PUNCTUATION PASEQ
|
||||
0x00d1: 0x05c1, # HEBREW POINT SHIN DOT
|
||||
0x00d2: 0x05c2, # HEBREW POINT SIN DOT
|
||||
0x00d3: 0x05c3, # HEBREW PUNCTUATION SOF PASUQ
|
||||
0x00d4: 0x05f0, # HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||||
0x00d5: 0x05f1, # HEBREW LIGATURE YIDDISH VAV YOD
|
||||
0x00d6: 0x05f2, # HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
0x00d7: 0x05f3, # HEBREW PUNCTUATION GERESH
|
||||
0x00d8: 0x05f4, # HEBREW PUNCTUATION GERSHAYIM
|
||||
0x00d9: None, # UNDEFINED
|
||||
0x00da: None, # UNDEFINED
|
||||
0x00db: None, # UNDEFINED
|
||||
0x00dc: None, # UNDEFINED
|
||||
0x00dd: None, # UNDEFINED
|
||||
0x00de: None, # UNDEFINED
|
||||
0x00df: None, # UNDEFINED
|
||||
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x00e1: 0x05d1, # HEBREW LETTER BET
|
||||
0x00e2: 0x05d2, # HEBREW LETTER GIMEL
|
||||
0x00e3: 0x05d3, # HEBREW LETTER DALET
|
||||
0x00e4: 0x05d4, # HEBREW LETTER HE
|
||||
0x00e5: 0x05d5, # HEBREW LETTER VAV
|
||||
0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
|
||||
0x00e7: 0x05d7, # HEBREW LETTER HET
|
||||
0x00e8: 0x05d8, # HEBREW LETTER TET
|
||||
0x00e9: 0x05d9, # HEBREW LETTER YOD
|
||||
0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
|
||||
0x00eb: 0x05db, # HEBREW LETTER KAF
|
||||
0x00ec: 0x05dc, # HEBREW LETTER LAMED
|
||||
0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
|
||||
0x00ee: 0x05de, # HEBREW LETTER MEM
|
||||
0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
|
||||
0x00f0: 0x05e0, # HEBREW LETTER NUN
|
||||
0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
|
||||
0x00f2: 0x05e2, # HEBREW LETTER AYIN
|
||||
0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
|
||||
0x00f4: 0x05e4, # HEBREW LETTER PE
|
||||
0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
|
||||
0x00f6: 0x05e6, # HEBREW LETTER TSADI
|
||||
0x00f7: 0x05e7, # HEBREW LETTER QOF
|
||||
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
||||
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
||||
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
||||
0x00fb: None, # UNDEFINED
|
||||
0x00fc: None, # UNDEFINED
|
||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse (code point -> byte) table by inverting decoding_map.
# Entries whose target is None (marked UNDEFINED in the table) have no
# encoding, so they are skipped instead of being stored under a None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,131 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1256.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec for the table generated from 'CP1256.TXT'.

    Both methods delegate to the generic charmap helpers in ``codecs``
    and use this module's ``encoding_map`` / ``decoding_map`` tables,
    which are looked up at call time.
    """

    def encode(self,input,errors='strict'):
        """Encode *input* through ``encoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_encode``, whose
        result is returned as-is.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode *input* through ``decoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_decode``, whose
        result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer) that the encodings package expects from every codec
    module: two bound methods followed by the two stream classes.
    """
    # Codec.encode/decode do not touch instance state, so one instance
    # can supply both bound methods.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: 0x067e, # ARABIC LETTER PEH
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: 0x0679, # ARABIC LETTER TTEH
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x008d: 0x0686, # ARABIC LETTER TCHEH
|
||||
0x008e: 0x0698, # ARABIC LETTER JEH
|
||||
0x008f: 0x0688, # ARABIC LETTER DDAL
|
||||
0x0090: 0x06af, # ARABIC LETTER GAF
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: 0x06a9, # ARABIC LETTER KEHEH
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: 0x0691, # ARABIC LETTER RREH
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x009d: 0x200c, # ZERO WIDTH NON-JOINER
|
||||
0x009e: 0x200d, # ZERO WIDTH JOINER
|
||||
0x009f: 0x06ba, # ARABIC LETTER NOON GHUNNA
|
||||
0x00a1: 0x060c, # ARABIC COMMA
|
||||
0x00aa: 0x06be, # ARABIC LETTER HEH DOACHASHMEE
|
||||
0x00ba: 0x061b, # ARABIC SEMICOLON
|
||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||
0x00c0: 0x06c1, # ARABIC LETTER HEH GOAL
|
||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x00c7: 0x0627, # ARABIC LETTER ALEF
|
||||
0x00c8: 0x0628, # ARABIC LETTER BEH
|
||||
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
|
||||
0x00ca: 0x062a, # ARABIC LETTER TEH
|
||||
0x00cb: 0x062b, # ARABIC LETTER THEH
|
||||
0x00cc: 0x062c, # ARABIC LETTER JEEM
|
||||
0x00cd: 0x062d, # ARABIC LETTER HAH
|
||||
0x00ce: 0x062e, # ARABIC LETTER KHAH
|
||||
0x00cf: 0x062f, # ARABIC LETTER DAL
|
||||
0x00d0: 0x0630, # ARABIC LETTER THAL
|
||||
0x00d1: 0x0631, # ARABIC LETTER REH
|
||||
0x00d2: 0x0632, # ARABIC LETTER ZAIN
|
||||
0x00d3: 0x0633, # ARABIC LETTER SEEN
|
||||
0x00d4: 0x0634, # ARABIC LETTER SHEEN
|
||||
0x00d5: 0x0635, # ARABIC LETTER SAD
|
||||
0x00d6: 0x0636, # ARABIC LETTER DAD
|
||||
0x00d8: 0x0637, # ARABIC LETTER TAH
|
||||
0x00d9: 0x0638, # ARABIC LETTER ZAH
|
||||
0x00da: 0x0639, # ARABIC LETTER AIN
|
||||
0x00db: 0x063a, # ARABIC LETTER GHAIN
|
||||
0x00dc: 0x0640, # ARABIC TATWEEL
|
||||
0x00dd: 0x0641, # ARABIC LETTER FEH
|
||||
0x00de: 0x0642, # ARABIC LETTER QAF
|
||||
0x00df: 0x0643, # ARABIC LETTER KAF
|
||||
0x00e1: 0x0644, # ARABIC LETTER LAM
|
||||
0x00e3: 0x0645, # ARABIC LETTER MEEM
|
||||
0x00e4: 0x0646, # ARABIC LETTER NOON
|
||||
0x00e5: 0x0647, # ARABIC LETTER HEH
|
||||
0x00e6: 0x0648, # ARABIC LETTER WAW
|
||||
0x00ec: 0x0649, # ARABIC LETTER ALEF MAKSURA
|
||||
0x00ed: 0x064a, # ARABIC LETTER YEH
|
||||
0x00f0: 0x064b, # ARABIC FATHATAN
|
||||
0x00f1: 0x064c, # ARABIC DAMMATAN
|
||||
0x00f2: 0x064d, # ARABIC KASRATAN
|
||||
0x00f3: 0x064e, # ARABIC FATHA
|
||||
0x00f5: 0x064f, # ARABIC DAMMA
|
||||
0x00f6: 0x0650, # ARABIC KASRA
|
||||
0x00f8: 0x0651, # ARABIC SHADDA
|
||||
0x00fa: 0x0652, # ARABIC SUKUN
|
||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse (code point -> byte) table by inverting decoding_map.
# Any entry whose target is None (the generated tables' marker for an
# undefined position) has no encoding and is skipped rather than being
# stored under a None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,133 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1257.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec for the table generated from 'CP1257.TXT'.

    Both methods delegate to the generic charmap helpers in ``codecs``
    and use this module's ``encoding_map`` / ``decoding_map`` tables,
    which are looked up at call time.
    """

    def encode(self,input,errors='strict'):
        """Encode *input* through ``encoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_encode``, whose
        result is returned as-is.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode *input* through ``decoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_decode``, whose
        result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer) that the encodings package expects from every codec
    module: two bound methods followed by the two stream classes.
    """
    # Codec.encode/decode do not touch instance state, so one instance
    # can supply both bound methods.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: None, # UNDEFINED
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: None, # UNDEFINED
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: None, # UNDEFINED
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: None, # UNDEFINED
|
||||
0x008d: 0x00a8, # DIAERESIS
|
||||
0x008e: 0x02c7, # CARON
|
||||
0x008f: 0x00b8, # CEDILLA
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: None, # UNDEFINED
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: 0x00af, # MACRON
|
||||
0x009e: 0x02db, # OGONEK
|
||||
0x009f: None, # UNDEFINED
|
||||
0x00a1: None, # UNDEFINED
|
||||
0x00a5: None, # UNDEFINED
|
||||
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x00af: 0x00c6, # LATIN CAPITAL LETTER AE
|
||||
0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x00bf: 0x00e6, # LATIN SMALL LETTER AE
|
||||
0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse (code point -> byte) table by inverting decoding_map.
# Entries whose target is None (marked UNDEFINED in the table) have no
# encoding, so they are skipped instead of being stored under a None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,92 @@
|
|||
""" Python Character Mapping Codec generated from 'CP1258.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec for the table generated from 'CP1258.TXT'.

    Both methods delegate to the generic charmap helpers in ``codecs``
    and use this module's ``encoding_map`` / ``decoding_map`` tables,
    which are looked up at call time.
    """

    def encode(self,input,errors='strict'):
        """Encode *input* through ``encoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_encode``, whose
        result is returned as-is.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode *input* through ``decoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_decode``, whose
        result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer) that the encodings package expects from every codec
    module: two bound methods followed by the two stream classes.
    """
    # Codec.encode/decode do not touch instance state, so one instance
    # can supply both bound methods.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: 0x2020, # DAGGER
|
||||
0x0087: 0x2021, # DOUBLE DAGGER
|
||||
0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x0089: 0x2030, # PER MILLE SIGN
|
||||
0x008a: None, # UNDEFINED
|
||||
0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: 0x02dc, # SMALL TILDE
|
||||
0x0099: 0x2122, # TRADE MARK SIGN
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x009c: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x00cc: 0x0300, # COMBINING GRAVE ACCENT
|
||||
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x00d2: 0x0309, # COMBINING HOOK ABOVE
|
||||
0x00d5: 0x01a0, # LATIN CAPITAL LETTER O WITH HORN
|
||||
0x00dd: 0x01af, # LATIN CAPITAL LETTER U WITH HORN
|
||||
0x00de: 0x0303, # COMBINING TILDE
|
||||
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x00ec: 0x0301, # COMBINING ACUTE ACCENT
|
||||
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x00f2: 0x0323, # COMBINING DOT BELOW
|
||||
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
|
||||
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
|
||||
0x00fe: 0x20ab, # DONG SIGN
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse (code point -> byte) table by inverting decoding_map.
# Entries whose target is None (marked UNDEFINED in the table) have no
# encoding, so they are skipped instead of being stored under a None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,282 @@
|
|||
""" Python Character Mapping Codec generated from 'CP424.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec for the table generated from 'CP424.TXT'.

    Both methods delegate to the generic charmap helpers in ``codecs``
    and use this module's ``encoding_map`` / ``decoding_map`` tables,
    which are looked up at call time.
    """

    def encode(self,input,errors='strict'):
        """Encode *input* through ``encoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_encode``, whose
        result is returned as-is.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode *input* through ``decoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_decode``, whose
        result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer) that the encodings package expects from every codec
    module: two bound methods followed by the two stream classes.
    """
    # Codec.encode/decode do not touch instance state, so one instance
    # can supply both bound methods.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0004: 0x009c, # SELECT
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # REQUIRED NEW LINE
|
||||
0x0007: 0x007f, # DELETE
|
||||
0x0008: 0x0097, # GRAPHIC ESCAPE
|
||||
0x0009: 0x008d, # SUPERSCRIPT
|
||||
0x000a: 0x008e, # REPEAT
|
||||
0x0014: 0x009d, # RESTORE/ENABLE PRESENTATION
|
||||
0x0015: 0x0085, # NEW LINE
|
||||
0x0016: 0x0008, # BACKSPACE
|
||||
0x0017: 0x0087, # PROGRAM OPERATOR COMMUNICATION
|
||||
0x001a: 0x0092, # UNIT BACK SPACE
|
||||
0x001b: 0x008f, # CUSTOMER USE ONE
|
||||
0x0020: 0x0080, # DIGIT SELECT
|
||||
0x0021: 0x0081, # START OF SIGNIFICANCE
|
||||
0x0022: 0x0082, # FIELD SEPARATOR
|
||||
0x0023: 0x0083, # WORD UNDERSCORE
|
||||
0x0024: 0x0084, # BYPASS OR INHIBIT PRESENTATION
|
||||
0x0025: 0x000a, # LINE FEED
|
||||
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0027: 0x001b, # ESCAPE
|
||||
0x0028: 0x0088, # SET ATTRIBUTE
|
||||
0x0029: 0x0089, # START FIELD EXTENDED
|
||||
0x002a: 0x008a, # SET MODE OR SWITCH
|
||||
0x002b: 0x008b, # CONTROL SEQUENCE PREFIX
|
||||
0x002c: 0x008c, # MODIFY FIELD ATTRIBUTE
|
||||
0x002d: 0x0005, # ENQUIRY
|
||||
0x002e: 0x0006, # ACKNOWLEDGE
|
||||
0x002f: 0x0007, # BELL
|
||||
0x0030: 0x0090, # <reserved>
|
||||
0x0031: 0x0091, # <reserved>
|
||||
0x0032: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0033: 0x0093, # INDEX RETURN
|
||||
0x0034: 0x0094, # PRESENTATION POSITION
|
||||
0x0035: 0x0095, # TRANSPARENT
|
||||
0x0036: 0x0096, # NUMERIC BACKSPACE
|
||||
0x0037: 0x0004, # END OF TRANSMISSION
|
||||
0x0038: 0x0098, # SUBSCRIPT
|
||||
0x0039: 0x0099, # INDENT TABULATION
|
||||
0x003a: 0x009a, # REVERSE FORM FEED
|
||||
0x003b: 0x009b, # CUSTOMER USE THREE
|
||||
0x003c: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x003e: 0x009e, # <reserved>
|
||||
0x003f: 0x001a, # SUBSTITUTE
|
||||
0x0040: 0x0020, # SPACE
|
||||
0x0041: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x0042: 0x05d1, # HEBREW LETTER BET
|
||||
0x0043: 0x05d2, # HEBREW LETTER GIMEL
|
||||
0x0044: 0x05d3, # HEBREW LETTER DALET
|
||||
0x0045: 0x05d4, # HEBREW LETTER HE
|
||||
0x0046: 0x05d5, # HEBREW LETTER VAV
|
||||
0x0047: 0x05d6, # HEBREW LETTER ZAYIN
|
||||
0x0048: 0x05d7, # HEBREW LETTER HET
|
||||
0x0049: 0x05d8, # HEBREW LETTER TET
|
||||
0x004a: 0x00a2, # CENT SIGN
|
||||
0x004b: 0x002e, # FULL STOP
|
||||
0x004c: 0x003c, # LESS-THAN SIGN
|
||||
0x004d: 0x0028, # LEFT PARENTHESIS
|
||||
0x004e: 0x002b, # PLUS SIGN
|
||||
0x004f: 0x007c, # VERTICAL LINE
|
||||
0x0050: 0x0026, # AMPERSAND
|
||||
0x0051: 0x05d9, # HEBREW LETTER YOD
|
||||
0x0052: 0x05da, # HEBREW LETTER FINAL KAF
|
||||
0x0053: 0x05db, # HEBREW LETTER KAF
|
||||
0x0054: 0x05dc, # HEBREW LETTER LAMED
|
||||
0x0055: 0x05dd, # HEBREW LETTER FINAL MEM
|
||||
0x0056: 0x05de, # HEBREW LETTER MEM
|
||||
0x0057: 0x05df, # HEBREW LETTER FINAL NUN
|
||||
0x0058: 0x05e0, # HEBREW LETTER NUN
|
||||
0x0059: 0x05e1, # HEBREW LETTER SAMEKH
|
||||
0x005a: 0x0021, # EXCLAMATION MARK
|
||||
0x005b: 0x0024, # DOLLAR SIGN
|
||||
0x005c: 0x002a, # ASTERISK
|
||||
0x005d: 0x0029, # RIGHT PARENTHESIS
|
||||
0x005e: 0x003b, # SEMICOLON
|
||||
0x005f: 0x00ac, # NOT SIGN
|
||||
0x0060: 0x002d, # HYPHEN-MINUS
|
||||
0x0061: 0x002f, # SOLIDUS
|
||||
0x0062: 0x05e2, # HEBREW LETTER AYIN
|
||||
0x0063: 0x05e3, # HEBREW LETTER FINAL PE
|
||||
0x0064: 0x05e4, # HEBREW LETTER PE
|
||||
0x0065: 0x05e5, # HEBREW LETTER FINAL TSADI
|
||||
0x0066: 0x05e6, # HEBREW LETTER TSADI
|
||||
0x0067: 0x05e7, # HEBREW LETTER QOF
|
||||
0x0068: 0x05e8, # HEBREW LETTER RESH
|
||||
0x0069: 0x05e9, # HEBREW LETTER SHIN
|
||||
0x006a: 0x00a6, # BROKEN BAR
|
||||
0x006b: 0x002c, # COMMA
|
||||
0x006c: 0x0025, # PERCENT SIGN
|
||||
0x006d: 0x005f, # LOW LINE
|
||||
0x006e: 0x003e, # GREATER-THAN SIGN
|
||||
0x006f: 0x003f, # QUESTION MARK
|
||||
0x0070: None, # UNDEFINED
|
||||
0x0071: 0x05ea, # HEBREW LETTER TAV
|
||||
0x0072: None, # UNDEFINED
|
||||
0x0073: None, # UNDEFINED
|
||||
0x0074: 0x00a0, # NO-BREAK SPACE
|
||||
0x0075: None, # UNDEFINED
|
||||
0x0076: None, # UNDEFINED
|
||||
0x0077: None, # UNDEFINED
|
||||
0x0078: 0x2017, # DOUBLE LOW LINE
|
||||
0x0079: 0x0060, # GRAVE ACCENT
|
||||
0x007a: 0x003a, # COLON
|
||||
0x007b: 0x0023, # NUMBER SIGN
|
||||
0x007c: 0x0040, # COMMERCIAL AT
|
||||
0x007d: 0x0027, # APOSTROPHE
|
||||
0x007e: 0x003d, # EQUALS SIGN
|
||||
0x007f: 0x0022, # QUOTATION MARK
|
||||
0x0080: None, # UNDEFINED
|
||||
0x0081: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0082: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0083: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0084: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0085: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0086: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0087: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0088: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0089: 0x0069, # LATIN SMALL LETTER I
|
||||
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008c: None, # UNDEFINED
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x0090: 0x00b0, # DEGREE SIGN
|
||||
0x0091: 0x006a, # LATIN SMALL LETTER J
|
||||
0x0092: 0x006b, # LATIN SMALL LETTER K
|
||||
0x0093: 0x006c, # LATIN SMALL LETTER L
|
||||
0x0094: 0x006d, # LATIN SMALL LETTER M
|
||||
0x0095: 0x006e, # LATIN SMALL LETTER N
|
||||
0x0096: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0097: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0098: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0099: 0x0072, # LATIN SMALL LETTER R
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: None, # UNDEFINED
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: 0x00b8, # CEDILLA
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: 0x00a4, # CURRENCY SIGN
|
||||
0x00a0: 0x00b5, # MICRO SIGN
|
||||
0x00a1: 0x007e, # TILDE
|
||||
0x00a2: 0x0073, # LATIN SMALL LETTER S
|
||||
0x00a3: 0x0074, # LATIN SMALL LETTER T
|
||||
0x00a4: 0x0075, # LATIN SMALL LETTER U
|
||||
0x00a5: 0x0076, # LATIN SMALL LETTER V
|
||||
0x00a6: 0x0077, # LATIN SMALL LETTER W
|
||||
0x00a7: 0x0078, # LATIN SMALL LETTER X
|
||||
0x00a8: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x00a9: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x00aa: None, # UNDEFINED
|
||||
0x00ab: None, # UNDEFINED
|
||||
0x00ac: None, # UNDEFINED
|
||||
0x00ad: None, # UNDEFINED
|
||||
0x00ae: None, # UNDEFINED
|
||||
0x00af: 0x00ae, # REGISTERED SIGN
|
||||
0x00b0: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x00b1: 0x00a3, # POUND SIGN
|
||||
0x00b2: 0x00a5, # YEN SIGN
|
||||
0x00b3: 0x00b7, # MIDDLE DOT
|
||||
0x00b4: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00b5: 0x00a7, # SECTION SIGN
|
||||
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00ba: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x00bb: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x00bc: 0x00af, # MACRON
|
||||
0x00bd: 0x00a8, # DIAERESIS
|
||||
0x00be: 0x00b4, # ACUTE ACCENT
|
||||
0x00bf: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00c0: 0x007b, # LEFT CURLY BRACKET
|
||||
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x00ca: 0x00ad, # SOFT HYPHEN
|
||||
0x00cb: None, # UNDEFINED
|
||||
0x00cc: None, # UNDEFINED
|
||||
0x00cd: None, # UNDEFINED
|
||||
0x00ce: None, # UNDEFINED
|
||||
0x00cf: None, # UNDEFINED
|
||||
0x00d0: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x00da: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x00db: None, # UNDEFINED
|
||||
0x00dc: None, # UNDEFINED
|
||||
0x00dd: None, # UNDEFINED
|
||||
0x00de: None, # UNDEFINED
|
||||
0x00df: None, # UNDEFINED
|
||||
0x00e0: 0x005c, # REVERSE SOLIDUS
|
||||
0x00e1: 0x00f7, # DIVISION SIGN
|
||||
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x00ea: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00eb: None, # UNDEFINED
|
||||
0x00ec: None, # UNDEFINED
|
||||
0x00ed: None, # UNDEFINED
|
||||
0x00ee: None, # UNDEFINED
|
||||
0x00ef: None, # UNDEFINED
|
||||
0x00f0: 0x0030, # DIGIT ZERO
|
||||
0x00f1: 0x0031, # DIGIT ONE
|
||||
0x00f2: 0x0032, # DIGIT TWO
|
||||
0x00f3: 0x0033, # DIGIT THREE
|
||||
0x00f4: 0x0034, # DIGIT FOUR
|
||||
0x00f5: 0x0035, # DIGIT FIVE
|
||||
0x00f6: 0x0036, # DIGIT SIX
|
||||
0x00f7: 0x0037, # DIGIT SEVEN
|
||||
0x00f8: 0x0038, # DIGIT EIGHT
|
||||
0x00f9: 0x0039, # DIGIT NINE
|
||||
0x00fa: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00fb: None, # UNDEFINED
|
||||
0x00fc: None, # UNDEFINED
|
||||
0x00fd: None, # UNDEFINED
|
||||
0x00fe: None, # UNDEFINED
|
||||
0x00ff: 0x009f, # EIGHT ONES
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse (code point -> byte) table by inverting decoding_map.
# Entries whose target is None (marked UNDEFINED in the table) have no
# encoding, so they are skipped instead of being stored under a None key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP437.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec for the table generated from 'CP437.TXT'.

    Both methods delegate to the generic charmap helpers in ``codecs``
    and use this module's ``encoding_map`` / ``decoding_map`` tables,
    which are looked up at call time.
    """

    def encode(self,input,errors='strict'):
        """Encode *input* through ``encoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_encode``, whose
        result is returned as-is.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode *input* through ``decoding_map``.

        *errors* is passed unchanged to ``codecs.charmap_decode``, whose
        result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer built from this module's Codec and codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader built from this module's Codec and codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two base classes.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the codec registry entry for this module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods taken
    from two separate, freshly created Codec instances; the reader and
    writer are returned as classes, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map (CP437 byte value -> Unicode code point)
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x009b: 0x00a2, # CENT SIGN
|
||||
0x009c: 0x00a3, # POUND SIGN
|
||||
0x009d: 0x00a5, # YEN SIGN
|
||||
0x009e: 0x20a7, # PESETA SIGN
|
||||
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x00a8: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00a9: 0x2310, # REVERSED NOT SIGN
|
||||
0x00aa: 0x00ac, # NOT SIGN
|
||||
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00e6: 0x00b5, # MICRO SIGN
|
||||
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00ec: 0x221e, # INFINITY
|
||||
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00ef: 0x2229, # INTERSECTION
|
||||
0x00f0: 0x2261, # IDENTICAL TO
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00f4: 0x2320, # TOP HALF INTEGRAL
|
||||
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
|
||||
0x00f6: 0x00f7, # DIVISION SIGN
|
||||
0x00f7: 0x2248, # ALMOST EQUAL TO
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so that each Unicode code point maps back to the
# CP437 byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,282 @@
|
|||
""" Python Character Mapping Codec generated from 'CP500.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character-mapping codec for CP500.

    Both directions delegate to the generic charmap helpers in the
    codecs module, driven by this module's encoding_map and
    decoding_map tables.
    """

    def encode(self, input, errors='strict'):
        """Encode input using encoding_map.

        errors is handed to codecs.charmap_encode unchanged; the
        helper's return value is returned as is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode input using decoding_map.

        errors is handed to codecs.charmap_decode unchanged; the
        helper's return value is returned as is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """CP500 stream writer.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """CP500 stream reader.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the codec registry entry for this module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods taken
    from two separate, freshly created Codec instances; the reader and
    writer are returned as classes, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map (CP500 byte value -> Unicode code point)
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
0x0007: 0x007f, # DELETE
|
||||
0x0008: 0x0097, # CONTROL
|
||||
0x0009: 0x008d, # CONTROL
|
||||
0x000a: 0x008e, # CONTROL
|
||||
0x0014: 0x009d, # CONTROL
|
||||
0x0015: 0x0085, # CONTROL
|
||||
0x0016: 0x0008, # BACKSPACE
|
||||
0x0017: 0x0087, # CONTROL
|
||||
0x001a: 0x0092, # CONTROL
|
||||
0x001b: 0x008f, # CONTROL
|
||||
0x0020: 0x0080, # CONTROL
|
||||
0x0021: 0x0081, # CONTROL
|
||||
0x0022: 0x0082, # CONTROL
|
||||
0x0023: 0x0083, # CONTROL
|
||||
0x0024: 0x0084, # CONTROL
|
||||
0x0025: 0x000a, # LINE FEED
|
||||
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0027: 0x001b, # ESCAPE
|
||||
0x0028: 0x0088, # CONTROL
|
||||
0x0029: 0x0089, # CONTROL
|
||||
0x002a: 0x008a, # CONTROL
|
||||
0x002b: 0x008b, # CONTROL
|
||||
0x002c: 0x008c, # CONTROL
|
||||
0x002d: 0x0005, # ENQUIRY
|
||||
0x002e: 0x0006, # ACKNOWLEDGE
|
||||
0x002f: 0x0007, # BELL
|
||||
0x0030: 0x0090, # CONTROL
|
||||
0x0031: 0x0091, # CONTROL
|
||||
0x0032: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0033: 0x0093, # CONTROL
|
||||
0x0034: 0x0094, # CONTROL
|
||||
0x0035: 0x0095, # CONTROL
|
||||
0x0036: 0x0096, # CONTROL
|
||||
0x0037: 0x0004, # END OF TRANSMISSION
|
||||
0x0038: 0x0098, # CONTROL
|
||||
0x0039: 0x0099, # CONTROL
|
||||
0x003a: 0x009a, # CONTROL
|
||||
0x003b: 0x009b, # CONTROL
|
||||
0x003c: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x003e: 0x009e, # CONTROL
|
||||
0x003f: 0x001a, # SUBSTITUTE
|
||||
0x0040: 0x0020, # SPACE
|
||||
0x0041: 0x00a0, # NO-BREAK SPACE
|
||||
0x0042: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x0043: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x0044: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0045: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x0046: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x0047: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x0048: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x0049: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x004a: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x004b: 0x002e, # FULL STOP
|
||||
0x004c: 0x003c, # LESS-THAN SIGN
|
||||
0x004d: 0x0028, # LEFT PARENTHESIS
|
||||
0x004e: 0x002b, # PLUS SIGN
|
||||
0x004f: 0x0021, # EXCLAMATION MARK
|
||||
0x0050: 0x0026, # AMPERSAND
|
||||
0x0051: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0052: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0053: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x0054: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x0055: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x0056: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x0057: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x0058: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x0059: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x005a: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x005b: 0x0024, # DOLLAR SIGN
|
||||
0x005c: 0x002a, # ASTERISK
|
||||
0x005d: 0x0029, # RIGHT PARENTHESIS
|
||||
0x005e: 0x003b, # SEMICOLON
|
||||
0x005f: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x0060: 0x002d, # HYPHEN-MINUS
|
||||
0x0061: 0x002f, # SOLIDUS
|
||||
0x0062: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x0063: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0064: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x0065: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x0066: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x0067: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0068: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0069: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x006a: 0x00a6, # BROKEN BAR
|
||||
0x006b: 0x002c, # COMMA
|
||||
0x006c: 0x0025, # PERCENT SIGN
|
||||
0x006d: 0x005f, # LOW LINE
|
||||
0x006e: 0x003e, # GREATER-THAN SIGN
|
||||
0x006f: 0x003f, # QUESTION MARK
|
||||
0x0070: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x0071: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0072: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x0073: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x0074: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x0075: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x0076: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x0077: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x0078: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x0079: 0x0060, # GRAVE ACCENT
|
||||
0x007a: 0x003a, # COLON
|
||||
0x007b: 0x0023, # NUMBER SIGN
|
||||
0x007c: 0x0040, # COMMERCIAL AT
|
||||
0x007d: 0x0027, # APOSTROPHE
|
||||
0x007e: 0x003d, # EQUALS SIGN
|
||||
0x007f: 0x0022, # QUOTATION MARK
|
||||
0x0080: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x0081: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0082: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0083: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0084: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0085: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0086: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0087: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0088: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0089: 0x0069, # LATIN SMALL LETTER I
|
||||
0x008a: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008b: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x008c: 0x00f0, # LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
0x008d: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x008e: 0x00fe, # LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
0x008f: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x0090: 0x00b0, # DEGREE SIGN
|
||||
0x0091: 0x006a, # LATIN SMALL LETTER J
|
||||
0x0092: 0x006b, # LATIN SMALL LETTER K
|
||||
0x0093: 0x006c, # LATIN SMALL LETTER L
|
||||
0x0094: 0x006d, # LATIN SMALL LETTER M
|
||||
0x0095: 0x006e, # LATIN SMALL LETTER N
|
||||
0x0096: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0097: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0098: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0099: 0x0072, # LATIN SMALL LETTER R
|
||||
0x009a: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x009b: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x009c: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x009d: 0x00b8, # CEDILLA
|
||||
0x009e: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x009f: 0x00a4, # CURRENCY SIGN
|
||||
0x00a0: 0x00b5, # MICRO SIGN
|
||||
0x00a1: 0x007e, # TILDE
|
||||
0x00a2: 0x0073, # LATIN SMALL LETTER S
|
||||
0x00a3: 0x0074, # LATIN SMALL LETTER T
|
||||
0x00a4: 0x0075, # LATIN SMALL LETTER U
|
||||
0x00a5: 0x0076, # LATIN SMALL LETTER V
|
||||
0x00a6: 0x0077, # LATIN SMALL LETTER W
|
||||
0x00a7: 0x0078, # LATIN SMALL LETTER X
|
||||
0x00a8: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x00a9: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x00aa: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00ab: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00ac: 0x00d0, # LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
0x00ad: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00ae: 0x00de, # LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
0x00af: 0x00ae, # REGISTERED SIGN
|
||||
0x00b0: 0x00a2, # CENT SIGN
|
||||
0x00b1: 0x00a3, # POUND SIGN
|
||||
0x00b2: 0x00a5, # YEN SIGN
|
||||
0x00b3: 0x00b7, # MIDDLE DOT
|
||||
0x00b4: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00b5: 0x00a7, # SECTION SIGN
|
||||
0x00b7: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00b8: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00b9: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00ba: 0x00ac, # NOT SIGN
|
||||
0x00bb: 0x007c, # VERTICAL LINE
|
||||
0x00bc: 0x00af, # MACRON
|
||||
0x00bd: 0x00a8, # DIAERESIS
|
||||
0x00be: 0x00b4, # ACUTE ACCENT
|
||||
0x00bf: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00c0: 0x007b, # LEFT CURLY BRACKET
|
||||
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x00ca: 0x00ad, # SOFT HYPHEN
|
||||
0x00cb: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00cc: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00cd: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00ce: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00cf: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00d0: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x00da: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x00db: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00dc: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00dd: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00de: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00df: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x00e0: 0x005c, # REVERSE SOLIDUS
|
||||
0x00e1: 0x00f7, # DIVISION SIGN
|
||||
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x00ea: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00eb: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00ec: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00ed: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00ef: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00f0: 0x0030, # DIGIT ZERO
|
||||
0x00f1: 0x0031, # DIGIT ONE
|
||||
0x00f2: 0x0032, # DIGIT TWO
|
||||
0x00f3: 0x0033, # DIGIT THREE
|
||||
0x00f4: 0x0034, # DIGIT FOUR
|
||||
0x00f5: 0x0035, # DIGIT FIVE
|
||||
0x00f6: 0x0036, # DIGIT SIX
|
||||
0x00f7: 0x0037, # DIGIT SEVEN
|
||||
0x00f8: 0x0038, # DIGIT EIGHT
|
||||
0x00f9: 0x0039, # DIGIT NINE
|
||||
0x00fa: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00fb: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00fc: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so that each Unicode code point maps back to the
# CP500 byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP737.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character-mapping codec for CP737.

    Both directions delegate to the generic charmap helpers in the
    codecs module, driven by this module's encoding_map and
    decoding_map tables.
    """

    def encode(self, input, errors='strict'):
        """Encode input using encoding_map.

        errors is handed to codecs.charmap_encode unchanged; the
        helper's return value is returned as is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode input using decoding_map.

        errors is handed to codecs.charmap_decode unchanged; the
        helper's return value is returned as is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """CP737 stream writer.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """CP737 stream reader.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the codec registry entry for this module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods taken
    from two separate, freshly created Codec instances; the reader and
    writer are returned as classes, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map (CP737 byte value -> Unicode code point)
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA
|
||||
0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA
|
||||
0x0086: 0x0397, # GREEK CAPITAL LETTER ETA
|
||||
0x0087: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA
|
||||
0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA
|
||||
0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA
|
||||
0x008b: 0x039c, # GREEK CAPITAL LETTER MU
|
||||
0x008c: 0x039d, # GREEK CAPITAL LETTER NU
|
||||
0x008d: 0x039e, # GREEK CAPITAL LETTER XI
|
||||
0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x008f: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI
|
||||
0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI
|
||||
0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x0099: 0x03b2, # GREEK SMALL LETTER BETA
|
||||
0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA
|
||||
0x009b: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x009d: 0x03b6, # GREEK SMALL LETTER ZETA
|
||||
0x009e: 0x03b7, # GREEK SMALL LETTER ETA
|
||||
0x009f: 0x03b8, # GREEK SMALL LETTER THETA
|
||||
0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA
|
||||
0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA
|
||||
0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA
|
||||
0x00a3: 0x03bc, # GREEK SMALL LETTER MU
|
||||
0x00a4: 0x03bd, # GREEK SMALL LETTER NU
|
||||
0x00a5: 0x03be, # GREEK SMALL LETTER XI
|
||||
0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON
|
||||
0x00a7: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00a8: 0x03c1, # GREEK SMALL LETTER RHO
|
||||
0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x00ab: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON
|
||||
0x00ad: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00ae: 0x03c7, # GREEK SMALL LETTER CHI
|
||||
0x00af: 0x03c8, # GREEK SMALL LETTER PSI
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA
|
||||
0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00f6: 0x00f7, # DIVISION SIGN
|
||||
0x00f7: 0x2248, # ALMOST EQUAL TO
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so that each Unicode code point maps back to the
# CP737 byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP775.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character-mapping codec for CP775.

    Both directions delegate to the generic charmap helpers in the
    codecs module, driven by this module's encoding_map and
    decoding_map tables.
    """

    def encode(self, input, errors='strict'):
        """Encode input using encoding_map.

        errors is handed to codecs.charmap_encode unchanged; the
        helper's return value is returned as is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode input using decoding_map.

        errors is handed to codecs.charmap_decode unchanged; the
        helper's return value is returned as is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """CP775 stream writer.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """CP775 stream reader.

    Adds nothing of its own: Codec is listed first among the bases, so
    its encode()/decode() are the ones found on this class, while the
    stream handling comes from codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the codec registry entry for this module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods taken
    from two separate, freshly created Codec instances; the reader and
    writer are returned as classes, not instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map (CP775 byte value -> Unicode code point)
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0096: 0x00a2, # CENT SIGN
|
||||
0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x009c: 0x00a3, # POUND SIGN
|
||||
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x009e: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x009f: 0x00a4, # CURRENCY SIGN
|
||||
0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00a7: 0x00a6, # BROKEN BAR
|
||||
0x00a8: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00a9: 0x00ae, # REGISTERED SIGN
|
||||
0x00aa: 0x00ac, # NOT SIGN
|
||||
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00e6: 0x00b5, # MICRO SIGN
|
||||
0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00f0: 0x00ad, # SOFT HYPHEN
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00f4: 0x00b6, # PILCROW SIGN
|
||||
0x00f5: 0x00a7, # SECTION SIGN
|
||||
0x00f6: 0x00f7, # DIVISION SIGN
|
||||
0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x00fc: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so that each Unicode code point maps back to the
# CP775 byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP850.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec that translates through this module's mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* using ``encoding_map``.

        The work is delegated to ``codecs.charmap_encode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode *input* using ``decoding_map``.

        The work is delegated to ``codecs.charmap_decode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The entry is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods of two
    separately created Codec instances; the stream reader and writer
    are returned as classes.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Decoding table for the upper half of the code page: each key is a
# byte value in 0x80-0xff, each value the Unicode code point named in
# the trailing comment.  Used by Codec.decode via codecs.charmap_decode.
decoding_map = {

    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x0082: 0x00e9,  # LATIN SMALL LETTER E WITH ACUTE
    0x0083: 0x00e2,  # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0084: 0x00e4,  # LATIN SMALL LETTER A WITH DIAERESIS
    0x0085: 0x00e0,  # LATIN SMALL LETTER A WITH GRAVE
    0x0086: 0x00e5,  # LATIN SMALL LETTER A WITH RING ABOVE
    0x0087: 0x00e7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0088: 0x00ea,  # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0089: 0x00eb,  # LATIN SMALL LETTER E WITH DIAERESIS
    0x008a: 0x00e8,  # LATIN SMALL LETTER E WITH GRAVE
    0x008b: 0x00ef,  # LATIN SMALL LETTER I WITH DIAERESIS
    0x008c: 0x00ee,  # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x008d: 0x00ec,  # LATIN SMALL LETTER I WITH GRAVE
    0x008e: 0x00c4,  # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x008f: 0x00c5,  # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0090: 0x00c9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x0091: 0x00e6,  # LATIN SMALL LIGATURE AE
    0x0092: 0x00c6,  # LATIN CAPITAL LIGATURE AE
    0x0093: 0x00f4,  # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0094: 0x00f6,  # LATIN SMALL LETTER O WITH DIAERESIS
    0x0095: 0x00f2,  # LATIN SMALL LETTER O WITH GRAVE
    0x0096: 0x00fb,  # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x0097: 0x00f9,  # LATIN SMALL LETTER U WITH GRAVE
    0x0098: 0x00ff,  # LATIN SMALL LETTER Y WITH DIAERESIS
    0x0099: 0x00d6,  # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x009a: 0x00dc,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x009b: 0x00f8,  # LATIN SMALL LETTER O WITH STROKE
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: 0x00d8,  # LATIN CAPITAL LETTER O WITH STROKE
    0x009e: 0x00d7,  # MULTIPLICATION SIGN
    0x009f: 0x0192,  # LATIN SMALL LETTER F WITH HOOK
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x00f1,  # LATIN SMALL LETTER N WITH TILDE
    0x00a5: 0x00d1,  # LATIN CAPITAL LETTER N WITH TILDE
    0x00a6: 0x00aa,  # FEMININE ORDINAL INDICATOR
    0x00a7: 0x00ba,  # MASCULINE ORDINAL INDICATOR
    0x00a8: 0x00bf,  # INVERTED QUESTION MARK
    0x00a9: 0x00ae,  # REGISTERED SIGN
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: 0x00a1,  # INVERTED EXCLAMATION MARK
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x00c1,  # LATIN CAPITAL LETTER A WITH ACUTE
    0x00b6: 0x00c2,  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x00b7: 0x00c0,  # LATIN CAPITAL LETTER A WITH GRAVE
    0x00b8: 0x00a9,  # COPYRIGHT SIGN
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x00a2,  # CENT SIGN
    0x00be: 0x00a5,  # YEN SIGN
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x00e3,  # LATIN SMALL LETTER A WITH TILDE
    0x00c7: 0x00c3,  # LATIN CAPITAL LETTER A WITH TILDE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x00a4,  # CURRENCY SIGN
    0x00d0: 0x00f0,  # LATIN SMALL LETTER ETH
    0x00d1: 0x00d0,  # LATIN CAPITAL LETTER ETH
    0x00d2: 0x00ca,  # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0x00d3: 0x00cb,  # LATIN CAPITAL LETTER E WITH DIAERESIS
    0x00d4: 0x00c8,  # LATIN CAPITAL LETTER E WITH GRAVE
    0x00d5: 0x0131,  # LATIN SMALL LETTER DOTLESS I
    0x00d6: 0x00cd,  # LATIN CAPITAL LETTER I WITH ACUTE
    0x00d7: 0x00ce,  # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x00d8: 0x00cf,  # LATIN CAPITAL LETTER I WITH DIAERESIS
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x00a6,  # BROKEN BAR
    0x00de: 0x00cc,  # LATIN CAPITAL LETTER I WITH GRAVE
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x00d3,  # LATIN CAPITAL LETTER O WITH ACUTE
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S
    0x00e2: 0x00d4,  # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x00e3: 0x00d2,  # LATIN CAPITAL LETTER O WITH GRAVE
    0x00e4: 0x00f5,  # LATIN SMALL LETTER O WITH TILDE
    0x00e5: 0x00d5,  # LATIN CAPITAL LETTER O WITH TILDE
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: 0x00fe,  # LATIN SMALL LETTER THORN
    0x00e8: 0x00de,  # LATIN CAPITAL LETTER THORN
    0x00e9: 0x00da,  # LATIN CAPITAL LETTER U WITH ACUTE
    0x00ea: 0x00db,  # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0x00eb: 0x00d9,  # LATIN CAPITAL LETTER U WITH GRAVE
    0x00ec: 0x00fd,  # LATIN SMALL LETTER Y WITH ACUTE
    0x00ed: 0x00dd,  # LATIN CAPITAL LETTER Y WITH ACUTE
    0x00ee: 0x00af,  # MACRON
    0x00ef: 0x00b4,  # ACUTE ACCENT
    0x00f0: 0x00ad,  # SOFT HYPHEN
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: 0x2017,  # DOUBLE LOW LINE
    0x00f3: 0x00be,  # VULGAR FRACTION THREE QUARTERS
    0x00f4: 0x00b6,  # PILCROW SIGN
    0x00f5: 0x00a7,  # SECTION SIGN
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x00b8,  # CEDILLA
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x00a8,  # DIAERESIS
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x00b9,  # SUPERSCRIPT ONE
    0x00fc: 0x00b3,  # SUPERSCRIPT THREE
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping each (key, value) pair of
# decoding_map, so that it can be handed to codecs.charmap_encode.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP852.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec that translates through this module's mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* using ``encoding_map``.

        The work is delegated to ``codecs.charmap_encode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode *input* using ``decoding_map``.

        The work is delegated to ``codecs.charmap_decode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The entry is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods of two
    separately created Codec instances; the stream reader and writer
    are returned as classes.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Decoding table for the upper half of the code page: each key is a
# byte value in 0x80-0xff, each value the Unicode code point named in
# the trailing comment.  Used by Codec.decode via codecs.charmap_decode.
decoding_map = {

    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x0082: 0x00e9,  # LATIN SMALL LETTER E WITH ACUTE
    0x0083: 0x00e2,  # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0084: 0x00e4,  # LATIN SMALL LETTER A WITH DIAERESIS
    0x0085: 0x016f,  # LATIN SMALL LETTER U WITH RING ABOVE
    0x0086: 0x0107,  # LATIN SMALL LETTER C WITH ACUTE
    0x0087: 0x00e7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0088: 0x0142,  # LATIN SMALL LETTER L WITH STROKE
    0x0089: 0x00eb,  # LATIN SMALL LETTER E WITH DIAERESIS
    0x008a: 0x0150,  # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
    0x008b: 0x0151,  # LATIN SMALL LETTER O WITH DOUBLE ACUTE
    0x008c: 0x00ee,  # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x008d: 0x0179,  # LATIN CAPITAL LETTER Z WITH ACUTE
    0x008e: 0x00c4,  # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x008f: 0x0106,  # LATIN CAPITAL LETTER C WITH ACUTE
    0x0090: 0x00c9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x0091: 0x0139,  # LATIN CAPITAL LETTER L WITH ACUTE
    0x0092: 0x013a,  # LATIN SMALL LETTER L WITH ACUTE
    0x0093: 0x00f4,  # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0094: 0x00f6,  # LATIN SMALL LETTER O WITH DIAERESIS
    0x0095: 0x013d,  # LATIN CAPITAL LETTER L WITH CARON
    0x0096: 0x013e,  # LATIN SMALL LETTER L WITH CARON
    0x0097: 0x015a,  # LATIN CAPITAL LETTER S WITH ACUTE
    0x0098: 0x015b,  # LATIN SMALL LETTER S WITH ACUTE
    0x0099: 0x00d6,  # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x009a: 0x00dc,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x009b: 0x0164,  # LATIN CAPITAL LETTER T WITH CARON
    0x009c: 0x0165,  # LATIN SMALL LETTER T WITH CARON
    0x009d: 0x0141,  # LATIN CAPITAL LETTER L WITH STROKE
    0x009e: 0x00d7,  # MULTIPLICATION SIGN
    0x009f: 0x010d,  # LATIN SMALL LETTER C WITH CARON
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x0104,  # LATIN CAPITAL LETTER A WITH OGONEK
    0x00a5: 0x0105,  # LATIN SMALL LETTER A WITH OGONEK
    0x00a6: 0x017d,  # LATIN CAPITAL LETTER Z WITH CARON
    0x00a7: 0x017e,  # LATIN SMALL LETTER Z WITH CARON
    0x00a8: 0x0118,  # LATIN CAPITAL LETTER E WITH OGONEK
    0x00a9: 0x0119,  # LATIN SMALL LETTER E WITH OGONEK
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x017a,  # LATIN SMALL LETTER Z WITH ACUTE
    0x00ac: 0x010c,  # LATIN CAPITAL LETTER C WITH CARON
    0x00ad: 0x015f,  # LATIN SMALL LETTER S WITH CEDILLA
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x00c1,  # LATIN CAPITAL LETTER A WITH ACUTE
    0x00b6: 0x00c2,  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x00b7: 0x011a,  # LATIN CAPITAL LETTER E WITH CARON
    0x00b8: 0x015e,  # LATIN CAPITAL LETTER S WITH CEDILLA
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x017b,  # LATIN CAPITAL LETTER Z WITH DOT ABOVE
    0x00be: 0x017c,  # LATIN SMALL LETTER Z WITH DOT ABOVE
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x0102,  # LATIN CAPITAL LETTER A WITH BREVE
    0x00c7: 0x0103,  # LATIN SMALL LETTER A WITH BREVE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x00a4,  # CURRENCY SIGN
    0x00d0: 0x0111,  # LATIN SMALL LETTER D WITH STROKE
    0x00d1: 0x0110,  # LATIN CAPITAL LETTER D WITH STROKE
    0x00d2: 0x010e,  # LATIN CAPITAL LETTER D WITH CARON
    0x00d3: 0x00cb,  # LATIN CAPITAL LETTER E WITH DIAERESIS
    0x00d4: 0x010f,  # LATIN SMALL LETTER D WITH CARON
    0x00d5: 0x0147,  # LATIN CAPITAL LETTER N WITH CARON
    0x00d6: 0x00cd,  # LATIN CAPITAL LETTER I WITH ACUTE
    0x00d7: 0x00ce,  # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x00d8: 0x011b,  # LATIN SMALL LETTER E WITH CARON
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x0162,  # LATIN CAPITAL LETTER T WITH CEDILLA
    0x00de: 0x016e,  # LATIN CAPITAL LETTER U WITH RING ABOVE
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x00d3,  # LATIN CAPITAL LETTER O WITH ACUTE
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S
    0x00e2: 0x00d4,  # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x00e3: 0x0143,  # LATIN CAPITAL LETTER N WITH ACUTE
    0x00e4: 0x0144,  # LATIN SMALL LETTER N WITH ACUTE
    0x00e5: 0x0148,  # LATIN SMALL LETTER N WITH CARON
    0x00e6: 0x0160,  # LATIN CAPITAL LETTER S WITH CARON
    0x00e7: 0x0161,  # LATIN SMALL LETTER S WITH CARON
    0x00e8: 0x0154,  # LATIN CAPITAL LETTER R WITH ACUTE
    0x00e9: 0x00da,  # LATIN CAPITAL LETTER U WITH ACUTE
    0x00ea: 0x0155,  # LATIN SMALL LETTER R WITH ACUTE
    0x00eb: 0x0170,  # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
    0x00ec: 0x00fd,  # LATIN SMALL LETTER Y WITH ACUTE
    0x00ed: 0x00dd,  # LATIN CAPITAL LETTER Y WITH ACUTE
    0x00ee: 0x0163,  # LATIN SMALL LETTER T WITH CEDILLA
    0x00ef: 0x00b4,  # ACUTE ACCENT
    0x00f0: 0x00ad,  # SOFT HYPHEN
    0x00f1: 0x02dd,  # DOUBLE ACUTE ACCENT
    0x00f2: 0x02db,  # OGONEK
    0x00f3: 0x02c7,  # CARON
    0x00f4: 0x02d8,  # BREVE
    0x00f5: 0x00a7,  # SECTION SIGN
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x00b8,  # CEDILLA
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x00a8,  # DIAERESIS
    0x00fa: 0x02d9,  # DOT ABOVE
    0x00fb: 0x0171,  # LATIN SMALL LETTER U WITH DOUBLE ACUTE
    0x00fc: 0x0158,  # LATIN CAPITAL LETTER R WITH CARON
    0x00fd: 0x0159,  # LATIN SMALL LETTER R WITH CARON
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping each (key, value) pair of
# decoding_map, so that it can be handed to codecs.charmap_encode.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP855.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec that translates through this module's mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* using ``encoding_map``.

        The work is delegated to ``codecs.charmap_encode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode *input* using ``decoding_map``.

        The work is delegated to ``codecs.charmap_decode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The entry is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods of two
    separately created Codec instances; the stream reader and writer
    are returned as classes.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Decoding table for the upper half of the code page: each key is a
# byte value in 0x80-0xff, each value the Unicode code point named in
# the trailing comment.  Used by Codec.decode via codecs.charmap_decode.
decoding_map = {

    0x0080: 0x0452,  # CYRILLIC SMALL LETTER DJE
    0x0081: 0x0402,  # CYRILLIC CAPITAL LETTER DJE
    0x0082: 0x0453,  # CYRILLIC SMALL LETTER GJE
    0x0083: 0x0403,  # CYRILLIC CAPITAL LETTER GJE
    0x0084: 0x0451,  # CYRILLIC SMALL LETTER IO
    0x0085: 0x0401,  # CYRILLIC CAPITAL LETTER IO
    0x0086: 0x0454,  # CYRILLIC SMALL LETTER UKRAINIAN IE
    0x0087: 0x0404,  # CYRILLIC CAPITAL LETTER UKRAINIAN IE
    0x0088: 0x0455,  # CYRILLIC SMALL LETTER DZE
    0x0089: 0x0405,  # CYRILLIC CAPITAL LETTER DZE
    0x008a: 0x0456,  # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    0x008b: 0x0406,  # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    0x008c: 0x0457,  # CYRILLIC SMALL LETTER YI
    0x008d: 0x0407,  # CYRILLIC CAPITAL LETTER YI
    0x008e: 0x0458,  # CYRILLIC SMALL LETTER JE
    0x008f: 0x0408,  # CYRILLIC CAPITAL LETTER JE
    0x0090: 0x0459,  # CYRILLIC SMALL LETTER LJE
    0x0091: 0x0409,  # CYRILLIC CAPITAL LETTER LJE
    0x0092: 0x045a,  # CYRILLIC SMALL LETTER NJE
    0x0093: 0x040a,  # CYRILLIC CAPITAL LETTER NJE
    0x0094: 0x045b,  # CYRILLIC SMALL LETTER TSHE
    0x0095: 0x040b,  # CYRILLIC CAPITAL LETTER TSHE
    0x0096: 0x045c,  # CYRILLIC SMALL LETTER KJE
    0x0097: 0x040c,  # CYRILLIC CAPITAL LETTER KJE
    0x0098: 0x045e,  # CYRILLIC SMALL LETTER SHORT U
    0x0099: 0x040e,  # CYRILLIC CAPITAL LETTER SHORT U
    0x009a: 0x045f,  # CYRILLIC SMALL LETTER DZHE
    0x009b: 0x040f,  # CYRILLIC CAPITAL LETTER DZHE
    0x009c: 0x044e,  # CYRILLIC SMALL LETTER YU
    0x009d: 0x042e,  # CYRILLIC CAPITAL LETTER YU
    0x009e: 0x044a,  # CYRILLIC SMALL LETTER HARD SIGN
    0x009f: 0x042a,  # CYRILLIC CAPITAL LETTER HARD SIGN
    0x00a0: 0x0430,  # CYRILLIC SMALL LETTER A
    0x00a1: 0x0410,  # CYRILLIC CAPITAL LETTER A
    0x00a2: 0x0431,  # CYRILLIC SMALL LETTER BE
    0x00a3: 0x0411,  # CYRILLIC CAPITAL LETTER BE
    0x00a4: 0x0446,  # CYRILLIC SMALL LETTER TSE
    0x00a5: 0x0426,  # CYRILLIC CAPITAL LETTER TSE
    0x00a6: 0x0434,  # CYRILLIC SMALL LETTER DE
    0x00a7: 0x0414,  # CYRILLIC CAPITAL LETTER DE
    0x00a8: 0x0435,  # CYRILLIC SMALL LETTER IE
    0x00a9: 0x0415,  # CYRILLIC CAPITAL LETTER IE
    0x00aa: 0x0444,  # CYRILLIC SMALL LETTER EF
    0x00ab: 0x0424,  # CYRILLIC CAPITAL LETTER EF
    0x00ac: 0x0433,  # CYRILLIC SMALL LETTER GHE
    0x00ad: 0x0413,  # CYRILLIC CAPITAL LETTER GHE
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x0445,  # CYRILLIC SMALL LETTER HA
    0x00b6: 0x0425,  # CYRILLIC CAPITAL LETTER HA
    0x00b7: 0x0438,  # CYRILLIC SMALL LETTER I
    0x00b8: 0x0418,  # CYRILLIC CAPITAL LETTER I
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x0439,  # CYRILLIC SMALL LETTER SHORT I
    0x00be: 0x0419,  # CYRILLIC CAPITAL LETTER SHORT I
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x043a,  # CYRILLIC SMALL LETTER KA
    0x00c7: 0x041a,  # CYRILLIC CAPITAL LETTER KA
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x00a4,  # CURRENCY SIGN
    0x00d0: 0x043b,  # CYRILLIC SMALL LETTER EL
    0x00d1: 0x041b,  # CYRILLIC CAPITAL LETTER EL
    0x00d2: 0x043c,  # CYRILLIC SMALL LETTER EM
    0x00d3: 0x041c,  # CYRILLIC CAPITAL LETTER EM
    0x00d4: 0x043d,  # CYRILLIC SMALL LETTER EN
    0x00d5: 0x041d,  # CYRILLIC CAPITAL LETTER EN
    0x00d6: 0x043e,  # CYRILLIC SMALL LETTER O
    0x00d7: 0x041e,  # CYRILLIC CAPITAL LETTER O
    0x00d8: 0x043f,  # CYRILLIC SMALL LETTER PE
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x041f,  # CYRILLIC CAPITAL LETTER PE
    0x00de: 0x044f,  # CYRILLIC SMALL LETTER YA
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x042f,  # CYRILLIC CAPITAL LETTER YA
    0x00e1: 0x0440,  # CYRILLIC SMALL LETTER ER
    0x00e2: 0x0420,  # CYRILLIC CAPITAL LETTER ER
    0x00e3: 0x0441,  # CYRILLIC SMALL LETTER ES
    0x00e4: 0x0421,  # CYRILLIC CAPITAL LETTER ES
    0x00e5: 0x0442,  # CYRILLIC SMALL LETTER TE
    0x00e6: 0x0422,  # CYRILLIC CAPITAL LETTER TE
    0x00e7: 0x0443,  # CYRILLIC SMALL LETTER U
    0x00e8: 0x0423,  # CYRILLIC CAPITAL LETTER U
    0x00e9: 0x0436,  # CYRILLIC SMALL LETTER ZHE
    0x00ea: 0x0416,  # CYRILLIC CAPITAL LETTER ZHE
    0x00eb: 0x0432,  # CYRILLIC SMALL LETTER VE
    0x00ec: 0x0412,  # CYRILLIC CAPITAL LETTER VE
    0x00ed: 0x044c,  # CYRILLIC SMALL LETTER SOFT SIGN
    0x00ee: 0x042c,  # CYRILLIC CAPITAL LETTER SOFT SIGN
    0x00ef: 0x2116,  # NUMERO SIGN
    0x00f0: 0x00ad,  # SOFT HYPHEN
    0x00f1: 0x044b,  # CYRILLIC SMALL LETTER YERU
    0x00f2: 0x042b,  # CYRILLIC CAPITAL LETTER YERU
    0x00f3: 0x0437,  # CYRILLIC SMALL LETTER ZE
    0x00f4: 0x0417,  # CYRILLIC CAPITAL LETTER ZE
    0x00f5: 0x0448,  # CYRILLIC SMALL LETTER SHA
    0x00f6: 0x0428,  # CYRILLIC CAPITAL LETTER SHA
    0x00f7: 0x044d,  # CYRILLIC SMALL LETTER E
    0x00f8: 0x042d,  # CYRILLIC CAPITAL LETTER E
    0x00f9: 0x0449,  # CYRILLIC SMALL LETTER SHCHA
    0x00fa: 0x0429,  # CYRILLIC CAPITAL LETTER SHCHA
    0x00fb: 0x0447,  # CYRILLIC SMALL LETTER CHE
    0x00fc: 0x0427,  # CYRILLIC CAPITAL LETTER CHE
    0x00fd: 0x00a7,  # SECTION SIGN
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping each (key, value) pair of
# decoding_map, so that it can be handed to codecs.charmap_encode.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP856.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec that translates through this module's mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* using ``encoding_map``.

        The work is delegated to ``codecs.charmap_encode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode *input* using ``decoding_map``.

        The work is delegated to ``codecs.charmap_decode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return this module's codec registry entry.

    The entry is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  The encoder and decoder are bound methods of two
    separately created Codec instances; the stream reader and writer
    are returned as classes.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Decoding table for the upper half of the code page: each key is a
# byte value in 0x80-0xff, each value the Unicode code point named in
# the trailing comment.  Positions marked UNDEFINED map to None.
# Used by Codec.decode via codecs.charmap_decode.
decoding_map = {

    0x0080: 0x05d0,  # HEBREW LETTER ALEF
    0x0081: 0x05d1,  # HEBREW LETTER BET
    0x0082: 0x05d2,  # HEBREW LETTER GIMEL
    0x0083: 0x05d3,  # HEBREW LETTER DALET
    0x0084: 0x05d4,  # HEBREW LETTER HE
    0x0085: 0x05d5,  # HEBREW LETTER VAV
    0x0086: 0x05d6,  # HEBREW LETTER ZAYIN
    0x0087: 0x05d7,  # HEBREW LETTER HET
    0x0088: 0x05d8,  # HEBREW LETTER TET
    0x0089: 0x05d9,  # HEBREW LETTER YOD
    0x008a: 0x05da,  # HEBREW LETTER FINAL KAF
    0x008b: 0x05db,  # HEBREW LETTER KAF
    0x008c: 0x05dc,  # HEBREW LETTER LAMED
    0x008d: 0x05dd,  # HEBREW LETTER FINAL MEM
    0x008e: 0x05de,  # HEBREW LETTER MEM
    0x008f: 0x05df,  # HEBREW LETTER FINAL NUN
    0x0090: 0x05e0,  # HEBREW LETTER NUN
    0x0091: 0x05e1,  # HEBREW LETTER SAMEKH
    0x0092: 0x05e2,  # HEBREW LETTER AYIN
    0x0093: 0x05e3,  # HEBREW LETTER FINAL PE
    0x0094: 0x05e4,  # HEBREW LETTER PE
    0x0095: 0x05e5,  # HEBREW LETTER FINAL TSADI
    0x0096: 0x05e6,  # HEBREW LETTER TSADI
    0x0097: 0x05e7,  # HEBREW LETTER QOF
    0x0098: 0x05e8,  # HEBREW LETTER RESH
    0x0099: 0x05e9,  # HEBREW LETTER SHIN
    0x009a: 0x05ea,  # HEBREW LETTER TAV
    0x009b: None,  # UNDEFINED
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: None,  # UNDEFINED
    0x009e: 0x00d7,  # MULTIPLICATION SIGN
    0x009f: None,  # UNDEFINED
    0x00a0: None,  # UNDEFINED
    0x00a1: None,  # UNDEFINED
    0x00a2: None,  # UNDEFINED
    0x00a3: None,  # UNDEFINED
    0x00a4: None,  # UNDEFINED
    0x00a5: None,  # UNDEFINED
    0x00a6: None,  # UNDEFINED
    0x00a7: None,  # UNDEFINED
    0x00a8: None,  # UNDEFINED
    0x00a9: 0x00ae,  # REGISTERED SIGN
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: None,  # UNDEFINED
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: None,  # UNDEFINED
    0x00b6: None,  # UNDEFINED
    0x00b7: None,  # UNDEFINED
    0x00b8: 0x00a9,  # COPYRIGHT SIGN
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x00a2,  # CENT SIGN
    0x00be: 0x00a5,  # YEN SIGN
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: None,  # UNDEFINED
    0x00c7: None,  # UNDEFINED
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x00a4,  # CURRENCY SIGN
    0x00d0: None,  # UNDEFINED
    0x00d1: None,  # UNDEFINED
    0x00d2: None,  # UNDEFINED
    0x00d3: None,  # UNDEFINED
    0x00d4: None,  # UNDEFINED
    0x00d5: None,  # UNDEFINED
    0x00d6: None,  # UNDEFINED
    0x00d7: None,  # UNDEFINED
    0x00d8: None,  # UNDEFINED
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x00a6,  # BROKEN BAR
    0x00de: None,  # UNDEFINED
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: None,  # UNDEFINED
    0x00e1: None,  # UNDEFINED
    0x00e2: None,  # UNDEFINED
    0x00e3: None,  # UNDEFINED
    0x00e4: None,  # UNDEFINED
    0x00e5: None,  # UNDEFINED
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: None,  # UNDEFINED
    0x00e8: None,  # UNDEFINED
    0x00e9: None,  # UNDEFINED
    0x00ea: None,  # UNDEFINED
    0x00eb: None,  # UNDEFINED
    0x00ec: None,  # UNDEFINED
    0x00ed: None,  # UNDEFINED
    0x00ee: 0x00af,  # MACRON
    0x00ef: 0x00b4,  # ACUTE ACCENT
    0x00f0: 0x00ad,  # SOFT HYPHEN
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: 0x2017,  # DOUBLE LOW LINE
    0x00f3: 0x00be,  # VULGAR FRACTION THREE QUARTERS
    0x00f4: 0x00b6,  # PILCROW SIGN
    0x00f5: 0x00a7,  # SECTION SIGN
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x00b8,  # CEDILLA
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x00a8,  # DIAERESIS
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x00b9,  # SUPERSCRIPT ONE
    0x00fc: 0x00b3,  # SUPERSCRIPT THREE
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table (Unicode code point -> byte value) used by
# Codec.encode via codecs.charmap_encode.
#
# decoding_map stores None for byte values that are undefined in this
# code page.  Those entries have no code point to encode from, so they
# are skipped; inverting them blindly would add a meaningless None key
# whose value depends on dictionary iteration order.
encoding_map = {}
for k, v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,173 @@
|
|||
""" Python Character Mapping Codec generated from 'CP857.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Charmap codec that translates through this module's mapping tables."""

    def encode(self, input, errors='strict'):
        """Encode *input* using ``encoding_map``.

        The work is delegated to ``codecs.charmap_encode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """Decode *input* using ``decoding_map``.

        The work is delegated to ``codecs.charmap_decode`` and its
        result is returned unchanged.  *errors* is passed through as
        the error handling scheme.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """Stream writer combining this module's Codec with codecs.StreamWriter.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """Stream reader combining this module's Codec with codecs.StreamReader.

    Adds nothing of its own; all behaviour comes from the two bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec module.

        The entry is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): two bound methods taken from separate, freshly
        created Codec instances, followed by the StreamReader and
        StreamWriter classes themselves.

    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Byte value -> Unicode ordinal for the upper half of the code page.
# None marks a byte the table declares UNDEFINED.
# NOTE(review): there is no entry for 0x00ec -- presumably it is left
# to the charmap codec's handling of unmapped bytes; confirm against
# CP857.TXT.
decoding_map = {

    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x0082: 0x00e9,  # LATIN SMALL LETTER E WITH ACUTE
    0x0083: 0x00e2,  # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0084: 0x00e4,  # LATIN SMALL LETTER A WITH DIAERESIS
    0x0085: 0x00e0,  # LATIN SMALL LETTER A WITH GRAVE
    0x0086: 0x00e5,  # LATIN SMALL LETTER A WITH RING ABOVE
    0x0087: 0x00e7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0088: 0x00ea,  # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0089: 0x00eb,  # LATIN SMALL LETTER E WITH DIAERESIS
    0x008a: 0x00e8,  # LATIN SMALL LETTER E WITH GRAVE
    0x008b: 0x00ef,  # LATIN SMALL LETTER I WITH DIAERESIS
    0x008c: 0x00ee,  # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x008d: 0x0131,  # LATIN SMALL LETTER DOTLESS I
    0x008e: 0x00c4,  # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x008f: 0x00c5,  # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0090: 0x00c9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x0091: 0x00e6,  # LATIN SMALL LIGATURE AE
    0x0092: 0x00c6,  # LATIN CAPITAL LIGATURE AE
    0x0093: 0x00f4,  # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0094: 0x00f6,  # LATIN SMALL LETTER O WITH DIAERESIS
    0x0095: 0x00f2,  # LATIN SMALL LETTER O WITH GRAVE
    0x0096: 0x00fb,  # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x0097: 0x00f9,  # LATIN SMALL LETTER U WITH GRAVE
    0x0098: 0x0130,  # LATIN CAPITAL LETTER I WITH DOT ABOVE
    0x0099: 0x00d6,  # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x009a: 0x00dc,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x009b: 0x00f8,  # LATIN SMALL LETTER O WITH STROKE
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: 0x00d8,  # LATIN CAPITAL LETTER O WITH STROKE
    0x009e: 0x015e,  # LATIN CAPITAL LETTER S WITH CEDILLA
    0x009f: 0x015f,  # LATIN SMALL LETTER S WITH CEDILLA
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x00f1,  # LATIN SMALL LETTER N WITH TILDE
    0x00a5: 0x00d1,  # LATIN CAPITAL LETTER N WITH TILDE
    0x00a6: 0x011e,  # LATIN CAPITAL LETTER G WITH BREVE
    0x00a7: 0x011f,  # LATIN SMALL LETTER G WITH BREVE
    0x00a8: 0x00bf,  # INVERTED QUESTION MARK
    0x00a9: 0x00ae,  # REGISTERED SIGN
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: 0x00a1,  # INVERTED EXCLAMATION MARK
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x00c1,  # LATIN CAPITAL LETTER A WITH ACUTE
    0x00b6: 0x00c2,  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x00b7: 0x00c0,  # LATIN CAPITAL LETTER A WITH GRAVE
    0x00b8: 0x00a9,  # COPYRIGHT SIGN
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x00a2,  # CENT SIGN
    0x00be: 0x00a5,  # YEN SIGN
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x00e3,  # LATIN SMALL LETTER A WITH TILDE
    0x00c7: 0x00c3,  # LATIN CAPITAL LETTER A WITH TILDE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x00a4,  # CURRENCY SIGN
    0x00d0: 0x00ba,  # MASCULINE ORDINAL INDICATOR
    0x00d1: 0x00aa,  # FEMININE ORDINAL INDICATOR
    0x00d2: 0x00ca,  # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0x00d3: 0x00cb,  # LATIN CAPITAL LETTER E WITH DIAERESIS
    0x00d4: 0x00c8,  # LATIN CAPITAL LETTER E WITH GRAVE
    0x00d5: None,  # UNDEFINED
    0x00d6: 0x00cd,  # LATIN CAPITAL LETTER I WITH ACUTE
    0x00d7: 0x00ce,  # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x00d8: 0x00cf,  # LATIN CAPITAL LETTER I WITH DIAERESIS
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x00a6,  # BROKEN BAR
    0x00de: 0x00cc,  # LATIN CAPITAL LETTER I WITH GRAVE
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x00d3,  # LATIN CAPITAL LETTER O WITH ACUTE
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S
    0x00e2: 0x00d4,  # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x00e3: 0x00d2,  # LATIN CAPITAL LETTER O WITH GRAVE
    0x00e4: 0x00f5,  # LATIN SMALL LETTER O WITH TILDE
    0x00e5: 0x00d5,  # LATIN CAPITAL LETTER O WITH TILDE
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: None,  # UNDEFINED
    0x00e8: 0x00d7,  # MULTIPLICATION SIGN
    0x00e9: 0x00da,  # LATIN CAPITAL LETTER U WITH ACUTE
    0x00ea: 0x00db,  # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0x00eb: 0x00d9,  # LATIN CAPITAL LETTER U WITH GRAVE
    0x00ed: 0x00ff,  # LATIN SMALL LETTER Y WITH DIAERESIS
    0x00ee: 0x00af,  # MACRON
    0x00ef: 0x00b4,  # ACUTE ACCENT
    0x00f0: 0x00ad,  # SOFT HYPHEN
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: None,  # UNDEFINED
    0x00f3: 0x00be,  # VULGAR FRACTION THREE QUARTERS
    0x00f4: 0x00b6,  # PILCROW SIGN
    0x00f5: 0x00a7,  # SECTION SIGN
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x00b8,  # CEDILLA
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x00a8,  # DIAERESIS
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x00b9,  # SUPERSCRIPT ONE
    0x00fc: 0x00b3,  # SUPERSCRIPT THREE
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}

### Encoding Map

# Reverse lookup table (Unicode ordinal -> byte value), derived by
# inverting decoding_map.
encoding_map = {}
for k, v in decoding_map.items():
    # Bytes marked UNDEFINED carry None as their target; they have no
    # Unicode ordinal to encode from, so they must not end up as a
    # stray None key in the reverse table.
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP860.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec driven by this module's tables.

        Both methods hand their arguments to the charmap helpers of
        the codecs module; the mapping tables are module-level names
        that are looked up each time a method is called.

    """

    def decode(self, input, errors='strict'):

        """ Decode input through decoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_decode() is returned as-is.

        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded

    def encode(self, input, errors='strict'):

        """ Encode input through encoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_encode() is returned as-is.

        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer built from this module's Codec and
        codecs.StreamWriter; it defines no members of its own.

    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader built from this module's Codec and
        codecs.StreamReader; it defines no members of its own.

    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec module.

        The entry is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): two bound methods taken from separate, freshly
        created Codec instances, followed by the StreamReader and
        StreamWriter classes themselves.

    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Byte value -> Unicode ordinal; the table lists every byte from
# 0x80 through 0xff.
decoding_map = {

    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x0082: 0x00e9,  # LATIN SMALL LETTER E WITH ACUTE
    0x0083: 0x00e2,  # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0084: 0x00e3,  # LATIN SMALL LETTER A WITH TILDE
    0x0085: 0x00e0,  # LATIN SMALL LETTER A WITH GRAVE
    0x0086: 0x00c1,  # LATIN CAPITAL LETTER A WITH ACUTE
    0x0087: 0x00e7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0088: 0x00ea,  # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0089: 0x00ca,  # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0x008a: 0x00e8,  # LATIN SMALL LETTER E WITH GRAVE
    0x008b: 0x00cd,  # LATIN CAPITAL LETTER I WITH ACUTE
    0x008c: 0x00d4,  # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x008d: 0x00ec,  # LATIN SMALL LETTER I WITH GRAVE
    0x008e: 0x00c3,  # LATIN CAPITAL LETTER A WITH TILDE
    0x008f: 0x00c2,  # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x0090: 0x00c9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x0091: 0x00c0,  # LATIN CAPITAL LETTER A WITH GRAVE
    0x0092: 0x00c8,  # LATIN CAPITAL LETTER E WITH GRAVE
    0x0093: 0x00f4,  # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0094: 0x00f5,  # LATIN SMALL LETTER O WITH TILDE
    0x0095: 0x00f2,  # LATIN SMALL LETTER O WITH GRAVE
    0x0096: 0x00da,  # LATIN CAPITAL LETTER U WITH ACUTE
    0x0097: 0x00f9,  # LATIN SMALL LETTER U WITH GRAVE
    0x0098: 0x00cc,  # LATIN CAPITAL LETTER I WITH GRAVE
    0x0099: 0x00d5,  # LATIN CAPITAL LETTER O WITH TILDE
    0x009a: 0x00dc,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x009b: 0x00a2,  # CENT SIGN
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: 0x00d9,  # LATIN CAPITAL LETTER U WITH GRAVE
    0x009e: 0x20a7,  # PESETA SIGN
    0x009f: 0x00d3,  # LATIN CAPITAL LETTER O WITH ACUTE
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x00f1,  # LATIN SMALL LETTER N WITH TILDE
    0x00a5: 0x00d1,  # LATIN CAPITAL LETTER N WITH TILDE
    0x00a6: 0x00aa,  # FEMININE ORDINAL INDICATOR
    0x00a7: 0x00ba,  # MASCULINE ORDINAL INDICATOR
    0x00a8: 0x00bf,  # INVERTED QUESTION MARK
    0x00a9: 0x00d2,  # LATIN CAPITAL LETTER O WITH GRAVE
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: 0x00a1,  # INVERTED EXCLAMATION MARK
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x2561,  # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
    0x00b6: 0x2562,  # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0x00b7: 0x2556,  # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0x00b8: 0x2555,  # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x255c,  # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0x00be: 0x255b,  # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x255e,  # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
    0x00c7: 0x255f,  # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x2567,  # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0x00d0: 0x2568,  # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0x00d1: 0x2564,  # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
    0x00d2: 0x2565,  # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0x00d3: 0x2559,  # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0x00d4: 0x2558,  # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0x00d5: 0x2552,  # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
    0x00d6: 0x2553,  # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0x00d7: 0x256b,  # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0x00d8: 0x256a,  # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x258c,  # LEFT HALF BLOCK
    0x00de: 0x2590,  # RIGHT HALF BLOCK
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x03b1,  # GREEK SMALL LETTER ALPHA
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S
    0x00e2: 0x0393,  # GREEK CAPITAL LETTER GAMMA
    0x00e3: 0x03c0,  # GREEK SMALL LETTER PI
    0x00e4: 0x03a3,  # GREEK CAPITAL LETTER SIGMA
    0x00e5: 0x03c3,  # GREEK SMALL LETTER SIGMA
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: 0x03c4,  # GREEK SMALL LETTER TAU
    0x00e8: 0x03a6,  # GREEK CAPITAL LETTER PHI
    0x00e9: 0x0398,  # GREEK CAPITAL LETTER THETA
    0x00ea: 0x03a9,  # GREEK CAPITAL LETTER OMEGA
    0x00eb: 0x03b4,  # GREEK SMALL LETTER DELTA
    0x00ec: 0x221e,  # INFINITY
    0x00ed: 0x03c6,  # GREEK SMALL LETTER PHI
    0x00ee: 0x03b5,  # GREEK SMALL LETTER EPSILON
    0x00ef: 0x2229,  # INTERSECTION
    0x00f0: 0x2261,  # IDENTICAL TO
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: 0x2265,  # GREATER-THAN OR EQUAL TO
    0x00f3: 0x2264,  # LESS-THAN OR EQUAL TO
    0x00f4: 0x2320,  # TOP HALF INTEGRAL
    0x00f5: 0x2321,  # BOTTOM HALF INTEGRAL
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x2248,  # ALMOST EQUAL TO
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x2219,  # BULLET OPERATOR
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x221a,  # SQUARE ROOT
    0x00fc: 0x207f,  # SUPERSCRIPT LATIN SMALL LETTER N
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}

### Encoding Map

# Reverse lookup table (Unicode ordinal -> byte value), derived by
# inverting decoding_map.
encoding_map = {}
for k, v in decoding_map.items():
    # A None target would mean "undefined byte"; such an entry has no
    # Unicode ordinal to encode from, so it is never inverted into a
    # stray None key.
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP861.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec driven by this module's tables.

        Both methods hand their arguments to the charmap helpers of
        the codecs module; the mapping tables are module-level names
        that are looked up each time a method is called.

    """

    def decode(self, input, errors='strict'):

        """ Decode input through decoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_decode() is returned as-is.

        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded

    def encode(self, input, errors='strict'):

        """ Encode input through encoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_encode() is returned as-is.

        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer built from this module's Codec and
        codecs.StreamWriter; it defines no members of its own.

    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader built from this module's Codec and
        codecs.StreamReader; it defines no members of its own.

    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec module.

        The entry is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): two bound methods taken from separate, freshly
        created Codec instances, followed by the StreamReader and
        StreamWriter classes themselves.

    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Byte value -> Unicode ordinal; the table lists every byte from
# 0x80 through 0xff.
decoding_map = {

    0x0080: 0x00c7,  # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0081: 0x00fc,  # LATIN SMALL LETTER U WITH DIAERESIS
    0x0082: 0x00e9,  # LATIN SMALL LETTER E WITH ACUTE
    0x0083: 0x00e2,  # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0084: 0x00e4,  # LATIN SMALL LETTER A WITH DIAERESIS
    0x0085: 0x00e0,  # LATIN SMALL LETTER A WITH GRAVE
    0x0086: 0x00e5,  # LATIN SMALL LETTER A WITH RING ABOVE
    0x0087: 0x00e7,  # LATIN SMALL LETTER C WITH CEDILLA
    0x0088: 0x00ea,  # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0089: 0x00eb,  # LATIN SMALL LETTER E WITH DIAERESIS
    0x008a: 0x00e8,  # LATIN SMALL LETTER E WITH GRAVE
    0x008b: 0x00d0,  # LATIN CAPITAL LETTER ETH
    0x008c: 0x00f0,  # LATIN SMALL LETTER ETH
    0x008d: 0x00de,  # LATIN CAPITAL LETTER THORN
    0x008e: 0x00c4,  # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x008f: 0x00c5,  # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0090: 0x00c9,  # LATIN CAPITAL LETTER E WITH ACUTE
    0x0091: 0x00e6,  # LATIN SMALL LIGATURE AE
    0x0092: 0x00c6,  # LATIN CAPITAL LIGATURE AE
    0x0093: 0x00f4,  # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0094: 0x00f6,  # LATIN SMALL LETTER O WITH DIAERESIS
    0x0095: 0x00fe,  # LATIN SMALL LETTER THORN
    0x0096: 0x00fb,  # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x0097: 0x00dd,  # LATIN CAPITAL LETTER Y WITH ACUTE
    0x0098: 0x00fd,  # LATIN SMALL LETTER Y WITH ACUTE
    0x0099: 0x00d6,  # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x009a: 0x00dc,  # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x009b: 0x00f8,  # LATIN SMALL LETTER O WITH STROKE
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: 0x00d8,  # LATIN CAPITAL LETTER O WITH STROKE
    0x009e: 0x20a7,  # PESETA SIGN
    0x009f: 0x0192,  # LATIN SMALL LETTER F WITH HOOK
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x00c1,  # LATIN CAPITAL LETTER A WITH ACUTE
    0x00a5: 0x00cd,  # LATIN CAPITAL LETTER I WITH ACUTE
    0x00a6: 0x00d3,  # LATIN CAPITAL LETTER O WITH ACUTE
    0x00a7: 0x00da,  # LATIN CAPITAL LETTER U WITH ACUTE
    0x00a8: 0x00bf,  # INVERTED QUESTION MARK
    0x00a9: 0x2310,  # REVERSED NOT SIGN
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: 0x00a1,  # INVERTED EXCLAMATION MARK
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x2561,  # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
    0x00b6: 0x2562,  # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0x00b7: 0x2556,  # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0x00b8: 0x2555,  # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x255c,  # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0x00be: 0x255b,  # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x255e,  # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
    0x00c7: 0x255f,  # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x2567,  # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0x00d0: 0x2568,  # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0x00d1: 0x2564,  # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
    0x00d2: 0x2565,  # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0x00d3: 0x2559,  # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0x00d4: 0x2558,  # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0x00d5: 0x2552,  # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
    0x00d6: 0x2553,  # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0x00d7: 0x256b,  # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0x00d8: 0x256a,  # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x258c,  # LEFT HALF BLOCK
    0x00de: 0x2590,  # RIGHT HALF BLOCK
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x03b1,  # GREEK SMALL LETTER ALPHA
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S
    0x00e2: 0x0393,  # GREEK CAPITAL LETTER GAMMA
    0x00e3: 0x03c0,  # GREEK SMALL LETTER PI
    0x00e4: 0x03a3,  # GREEK CAPITAL LETTER SIGMA
    0x00e5: 0x03c3,  # GREEK SMALL LETTER SIGMA
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: 0x03c4,  # GREEK SMALL LETTER TAU
    0x00e8: 0x03a6,  # GREEK CAPITAL LETTER PHI
    0x00e9: 0x0398,  # GREEK CAPITAL LETTER THETA
    0x00ea: 0x03a9,  # GREEK CAPITAL LETTER OMEGA
    0x00eb: 0x03b4,  # GREEK SMALL LETTER DELTA
    0x00ec: 0x221e,  # INFINITY
    0x00ed: 0x03c6,  # GREEK SMALL LETTER PHI
    0x00ee: 0x03b5,  # GREEK SMALL LETTER EPSILON
    0x00ef: 0x2229,  # INTERSECTION
    0x00f0: 0x2261,  # IDENTICAL TO
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: 0x2265,  # GREATER-THAN OR EQUAL TO
    0x00f3: 0x2264,  # LESS-THAN OR EQUAL TO
    0x00f4: 0x2320,  # TOP HALF INTEGRAL
    0x00f5: 0x2321,  # BOTTOM HALF INTEGRAL
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x2248,  # ALMOST EQUAL TO
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x2219,  # BULLET OPERATOR
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x221a,  # SQUARE ROOT
    0x00fc: 0x207f,  # SUPERSCRIPT LATIN SMALL LETTER N
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}

### Encoding Map

# Reverse lookup table (Unicode ordinal -> byte value), derived by
# inverting decoding_map.
encoding_map = {}
for k, v in decoding_map.items():
    # A None target would mean "undefined byte"; such an entry has no
    # Unicode ordinal to encode from, so it is never inverted into a
    # stray None key.
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP862.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec driven by this module's tables.

        Both methods hand their arguments to the charmap helpers of
        the codecs module; the mapping tables are module-level names
        that are looked up each time a method is called.

    """

    def decode(self, input, errors='strict'):

        """ Decode input through decoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_decode() is returned as-is.

        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded

    def encode(self, input, errors='strict'):

        """ Encode input through encoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_encode() is returned as-is.

        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer built from this module's Codec and
        codecs.StreamWriter; it defines no members of its own.

    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader built from this module's Codec and
        codecs.StreamReader; it defines no members of its own.

    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec module.

        The entry is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): two bound methods taken from separate, freshly
        created Codec instances, followed by the StreamReader and
        StreamWriter classes themselves.

    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Byte value -> Unicode ordinal; the table lists every byte from
# 0x80 through 0xff.
decoding_map = {

    0x0080: 0x05d0,  # HEBREW LETTER ALEF
    0x0081: 0x05d1,  # HEBREW LETTER BET
    0x0082: 0x05d2,  # HEBREW LETTER GIMEL
    0x0083: 0x05d3,  # HEBREW LETTER DALET
    0x0084: 0x05d4,  # HEBREW LETTER HE
    0x0085: 0x05d5,  # HEBREW LETTER VAV
    0x0086: 0x05d6,  # HEBREW LETTER ZAYIN
    0x0087: 0x05d7,  # HEBREW LETTER HET
    0x0088: 0x05d8,  # HEBREW LETTER TET
    0x0089: 0x05d9,  # HEBREW LETTER YOD
    0x008a: 0x05da,  # HEBREW LETTER FINAL KAF
    0x008b: 0x05db,  # HEBREW LETTER KAF
    0x008c: 0x05dc,  # HEBREW LETTER LAMED
    0x008d: 0x05dd,  # HEBREW LETTER FINAL MEM
    0x008e: 0x05de,  # HEBREW LETTER MEM
    0x008f: 0x05df,  # HEBREW LETTER FINAL NUN
    0x0090: 0x05e0,  # HEBREW LETTER NUN
    0x0091: 0x05e1,  # HEBREW LETTER SAMEKH
    0x0092: 0x05e2,  # HEBREW LETTER AYIN
    0x0093: 0x05e3,  # HEBREW LETTER FINAL PE
    0x0094: 0x05e4,  # HEBREW LETTER PE
    0x0095: 0x05e5,  # HEBREW LETTER FINAL TSADI
    0x0096: 0x05e6,  # HEBREW LETTER TSADI
    0x0097: 0x05e7,  # HEBREW LETTER QOF
    0x0098: 0x05e8,  # HEBREW LETTER RESH
    0x0099: 0x05e9,  # HEBREW LETTER SHIN
    0x009a: 0x05ea,  # HEBREW LETTER TAV
    0x009b: 0x00a2,  # CENT SIGN
    0x009c: 0x00a3,  # POUND SIGN
    0x009d: 0x00a5,  # YEN SIGN
    0x009e: 0x20a7,  # PESETA SIGN
    0x009f: 0x0192,  # LATIN SMALL LETTER F WITH HOOK
    0x00a0: 0x00e1,  # LATIN SMALL LETTER A WITH ACUTE
    0x00a1: 0x00ed,  # LATIN SMALL LETTER I WITH ACUTE
    0x00a2: 0x00f3,  # LATIN SMALL LETTER O WITH ACUTE
    0x00a3: 0x00fa,  # LATIN SMALL LETTER U WITH ACUTE
    0x00a4: 0x00f1,  # LATIN SMALL LETTER N WITH TILDE
    0x00a5: 0x00d1,  # LATIN CAPITAL LETTER N WITH TILDE
    0x00a6: 0x00aa,  # FEMININE ORDINAL INDICATOR
    0x00a7: 0x00ba,  # MASCULINE ORDINAL INDICATOR
    0x00a8: 0x00bf,  # INVERTED QUESTION MARK
    0x00a9: 0x2310,  # REVERSED NOT SIGN
    0x00aa: 0x00ac,  # NOT SIGN
    0x00ab: 0x00bd,  # VULGAR FRACTION ONE HALF
    0x00ac: 0x00bc,  # VULGAR FRACTION ONE QUARTER
    0x00ad: 0x00a1,  # INVERTED EXCLAMATION MARK
    0x00ae: 0x00ab,  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00af: 0x00bb,  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00b0: 0x2591,  # LIGHT SHADE
    0x00b1: 0x2592,  # MEDIUM SHADE
    0x00b2: 0x2593,  # DARK SHADE
    0x00b3: 0x2502,  # BOX DRAWINGS LIGHT VERTICAL
    0x00b4: 0x2524,  # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x00b5: 0x2561,  # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
    0x00b6: 0x2562,  # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0x00b7: 0x2556,  # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0x00b8: 0x2555,  # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
    0x00b9: 0x2563,  # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00ba: 0x2551,  # BOX DRAWINGS DOUBLE VERTICAL
    0x00bb: 0x2557,  # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00bc: 0x255d,  # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00bd: 0x255c,  # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0x00be: 0x255b,  # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0x00bf: 0x2510,  # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x00c0: 0x2514,  # BOX DRAWINGS LIGHT UP AND RIGHT
    0x00c1: 0x2534,  # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x00c2: 0x252c,  # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x00c3: 0x251c,  # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x00c4: 0x2500,  # BOX DRAWINGS LIGHT HORIZONTAL
    0x00c5: 0x253c,  # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x00c6: 0x255e,  # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
    0x00c7: 0x255f,  # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0x00c8: 0x255a,  # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00c9: 0x2554,  # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00ca: 0x2569,  # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00cb: 0x2566,  # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00cc: 0x2560,  # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00cd: 0x2550,  # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00ce: 0x256c,  # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00cf: 0x2567,  # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0x00d0: 0x2568,  # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0x00d1: 0x2564,  # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
    0x00d2: 0x2565,  # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0x00d3: 0x2559,  # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0x00d4: 0x2558,  # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0x00d5: 0x2552,  # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
    0x00d6: 0x2553,  # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0x00d7: 0x256b,  # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0x00d8: 0x256a,  # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
    0x00d9: 0x2518,  # BOX DRAWINGS LIGHT UP AND LEFT
    0x00da: 0x250c,  # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x00db: 0x2588,  # FULL BLOCK
    0x00dc: 0x2584,  # LOWER HALF BLOCK
    0x00dd: 0x258c,  # LEFT HALF BLOCK
    0x00de: 0x2590,  # RIGHT HALF BLOCK
    0x00df: 0x2580,  # UPPER HALF BLOCK
    0x00e0: 0x03b1,  # GREEK SMALL LETTER ALPHA
    0x00e1: 0x00df,  # LATIN SMALL LETTER SHARP S (GERMAN)
    0x00e2: 0x0393,  # GREEK CAPITAL LETTER GAMMA
    0x00e3: 0x03c0,  # GREEK SMALL LETTER PI
    0x00e4: 0x03a3,  # GREEK CAPITAL LETTER SIGMA
    0x00e5: 0x03c3,  # GREEK SMALL LETTER SIGMA
    0x00e6: 0x00b5,  # MICRO SIGN
    0x00e7: 0x03c4,  # GREEK SMALL LETTER TAU
    0x00e8: 0x03a6,  # GREEK CAPITAL LETTER PHI
    0x00e9: 0x0398,  # GREEK CAPITAL LETTER THETA
    0x00ea: 0x03a9,  # GREEK CAPITAL LETTER OMEGA
    0x00eb: 0x03b4,  # GREEK SMALL LETTER DELTA
    0x00ec: 0x221e,  # INFINITY
    0x00ed: 0x03c6,  # GREEK SMALL LETTER PHI
    0x00ee: 0x03b5,  # GREEK SMALL LETTER EPSILON
    0x00ef: 0x2229,  # INTERSECTION
    0x00f0: 0x2261,  # IDENTICAL TO
    0x00f1: 0x00b1,  # PLUS-MINUS SIGN
    0x00f2: 0x2265,  # GREATER-THAN OR EQUAL TO
    0x00f3: 0x2264,  # LESS-THAN OR EQUAL TO
    0x00f4: 0x2320,  # TOP HALF INTEGRAL
    0x00f5: 0x2321,  # BOTTOM HALF INTEGRAL
    0x00f6: 0x00f7,  # DIVISION SIGN
    0x00f7: 0x2248,  # ALMOST EQUAL TO
    0x00f8: 0x00b0,  # DEGREE SIGN
    0x00f9: 0x2219,  # BULLET OPERATOR
    0x00fa: 0x00b7,  # MIDDLE DOT
    0x00fb: 0x221a,  # SQUARE ROOT
    0x00fc: 0x207f,  # SUPERSCRIPT LATIN SMALL LETTER N
    0x00fd: 0x00b2,  # SUPERSCRIPT TWO
    0x00fe: 0x25a0,  # BLACK SQUARE
    0x00ff: 0x00a0,  # NO-BREAK SPACE
}

### Encoding Map

# Reverse lookup table (Unicode ordinal -> byte value), derived by
# inverting decoding_map.
encoding_map = {}
for k, v in decoding_map.items():
    # A None target would mean "undefined byte"; such an entry has no
    # Unicode ordinal to encode from, so it is never inverted into a
    # stray None key.
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP863.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Character mapping codec driven by this module's tables.

        Both methods hand their arguments to the charmap helpers of
        the codecs module; the mapping tables are module-level names
        that are looked up each time a method is called.

    """

    def decode(self, input, errors='strict'):

        """ Decode input through decoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_decode() is returned as-is.

        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded

    def encode(self, input, errors='strict'):

        """ Encode input through encoding_map.

            errors is passed on unchanged; the result of
            codecs.charmap_encode() is returned as-is.

        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer built from this module's Codec and
        codecs.StreamWriter; it defines no members of its own.

    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader built from this module's Codec and
        codecs.StreamReader; it defines no members of its own.

    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec.

        The result is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): the bound encode and decode methods of a
        Codec instance, followed by the StreamReader and StreamWriter
        classes.

    """
    # One instance is enough to supply both bound methods.
    codec = Codec()
    return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0086: 0x00b6, # PILCROW SIGN
|
||||
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x008d: 0x2017, # DOUBLE LOW LINE
|
||||
0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x008f: 0x00a7, # SECTION SIGN
|
||||
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x0098: 0x00a4, # CURRENCY SIGN
|
||||
0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x009b: 0x00a2, # CENT SIGN
|
||||
0x009c: 0x00a3, # POUND SIGN
|
||||
0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x00a0: 0x00a6, # BROKEN BAR
|
||||
0x00a1: 0x00b4, # ACUTE ACCENT
|
||||
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00a4: 0x00a8, # DIAERESIS
|
||||
0x00a5: 0x00b8, # CEDILLA
|
||||
0x00a6: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00a7: 0x00af, # MACRON
|
||||
0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00a9: 0x2310, # REVERSED NOT SIGN
|
||||
0x00aa: 0x00ac, # NOT SIGN
|
||||
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00e6: 0x00b5, # MICRO SIGN
|
||||
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00ec: 0x221e, # INFINITY
|
||||
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00ef: 0x2229, # INTERSECTION
|
||||
0x00f0: 0x2261, # IDENTICAL TO
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00f4: 0x2320, # TOP HALF INTEGRAL
|
||||
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
|
||||
0x00f6: 0x00f7, # DIVISION SIGN
|
||||
0x00f7: 0x2248, # ALMOST EQUAL TO
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte -> code point) into code point -> byte.
# Entries whose target is None (undefined bytes) are skipped so that
# no meaningless None key ends up in the encoding map.  If several
# bytes share a code point, the one iterated last wins.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,172 @@
|
|||
""" Python Character Mapping Codec generated from 'CP864.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec driven by this module's mapping tables.

        The class keeps no state of its own; both methods hand their
        arguments to the generic charmap helpers of the codecs module
        and return the helper's result unchanged.

    """

    def encode(self, input, errors='strict'):

        """ Encode input by looking its characters up in encoding_map.

            errors is passed straight through to
            codecs.charmap_encode() and defaults to 'strict'.

        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):

        """ Decode input by looking its bytes up in decoding_map.

            errors is passed straight through to
            codecs.charmap_decode() and defaults to 'strict'.

        """
        return codecs.charmap_decode(input, errors, decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer built from Codec and codecs.StreamWriter.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):

    """ Stream reader built from Codec and codecs.StreamReader.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec.

        The result is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): the bound encode and decode methods of a
        Codec instance, followed by the StreamReader and StreamWriter
        classes.

    """
    # One instance is enough to supply both bound methods.
    codec = Codec()
    return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0025: 0x066a, # ARABIC PERCENT SIGN
|
||||
0x0080: 0x00b0, # DEGREE SIGN
|
||||
0x0081: 0x00b7, # MIDDLE DOT
|
||||
0x0082: 0x2219, # BULLET OPERATOR
|
||||
0x0083: 0x221a, # SQUARE ROOT
|
||||
0x0084: 0x2592, # MEDIUM SHADE
|
||||
0x0085: 0x2500, # FORMS LIGHT HORIZONTAL
|
||||
0x0086: 0x2502, # FORMS LIGHT VERTICAL
|
||||
0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT
|
||||
0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL
|
||||
0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT
|
||||
0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL
|
||||
0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT
|
||||
0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT
|
||||
0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT
|
||||
0x008f: 0x2518, # FORMS LIGHT UP AND LEFT
|
||||
0x0090: 0x03b2, # GREEK SMALL BETA
|
||||
0x0091: 0x221e, # INFINITY
|
||||
0x0092: 0x03c6, # GREEK SMALL PHI
|
||||
0x0093: 0x00b1, # PLUS-OR-MINUS SIGN
|
||||
0x0094: 0x00bd, # FRACTION 1/2
|
||||
0x0095: 0x00bc, # FRACTION 1/4
|
||||
0x0096: 0x2248, # ALMOST EQUAL TO
|
||||
0x0097: 0x00ab, # LEFT POINTING GUILLEMET
|
||||
0x0098: 0x00bb, # RIGHT POINTING GUILLEMET
|
||||
0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
|
||||
0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
|
||||
0x009b: None, # UNDEFINED
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
|
||||
0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
0x009f: None, # UNDEFINED
|
||||
0x00a1: 0x00ad, # SOFT HYPHEN
|
||||
0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
|
||||
0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
|
||||
0x00a6: None, # UNDEFINED
|
||||
0x00a7: None, # UNDEFINED
|
||||
0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
|
||||
0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
|
||||
0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
|
||||
0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
|
||||
0x00ac: 0x060c, # ARABIC COMMA
|
||||
0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
|
||||
0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
|
||||
0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
|
||||
0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO
|
||||
0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE
|
||||
0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO
|
||||
0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE
|
||||
0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR
|
||||
0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE
|
||||
0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX
|
||||
0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN
|
||||
0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT
|
||||
0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE
|
||||
0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
|
||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||
0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
|
||||
0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
|
||||
0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
|
||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||
0x00c0: 0x00a2, # CENT SIGN
|
||||
0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
|
||||
0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
|
||||
0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
|
||||
0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
|
||||
0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM
|
||||
0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
|
||||
0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
|
||||
0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
|
||||
0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
|
||||
0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
|
||||
0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
|
||||
0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
|
||||
0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
|
||||
0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
|
||||
0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
|
||||
0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
|
||||
0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM
|
||||
0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
|
||||
0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
|
||||
0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
|
||||
0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
|
||||
0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
|
||||
0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
|
||||
0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
|
||||
0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
|
||||
0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
|
||||
0x00db: 0x00a6, # BROKEN VERTICAL BAR
|
||||
0x00dc: 0x00ac, # NOT SIGN
|
||||
0x00dd: 0x00f7, # DIVISION SIGN
|
||||
0x00de: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
|
||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||
0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
|
||||
0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
|
||||
0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
|
||||
0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
|
||||
0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
|
||||
0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
|
||||
0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM
|
||||
0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
|
||||
0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
|
||||
0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
|
||||
0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
|
||||
0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
|
||||
0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
|
||||
0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
|
||||
0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
|
||||
0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
|
||||
0x00f1: 0x0651, # ARABIC SHADDAH
|
||||
0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
|
||||
0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM
|
||||
0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM
|
||||
0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM
|
||||
0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM
|
||||
0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
|
||||
0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
|
||||
0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
|
||||
0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
|
||||
0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
|
||||
0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
|
||||
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte -> code point) into code point -> byte.
# Several bytes in this table are UNDEFINED and map to None; they have
# no code point to encode from, so they are skipped instead of being
# stored under a meaningless None key.  If several bytes share a code
# point, the one iterated last wins.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP865.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec driven by this module's mapping tables.

        The class keeps no state of its own; both methods hand their
        arguments to the generic charmap helpers of the codecs module
        and return the helper's result unchanged.

    """

    def encode(self, input, errors='strict'):

        """ Encode input by looking its characters up in encoding_map.

            errors is passed straight through to
            codecs.charmap_encode() and defaults to 'strict'.

        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):

        """ Decode input by looking its bytes up in decoding_map.

            errors is passed straight through to
            codecs.charmap_decode() and defaults to 'strict'.

        """
        return codecs.charmap_decode(input, errors, decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer built from Codec and codecs.StreamWriter.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):

    """ Stream reader built from Codec and codecs.StreamReader.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec.

        The result is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): the bound encode and decode methods of a
        Codec instance, followed by the StreamReader and StreamWriter
        classes.

    """
    # One instance is enough to supply both bound methods.
    codec = Codec()
    return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x009c: 0x00a3, # POUND SIGN
|
||||
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x009e: 0x20a7, # PESETA SIGN
|
||||
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x00a8: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00a9: 0x2310, # REVERSED NOT SIGN
|
||||
0x00aa: 0x00ac, # NOT SIGN
|
||||
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00af: 0x00a4, # CURRENCY SIGN
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00e6: 0x00b5, # MICRO SIGN
|
||||
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00ec: 0x221e, # INFINITY
|
||||
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00ef: 0x2229, # INTERSECTION
|
||||
0x00f0: 0x2261, # IDENTICAL TO
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00f4: 0x2320, # TOP HALF INTEGRAL
|
||||
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
|
||||
0x00f6: 0x00f7, # DIVISION SIGN
|
||||
0x00f7: 0x2248, # ALMOST EQUAL TO
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
0x00fd: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte -> code point) into code point -> byte.
# Entries whose target is None (undefined bytes) are skipped so that
# no meaningless None key ends up in the encoding map.  If several
# bytes share a code point, the one iterated last wins.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP866.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec driven by this module's mapping tables.

        The class keeps no state of its own; both methods hand their
        arguments to the generic charmap helpers of the codecs module
        and return the helper's result unchanged.

    """

    def encode(self, input, errors='strict'):

        """ Encode input by looking its characters up in encoding_map.

            errors is passed straight through to
            codecs.charmap_encode() and defaults to 'strict'.

        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):

        """ Decode input by looking its bytes up in decoding_map.

            errors is passed straight through to
            codecs.charmap_decode() and defaults to 'strict'.

        """
        return codecs.charmap_decode(input, errors, decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer built from Codec and codecs.StreamWriter.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):

    """ Stream reader built from Codec and codecs.StreamReader.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the registry entry for this codec.

        The result is the 4-tuple (encoder, decoder, stream_reader,
        stream_writer): the bound encode and decode methods of a
        Codec instance, followed by the StreamReader and StreamWriter
        classes.

    """
    # One instance is enough to supply both bound methods.
    codec = Codec()
    return (codec.encode, codec.decode, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
|
||||
0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
|
||||
0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
|
||||
0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
|
||||
0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
|
||||
0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
|
||||
0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
|
||||
0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
|
||||
0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
|
||||
0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
|
||||
0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
|
||||
0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
|
||||
0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
|
||||
0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
|
||||
0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
|
||||
0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
|
||||
0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
|
||||
0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
|
||||
0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
|
||||
0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
|
||||
0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
|
||||
0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
|
||||
0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
|
||||
0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
|
||||
0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x258c, # LEFT HALF BLOCK
|
||||
0x00de: 0x2590, # RIGHT HALF BLOCK
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
|
||||
0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
|
||||
0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
|
||||
0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
|
||||
0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
|
||||
0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
|
||||
0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
|
||||
0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
|
||||
0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
|
||||
0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
|
||||
0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
|
||||
0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||
0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
|
||||
0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
|
||||
0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
|
||||
0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x2219, # BULLET OPERATOR
|
||||
0x00fa: 0x00b7, # MIDDLE DOT
|
||||
0x00fb: 0x221a, # SQUARE ROOT
|
||||
0x00fc: 0x2116, # NUMERO SIGN
|
||||
0x00fd: 0x00a4, # CURRENCY SIGN
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte -> code point) into code point -> byte.
# Entries whose target is None (undefined bytes) are skipped so that
# no meaningless None key ends up in the encoding map.  If several
# bytes share a code point, the one iterated last wins.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'CP869.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec driven by this module's mapping tables.

        The class keeps no state of its own; both methods hand their
        arguments to the generic charmap helpers of the codecs module
        and return the helper's result unchanged.

    """

    def encode(self, input, errors='strict'):

        """ Encode input by looking its characters up in encoding_map.

            errors is passed straight through to
            codecs.charmap_encode() and defaults to 'strict'.

        """
        return codecs.charmap_encode(input, errors, encoding_map)

    def decode(self, input, errors='strict'):

        """ Decode input by looking its bytes up in decoding_map.

            errors is passed straight through to
            codecs.charmap_decode() and defaults to 'strict'.

        """
        return codecs.charmap_decode(input, errors, decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer built from Codec and codecs.StreamWriter.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):

    """ Stream reader built from Codec and codecs.StreamReader.

        Adds no members of its own; Codec is listed first, so its
        encode/decode methods take precedence in the lookup order.

    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package asks each codec module for.

        The encoder and decoder are bound methods of two separate,
        freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: None, # UNDEFINED
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: None, # UNDEFINED
|
||||
0x0083: None, # UNDEFINED
|
||||
0x0084: None, # UNDEFINED
|
||||
0x0085: None, # UNDEFINED
|
||||
0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0087: None, # UNDEFINED
|
||||
0x0088: 0x00b7, # MIDDLE DOT
|
||||
0x0089: 0x00ac, # NOT SIGN
|
||||
0x008a: 0x00a6, # BROKEN BAR
|
||||
0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x008e: 0x2015, # HORIZONTAL BAR
|
||||
0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x0093: None, # UNDEFINED
|
||||
0x0094: None, # UNDEFINED
|
||||
0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x0097: 0x00a9, # COPYRIGHT SIGN
|
||||
0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0099: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x009a: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x009c: 0x00a3, # POUND SIGN
|
||||
0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA
|
||||
0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON
|
||||
0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA
|
||||
0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA
|
||||
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA
|
||||
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00b0: 0x2591, # LIGHT SHADE
|
||||
0x00b1: 0x2592, # MEDIUM SHADE
|
||||
0x00b2: 0x2593, # DARK SHADE
|
||||
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA
|
||||
0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA
|
||||
0x00b7: 0x039c, # GREEK CAPITAL LETTER MU
|
||||
0x00b8: 0x039d, # GREEK CAPITAL LETTER NU
|
||||
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x00bd: 0x039e, # GREEK CAPITAL LETTER XI
|
||||
0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI
|
||||
0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI
|
||||
0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00d7: 0x03b2, # GREEK SMALL LETTER BETA
|
||||
0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA
|
||||
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x00db: 0x2588, # FULL BLOCK
|
||||
0x00dc: 0x2584, # LOWER HALF BLOCK
|
||||
0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00df: 0x2580, # UPPER HALF BLOCK
|
||||
0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA
|
||||
0x00e1: 0x03b7, # GREEK SMALL LETTER ETA
|
||||
0x00e2: 0x03b8, # GREEK SMALL LETTER THETA
|
||||
0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA
|
||||
0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA
|
||||
0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA
|
||||
0x00e6: 0x03bc, # GREEK SMALL LETTER MU
|
||||
0x00e7: 0x03bd, # GREEK SMALL LETTER NU
|
||||
0x00e8: 0x03be, # GREEK SMALL LETTER XI
|
||||
0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON
|
||||
0x00ea: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00eb: 0x03c1, # GREEK SMALL LETTER RHO
|
||||
0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x00ee: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00ef: 0x0384, # GREEK TONOS
|
||||
0x00f0: 0x00ad, # SOFT HYPHEN
|
||||
0x00f1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON
|
||||
0x00f3: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00f4: 0x03c7, # GREEK SMALL LETTER CHI
|
||||
0x00f5: 0x00a7, # SECTION SIGN
|
||||
0x00f6: 0x03c8, # GREEK SMALL LETTER PSI
|
||||
0x00f7: 0x0385, # GREEK DIALYTIKA TONOS
|
||||
0x00f8: 0x00b0, # DEGREE SIGN
|
||||
0x00f9: 0x00a8, # DIAERESIS
|
||||
0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA
|
||||
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00fe: 0x25a0, # BLACK SQUARE
|
||||
0x00ff: 0x00a0, # NO-BREAK SPACE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte value -> Unicode ordinal) into
# encoding_map (Unicode ordinal -> byte value).
encoding_map = {}
for k,v in decoding_map.items():
    # Bytes marked UNDEFINED decode to None.  They have no Unicode
    # ordinal, so they must not be inverted into a bogus None key.
    if v is None:
        continue
    # Should several bytes decode to the same ordinal, keep the lowest
    # byte so the result never depends on dictionary iteration order.
    if encoding_map.get(v,k) >= k:
        encoding_map[v] = k
|
|
@ -0,0 +1,173 @@
|
|||
""" Python Character Mapping Codec generated from 'CP874.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec for the tables generated from 'CP874.TXT'.

        Neither method touches self; both hand their arguments and
        the module-level mapping dictionary to the charmap helpers
        of the codecs module.
    """

    def encode(self, input, errors='strict'):

        """ Encode input using encoding_map.

            errors is passed through unchanged to
            codecs.charmap_encode(), whose result is returned as-is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):

        """ Decode input using decoding_map.

            errors is passed through unchanged to
            codecs.charmap_decode(), whose result is returned as-is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer for the 'CP874.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader for the 'CP874.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package asks each codec module for.

        The encoder and decoder are bound methods of two separate,
        freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x20ac, # EURO SIGN
|
||||
0x0081: None, # UNDEFINED
|
||||
0x0082: None, # UNDEFINED
|
||||
0x0083: None, # UNDEFINED
|
||||
0x0084: None, # UNDEFINED
|
||||
0x0085: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x0086: None, # UNDEFINED
|
||||
0x0087: None, # UNDEFINED
|
||||
0x0088: None, # UNDEFINED
|
||||
0x0089: None, # UNDEFINED
|
||||
0x008a: None, # UNDEFINED
|
||||
0x008b: None, # UNDEFINED
|
||||
0x008c: None, # UNDEFINED
|
||||
0x008d: None, # UNDEFINED
|
||||
0x008e: None, # UNDEFINED
|
||||
0x008f: None, # UNDEFINED
|
||||
0x0090: None, # UNDEFINED
|
||||
0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x0095: 0x2022, # BULLET
|
||||
0x0096: 0x2013, # EN DASH
|
||||
0x0097: 0x2014, # EM DASH
|
||||
0x0098: None, # UNDEFINED
|
||||
0x0099: None, # UNDEFINED
|
||||
0x009a: None, # UNDEFINED
|
||||
0x009b: None, # UNDEFINED
|
||||
0x009c: None, # UNDEFINED
|
||||
0x009d: None, # UNDEFINED
|
||||
0x009e: None, # UNDEFINED
|
||||
0x009f: None, # UNDEFINED
|
||||
0x00a1: 0x0e01, # THAI CHARACTER KO KAI
|
||||
0x00a2: 0x0e02, # THAI CHARACTER KHO KHAI
|
||||
0x00a3: 0x0e03, # THAI CHARACTER KHO KHUAT
|
||||
0x00a4: 0x0e04, # THAI CHARACTER KHO KHWAI
|
||||
0x00a5: 0x0e05, # THAI CHARACTER KHO KHON
|
||||
0x00a6: 0x0e06, # THAI CHARACTER KHO RAKHANG
|
||||
0x00a7: 0x0e07, # THAI CHARACTER NGO NGU
|
||||
0x00a8: 0x0e08, # THAI CHARACTER CHO CHAN
|
||||
0x00a9: 0x0e09, # THAI CHARACTER CHO CHING
|
||||
0x00aa: 0x0e0a, # THAI CHARACTER CHO CHANG
|
||||
0x00ab: 0x0e0b, # THAI CHARACTER SO SO
|
||||
0x00ac: 0x0e0c, # THAI CHARACTER CHO CHOE
|
||||
0x00ad: 0x0e0d, # THAI CHARACTER YO YING
|
||||
0x00ae: 0x0e0e, # THAI CHARACTER DO CHADA
|
||||
0x00af: 0x0e0f, # THAI CHARACTER TO PATAK
|
||||
0x00b0: 0x0e10, # THAI CHARACTER THO THAN
|
||||
0x00b1: 0x0e11, # THAI CHARACTER THO NANGMONTHO
|
||||
0x00b2: 0x0e12, # THAI CHARACTER THO PHUTHAO
|
||||
0x00b3: 0x0e13, # THAI CHARACTER NO NEN
|
||||
0x00b4: 0x0e14, # THAI CHARACTER DO DEK
|
||||
0x00b5: 0x0e15, # THAI CHARACTER TO TAO
|
||||
0x00b6: 0x0e16, # THAI CHARACTER THO THUNG
|
||||
0x00b7: 0x0e17, # THAI CHARACTER THO THAHAN
|
||||
0x00b8: 0x0e18, # THAI CHARACTER THO THONG
|
||||
0x00b9: 0x0e19, # THAI CHARACTER NO NU
|
||||
0x00ba: 0x0e1a, # THAI CHARACTER BO BAIMAI
|
||||
0x00bb: 0x0e1b, # THAI CHARACTER PO PLA
|
||||
0x00bc: 0x0e1c, # THAI CHARACTER PHO PHUNG
|
||||
0x00bd: 0x0e1d, # THAI CHARACTER FO FA
|
||||
0x00be: 0x0e1e, # THAI CHARACTER PHO PHAN
|
||||
0x00bf: 0x0e1f, # THAI CHARACTER FO FAN
|
||||
0x00c0: 0x0e20, # THAI CHARACTER PHO SAMPHAO
|
||||
0x00c1: 0x0e21, # THAI CHARACTER MO MA
|
||||
0x00c2: 0x0e22, # THAI CHARACTER YO YAK
|
||||
0x00c3: 0x0e23, # THAI CHARACTER RO RUA
|
||||
0x00c4: 0x0e24, # THAI CHARACTER RU
|
||||
0x00c5: 0x0e25, # THAI CHARACTER LO LING
|
||||
0x00c6: 0x0e26, # THAI CHARACTER LU
|
||||
0x00c7: 0x0e27, # THAI CHARACTER WO WAEN
|
||||
0x00c8: 0x0e28, # THAI CHARACTER SO SALA
|
||||
0x00c9: 0x0e29, # THAI CHARACTER SO RUSI
|
||||
0x00ca: 0x0e2a, # THAI CHARACTER SO SUA
|
||||
0x00cb: 0x0e2b, # THAI CHARACTER HO HIP
|
||||
0x00cc: 0x0e2c, # THAI CHARACTER LO CHULA
|
||||
0x00cd: 0x0e2d, # THAI CHARACTER O ANG
|
||||
0x00ce: 0x0e2e, # THAI CHARACTER HO NOKHUK
|
||||
0x00cf: 0x0e2f, # THAI CHARACTER PAIYANNOI
|
||||
0x00d0: 0x0e30, # THAI CHARACTER SARA A
|
||||
0x00d1: 0x0e31, # THAI CHARACTER MAI HAN-AKAT
|
||||
0x00d2: 0x0e32, # THAI CHARACTER SARA AA
|
||||
0x00d3: 0x0e33, # THAI CHARACTER SARA AM
|
||||
0x00d4: 0x0e34, # THAI CHARACTER SARA I
|
||||
0x00d5: 0x0e35, # THAI CHARACTER SARA II
|
||||
0x00d6: 0x0e36, # THAI CHARACTER SARA UE
|
||||
0x00d7: 0x0e37, # THAI CHARACTER SARA UEE
|
||||
0x00d8: 0x0e38, # THAI CHARACTER SARA U
|
||||
0x00d9: 0x0e39, # THAI CHARACTER SARA UU
|
||||
0x00da: 0x0e3a, # THAI CHARACTER PHINTHU
|
||||
0x00db: None, # UNDEFINED
|
||||
0x00dc: None, # UNDEFINED
|
||||
0x00dd: None, # UNDEFINED
|
||||
0x00de: None, # UNDEFINED
|
||||
0x00df: 0x0e3f, # THAI CURRENCY SYMBOL BAHT
|
||||
0x00e0: 0x0e40, # THAI CHARACTER SARA E
|
||||
0x00e1: 0x0e41, # THAI CHARACTER SARA AE
|
||||
0x00e2: 0x0e42, # THAI CHARACTER SARA O
|
||||
0x00e3: 0x0e43, # THAI CHARACTER SARA AI MAIMUAN
|
||||
0x00e4: 0x0e44, # THAI CHARACTER SARA AI MAIMALAI
|
||||
0x00e5: 0x0e45, # THAI CHARACTER LAKKHANGYAO
|
||||
0x00e6: 0x0e46, # THAI CHARACTER MAIYAMOK
|
||||
0x00e7: 0x0e47, # THAI CHARACTER MAITAIKHU
|
||||
0x00e8: 0x0e48, # THAI CHARACTER MAI EK
|
||||
0x00e9: 0x0e49, # THAI CHARACTER MAI THO
|
||||
0x00ea: 0x0e4a, # THAI CHARACTER MAI TRI
|
||||
0x00eb: 0x0e4b, # THAI CHARACTER MAI CHATTAWA
|
||||
0x00ec: 0x0e4c, # THAI CHARACTER THANTHAKHAT
|
||||
0x00ed: 0x0e4d, # THAI CHARACTER NIKHAHIT
|
||||
0x00ee: 0x0e4e, # THAI CHARACTER YAMAKKAN
|
||||
0x00ef: 0x0e4f, # THAI CHARACTER FONGMAN
|
||||
0x00f0: 0x0e50, # THAI DIGIT ZERO
|
||||
0x00f1: 0x0e51, # THAI DIGIT ONE
|
||||
0x00f2: 0x0e52, # THAI DIGIT TWO
|
||||
0x00f3: 0x0e53, # THAI DIGIT THREE
|
||||
0x00f4: 0x0e54, # THAI DIGIT FOUR
|
||||
0x00f5: 0x0e55, # THAI DIGIT FIVE
|
||||
0x00f6: 0x0e56, # THAI DIGIT SIX
|
||||
0x00f7: 0x0e57, # THAI DIGIT SEVEN
|
||||
0x00f8: 0x0e58, # THAI DIGIT EIGHT
|
||||
0x00f9: 0x0e59, # THAI DIGIT NINE
|
||||
0x00fa: 0x0e5a, # THAI CHARACTER ANGKHANKHU
|
||||
0x00fb: 0x0e5b, # THAI CHARACTER KHOMUT
|
||||
0x00fc: None, # UNDEFINED
|
||||
0x00fd: None, # UNDEFINED
|
||||
0x00fe: None, # UNDEFINED
|
||||
0x00ff: None, # UNDEFINED
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte value -> Unicode ordinal) into
# encoding_map (Unicode ordinal -> byte value).
encoding_map = {}
for k,v in decoding_map.items():
    # Bytes marked UNDEFINED decode to None.  They have no Unicode
    # ordinal, so they must not be inverted into a bogus None key.
    if v is None:
        continue
    # Should several bytes decode to the same ordinal, keep the lowest
    # byte so the result never depends on dictionary iteration order.
    if encoding_map.get(v,k) >= k:
        encoding_map[v] = k
|
|
@ -0,0 +1,283 @@
|
|||
""" Python Character Mapping Codec generated from 'CP875.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec for the tables generated from 'CP875.TXT'.

        Neither method touches self; both hand their arguments and
        the module-level mapping dictionary to the charmap helpers
        of the codecs module.
    """

    def encode(self, input, errors='strict'):

        """ Encode input using encoding_map.

            errors is passed through unchanged to
            codecs.charmap_encode(), whose result is returned as-is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):

        """ Decode input using decoding_map.

            errors is passed through unchanged to
            codecs.charmap_decode(), whose result is returned as-is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer for the 'CP875.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader for the 'CP875.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package asks each codec module for.

        The encoder and decoder are bound methods of two separate,
        freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0004: 0x009c, # CONTROL
|
||||
0x0005: 0x0009, # HORIZONTAL TABULATION
|
||||
0x0006: 0x0086, # CONTROL
|
||||
0x0007: 0x007f, # DELETE
|
||||
0x0008: 0x0097, # CONTROL
|
||||
0x0009: 0x008d, # CONTROL
|
||||
0x000a: 0x008e, # CONTROL
|
||||
0x0014: 0x009d, # CONTROL
|
||||
0x0015: 0x0085, # CONTROL
|
||||
0x0016: 0x0008, # BACKSPACE
|
||||
0x0017: 0x0087, # CONTROL
|
||||
0x001a: 0x0092, # CONTROL
|
||||
0x001b: 0x008f, # CONTROL
|
||||
0x0020: 0x0080, # CONTROL
|
||||
0x0021: 0x0081, # CONTROL
|
||||
0x0022: 0x0082, # CONTROL
|
||||
0x0023: 0x0083, # CONTROL
|
||||
0x0024: 0x0084, # CONTROL
|
||||
0x0025: 0x000a, # LINE FEED
|
||||
0x0026: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0027: 0x001b, # ESCAPE
|
||||
0x0028: 0x0088, # CONTROL
|
||||
0x0029: 0x0089, # CONTROL
|
||||
0x002a: 0x008a, # CONTROL
|
||||
0x002b: 0x008b, # CONTROL
|
||||
0x002c: 0x008c, # CONTROL
|
||||
0x002d: 0x0005, # ENQUIRY
|
||||
0x002e: 0x0006, # ACKNOWLEDGE
|
||||
0x002f: 0x0007, # BELL
|
||||
0x0030: 0x0090, # CONTROL
|
||||
0x0031: 0x0091, # CONTROL
|
||||
0x0032: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0033: 0x0093, # CONTROL
|
||||
0x0034: 0x0094, # CONTROL
|
||||
0x0035: 0x0095, # CONTROL
|
||||
0x0036: 0x0096, # CONTROL
|
||||
0x0037: 0x0004, # END OF TRANSMISSION
|
||||
0x0038: 0x0098, # CONTROL
|
||||
0x0039: 0x0099, # CONTROL
|
||||
0x003a: 0x009a, # CONTROL
|
||||
0x003b: 0x009b, # CONTROL
|
||||
0x003c: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x003d: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x003e: 0x009e, # CONTROL
|
||||
0x003f: 0x001a, # SUBSTITUTE
|
||||
0x0040: 0x0020, # SPACE
|
||||
0x0041: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0042: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x0043: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0044: 0x0394, # GREEK CAPITAL LETTER DELTA
|
||||
0x0045: 0x0395, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0046: 0x0396, # GREEK CAPITAL LETTER ZETA
|
||||
0x0047: 0x0397, # GREEK CAPITAL LETTER ETA
|
||||
0x0048: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x0049: 0x0399, # GREEK CAPITAL LETTER IOTA
|
||||
0x004a: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x004b: 0x002e, # FULL STOP
|
||||
0x004c: 0x003c, # LESS-THAN SIGN
|
||||
0x004d: 0x0028, # LEFT PARENTHESIS
|
||||
0x004e: 0x002b, # PLUS SIGN
|
||||
0x004f: 0x0021, # EXCLAMATION MARK
|
||||
0x0050: 0x0026, # AMPERSAND
|
||||
0x0051: 0x039a, # GREEK CAPITAL LETTER KAPPA
|
||||
0x0052: 0x039b, # GREEK CAPITAL LETTER LAMDA
|
||||
0x0053: 0x039c, # GREEK CAPITAL LETTER MU
|
||||
0x0054: 0x039d, # GREEK CAPITAL LETTER NU
|
||||
0x0055: 0x039e, # GREEK CAPITAL LETTER XI
|
||||
0x0056: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x0057: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x0058: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x0059: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x005a: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x005b: 0x0024, # DOLLAR SIGN
|
||||
0x005c: 0x002a, # ASTERISK
|
||||
0x005d: 0x0029, # RIGHT PARENTHESIS
|
||||
0x005e: 0x003b, # SEMICOLON
|
||||
0x005f: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x0060: 0x002d, # HYPHEN-MINUS
|
||||
0x0061: 0x002f, # SOLIDUS
|
||||
0x0062: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x0063: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x0064: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x0065: 0x03a7, # GREEK CAPITAL LETTER CHI
|
||||
0x0066: 0x03a8, # GREEK CAPITAL LETTER PSI
|
||||
0x0067: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x0068: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x0069: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x006a: 0x007c, # VERTICAL LINE
|
||||
0x006b: 0x002c, # COMMA
|
||||
0x006c: 0x0025, # PERCENT SIGN
|
||||
0x006d: 0x005f, # LOW LINE
|
||||
0x006e: 0x003e, # GREATER-THAN SIGN
|
||||
0x006f: 0x003f, # QUESTION MARK
|
||||
0x0070: 0x00a8, # DIAERESIS
|
||||
0x0071: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0072: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x0073: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x0074: 0x00a0, # NO-BREAK SPACE
|
||||
0x0075: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x0076: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x0077: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x0078: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0079: 0x0060, # GRAVE ACCENT
|
||||
0x007a: 0x003a, # COLON
|
||||
0x007b: 0x0023, # NUMBER SIGN
|
||||
0x007c: 0x0040, # COMMERCIAL AT
|
||||
0x007d: 0x0027, # APOSTROPHE
|
||||
0x007e: 0x003d, # EQUALS SIGN
|
||||
0x007f: 0x0022, # QUOTATION MARK
|
||||
0x0080: 0x0385, # GREEK DIALYTIKA TONOS
|
||||
0x0081: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0082: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0083: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0084: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0085: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0086: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0087: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0088: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0089: 0x0069, # LATIN SMALL LETTER I
|
||||
0x008a: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x008b: 0x03b2, # GREEK SMALL LETTER BETA
|
||||
0x008c: 0x03b3, # GREEK SMALL LETTER GAMMA
|
||||
0x008d: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x008e: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x008f: 0x03b6, # GREEK SMALL LETTER ZETA
|
||||
0x0090: 0x00b0, # DEGREE SIGN
|
||||
0x0091: 0x006a, # LATIN SMALL LETTER J
|
||||
0x0092: 0x006b, # LATIN SMALL LETTER K
|
||||
0x0093: 0x006c, # LATIN SMALL LETTER L
|
||||
0x0094: 0x006d, # LATIN SMALL LETTER M
|
||||
0x0095: 0x006e, # LATIN SMALL LETTER N
|
||||
0x0096: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0097: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0098: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0099: 0x0072, # LATIN SMALL LETTER R
|
||||
0x009a: 0x03b7, # GREEK SMALL LETTER ETA
|
||||
0x009b: 0x03b8, # GREEK SMALL LETTER THETA
|
||||
0x009c: 0x03b9, # GREEK SMALL LETTER IOTA
|
||||
0x009d: 0x03ba, # GREEK SMALL LETTER KAPPA
|
||||
0x009e: 0x03bb, # GREEK SMALL LETTER LAMDA
|
||||
0x009f: 0x03bc, # GREEK SMALL LETTER MU
|
||||
0x00a0: 0x00b4, # ACUTE ACCENT
|
||||
0x00a1: 0x007e, # TILDE
|
||||
0x00a2: 0x0073, # LATIN SMALL LETTER S
|
||||
0x00a3: 0x0074, # LATIN SMALL LETTER T
|
||||
0x00a4: 0x0075, # LATIN SMALL LETTER U
|
||||
0x00a5: 0x0076, # LATIN SMALL LETTER V
|
||||
0x00a6: 0x0077, # LATIN SMALL LETTER W
|
||||
0x00a7: 0x0078, # LATIN SMALL LETTER X
|
||||
0x00a8: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x00a9: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x00aa: 0x03bd, # GREEK SMALL LETTER NU
|
||||
0x00ab: 0x03be, # GREEK SMALL LETTER XI
|
||||
0x00ac: 0x03bf, # GREEK SMALL LETTER OMICRON
|
||||
0x00ad: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00ae: 0x03c1, # GREEK SMALL LETTER RHO
|
||||
0x00af: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00b0: 0x00a3, # POUND SIGN
|
||||
0x00b1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x00b2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x00b3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x00b4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x00b5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x00b6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00b7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00b8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00b9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x00ba: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x00bb: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00bc: 0x03c5, # GREEK SMALL LETTER UPSILON
|
||||
0x00bd: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00be: 0x03c7, # GREEK SMALL LETTER CHI
|
||||
0x00bf: 0x03c8, # GREEK SMALL LETTER PSI
|
||||
0x00c0: 0x007b, # LEFT CURLY BRACKET
|
||||
0x00c1: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x00c2: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x00c3: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x00c4: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x00c5: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x00c6: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x00c7: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x00c8: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x00c9: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x00ca: 0x00ad, # SOFT HYPHEN
|
||||
0x00cb: 0x03c9, # GREEK SMALL LETTER OMEGA
|
||||
0x00cc: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x00cd: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x00ce: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00cf: 0x2015, # HORIZONTAL BAR
|
||||
0x00d0: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x00d1: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x00d2: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x00d3: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x00d4: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x00d5: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x00d6: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x00d7: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x00d8: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x00d9: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x00da: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00db: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00dc: 0x001a, # SUBSTITUTE
|
||||
0x00dd: 0x0387, # GREEK ANO TELEIA
|
||||
0x00de: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00df: 0x00a6, # BROKEN BAR
|
||||
0x00e0: 0x005c, # REVERSE SOLIDUS
|
||||
0x00e1: 0x001a, # SUBSTITUTE
|
||||
0x00e2: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x00e3: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x00e4: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x00e5: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x00e6: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x00e7: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x00e8: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x00e9: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x00ea: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00eb: 0x00a7, # SECTION SIGN
|
||||
0x00ec: 0x001a, # SUBSTITUTE
|
||||
0x00ed: 0x001a, # SUBSTITUTE
|
||||
0x00ee: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ef: 0x00ac, # NOT SIGN
|
||||
0x00f0: 0x0030, # DIGIT ZERO
|
||||
0x00f1: 0x0031, # DIGIT ONE
|
||||
0x00f2: 0x0032, # DIGIT TWO
|
||||
0x00f3: 0x0033, # DIGIT THREE
|
||||
0x00f4: 0x0034, # DIGIT FOUR
|
||||
0x00f5: 0x0035, # DIGIT FIVE
|
||||
0x00f6: 0x0036, # DIGIT SIX
|
||||
0x00f7: 0x0037, # DIGIT SEVEN
|
||||
0x00f8: 0x0038, # DIGIT EIGHT
|
||||
0x00f9: 0x0039, # DIGIT NINE
|
||||
0x00fa: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00fb: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00fc: 0x001a, # SUBSTITUTE
|
||||
0x00fd: 0x001a, # SUBSTITUTE
|
||||
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ff: 0x009f, # CONTROL
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map (byte value -> Unicode ordinal) into
# encoding_map (Unicode ordinal -> byte value).
encoding_map = {}
for k,v in decoding_map.items():
    # Skip entries that decode to None (no Unicode ordinal); inverting
    # them would only create a bogus None key.
    if v is None:
        continue
    # Several bytes in this table (0x3f, 0xdc, 0xe1, 0xec, 0xed, 0xfc,
    # 0xfd) all decode to 0x001a SUBSTITUTE.  A plain inversion would
    # keep whichever one the dictionary happened to yield last; always
    # keep the lowest byte (0x3f here) so encoding is deterministic.
    if encoding_map.get(v,k) >= k:
        encoding_map[v] = k
|
|
@ -0,0 +1,49 @@
|
|||
""" Python Character Mapping Codec generated from '8859-1.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec for the tables generated from '8859-1.TXT'.

        Neither method touches self; both hand their arguments and
        the module-level mapping dictionary to the charmap helpers
        of the codecs module.
    """

    def encode(self, input, errors='strict'):

        """ Encode input using encoding_map.

            errors is passed through unchanged to
            codecs.charmap_encode(), whose result is returned as-is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):

        """ Decode input using decoding_map.

            errors is passed through unchanged to
            codecs.charmap_decode(), whose result is returned as-is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):

    """ Stream writer for the '8859-1.TXT' charmap codec. """

    def __init__(self,stream,errors='strict'):

        """ Set up the writer on top of stream.

            stream -- object handed on to codecs.StreamWriter.__init__()
            errors -- error handling scheme name, handed on unchanged
                      (defaults to 'strict')
        """
        # The undefined name 'strict' used to be passed here, so every
        # instantiation raised NameError; the stream parameter is meant.
        codecs.StreamWriter.__init__(self,stream,errors)
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader for the '8859-1.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package asks each codec module for.

        The encoder and decoder are bound methods of two separate,
        freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert the decoding table: every decoded value becomes a key that
# maps back to the byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,92 @@
|
|||
""" Python Character Mapping Codec generated from '8859-10.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec for the tables generated from '8859-10.TXT'.

        Neither method touches self; both hand their arguments and
        the module-level mapping dictionary to the charmap helpers
        of the codecs module.
    """

    def encode(self, input, errors='strict'):

        """ Encode input using encoding_map.

            errors is passed through unchanged to
            codecs.charmap_encode(), whose result is returned as-is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):

        """ Decode input using decoding_map.

            errors is passed through unchanged to
            codecs.charmap_decode(), whose result is returned as-is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer for the '8859-10.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader for the '8859-10.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():

    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package asks each codec module for.

        The encoder and decoder are bound methods of two separate,
        freshly created Codec instances.
    """
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00a4: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE
|
||||
0x00a6: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00a8: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00a9: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x00aa: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00ab: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0x00ac: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00ae: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00af: 0x014a, # LATIN CAPITAL LETTER ENG
|
||||
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00b2: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x00b3: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE
|
||||
0x00b6: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00b8: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00b9: 0x0111, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x00ba: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00bb: 0x0167, # LATIN SMALL LETTER T WITH STROKE
|
||||
0x00bc: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00bd: 0x2015, # HORIZONTAL BAR
|
||||
0x00be: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00bf: 0x014b, # LATIN SMALL LETTER ENG
|
||||
0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00d7: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE
|
||||
0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert the decoding table: every decoded value becomes a key that
# maps back to the byte value it was decoded from.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,102 @@
|
|||
""" Python Character Mapping Codec generated from '8859-13.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):

    """ Charmap codec for the tables generated from '8859-13.TXT'.

        Neither method touches self; both hand their arguments and
        the module-level mapping dictionary to the charmap helpers
        of the codecs module.
    """

    def encode(self, input, errors='strict'):

        """ Encode input using encoding_map.

            errors is passed through unchanged to
            codecs.charmap_encode(), whose result is returned as-is.
        """
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):

        """ Decode input using decoding_map.

            errors is passed through unchanged to
            codecs.charmap_decode(), whose result is returned as-is.
        """
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):

    """ Stream writer for the '8859-13.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamWriter.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):

    """ Stream reader for the '8859-13.TXT' charmap codec.

        Adds nothing of its own; everything comes from Codec and
        codecs.StreamReader.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00aa: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x00af: 0x00c6, # LATIN CAPITAL LETTER AE
|
||||
0x00b4: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x00b8: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00ba: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x00bf: 0x00e6, # LATIN SMALL LETTER AE
|
||||
0x00c0: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00c1: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00c2: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x00c3: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x00c6: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00c7: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x00cb: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x00cc: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00cd: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00ce: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00cf: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00d0: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00d2: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00d4: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00d8: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00d9: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00da: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x00db: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00dd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00de: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00e0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00e1: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00e2: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x00e3: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x00e6: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00e7: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x00eb: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x00ec: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x00ed: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00ee: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x00ef: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00f0: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00f2: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00f4: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x00f8: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00f9: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x00fa: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x00fb: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,77 @@
|
|||
""" Python Character Mapping Codec generated from '8859-14.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-14.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0x00a5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0x00a6: 0x1e0a, # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
0x00a8: 0x1e80, # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
0x00aa: 0x1e82, # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
0x00ab: 0x1e0b, # LATIN SMALL LETTER D WITH DOT ABOVE
|
||||
0x00ac: 0x1ef2, # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
0x00af: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x00b0: 0x1e1e, # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
0x00b1: 0x1e1f, # LATIN SMALL LETTER F WITH DOT ABOVE
|
||||
0x00b2: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0x00b3: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0x00b4: 0x1e40, # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
0x00b5: 0x1e41, # LATIN SMALL LETTER M WITH DOT ABOVE
|
||||
0x00b7: 0x1e56, # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
0x00b8: 0x1e81, # LATIN SMALL LETTER W WITH GRAVE
|
||||
0x00b9: 0x1e57, # LATIN SMALL LETTER P WITH DOT ABOVE
|
||||
0x00ba: 0x1e83, # LATIN SMALL LETTER W WITH ACUTE
|
||||
0x00bb: 0x1e60, # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
0x00bc: 0x1ef3, # LATIN SMALL LETTER Y WITH GRAVE
|
||||
0x00bd: 0x1e84, # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
0x00be: 0x1e85, # LATIN SMALL LETTER W WITH DIAERESIS
|
||||
0x00bf: 0x1e61, # LATIN SMALL LETTER S WITH DOT ABOVE
|
||||
0x00d0: 0x0174, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0x00d7: 0x1e6a, # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
0x00de: 0x0176, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,54 @@
|
|||
""" Python Character Mapping Codec generated from '8859-15.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-15.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a4: 0x20ac, # EURO SIGN
|
||||
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00b4: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00b8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,103 @@
|
|||
""" Python Character Mapping Codec generated from '8859-2.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-2.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x02d8, # BREVE
|
||||
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00a5: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x00a6: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00ab: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x00ac: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00b2: 0x02db, # OGONEK
|
||||
0x00b3: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x00b5: 0x013e, # LATIN SMALL LETTER L WITH CARON
|
||||
0x00b6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x00b7: 0x02c7, # CARON
|
||||
0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x00bb: 0x0165, # LATIN SMALL LETTER T WITH CARON
|
||||
0x00bc: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x00bd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00c0: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x00c3: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x00c5: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x00c6: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00cc: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x00cf: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x00d1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00d2: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x00d5: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x00d8: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x00d9: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x00db: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x00de: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0x00e0: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x00e3: 0x0103, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x00e5: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x00e6: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00ec: 0x011b, # LATIN SMALL LETTER E WITH CARON
|
||||
0x00ef: 0x010f, # LATIN SMALL LETTER D WITH CARON
|
||||
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x00f1: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00f2: 0x0148, # LATIN SMALL LETTER N WITH CARON
|
||||
0x00f5: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x00f8: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||
0x00f9: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,74 @@
|
|||
""" Python Character Mapping Codec generated from '8859-3.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-3.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0x00a2: 0x02d8, # BREVE
|
||||
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
|
||||
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||
0x00b9: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
|
||||
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
|
||||
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,96 @@
|
|||
""" Python Character Mapping Codec generated from '8859-4.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-4.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
|
||||
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x00a5: 0x0128, # LATIN CAPITAL LETTER I WITH TILDE
|
||||
0x00a6: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00a9: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00aa: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x00ab: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00ac: 0x0166, # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0x00ae: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00b1: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x00b2: 0x02db, # OGONEK
|
||||
0x00b3: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x00b5: 0x0129, # LATIN SMALL LETTER I WITH TILDE
|
||||
0x00b6: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00b7: 0x02c7, # CARON
|
||||
0x00b9: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00ba: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x00bb: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x00bc: 0x0167, # LATIN SMALL LETTER T WITH STROKE
|
||||
0x00bd: 0x014a, # LATIN CAPITAL LETTER ENG
|
||||
0x00be: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00bf: 0x014b, # LATIN SMALL LETTER ENG
|
||||
0x00c0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x00c7: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00c8: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x00ca: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00cc: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x00cf: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00d0: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x00d1: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00d2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00d3: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00d9: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00dd: 0x0168, # LATIN CAPITAL LETTER U WITH TILDE
|
||||
0x00de: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00e0: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x00e7: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00e8: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x00ea: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00ec: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x00ef: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x00f0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x00f1: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00f2: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x00f3: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00ff: 0x02d9, # DOT ABOVE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,140 @@
|
|||
""" Python Character Mapping Codec generated from '8859-5.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-5.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
|
||||
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x00a4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x00a5: 0x0405, # CYRILLIC CAPITAL LETTER DZE
|
||||
0x00a6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00a7: 0x0407, # CYRILLIC CAPITAL LETTER YI
|
||||
0x00a8: 0x0408, # CYRILLIC CAPITAL LETTER JE
|
||||
0x00a9: 0x0409, # CYRILLIC CAPITAL LETTER LJE
|
||||
0x00aa: 0x040a, # CYRILLIC CAPITAL LETTER NJE
|
||||
0x00ab: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
|
||||
0x00ac: 0x040c, # CYRILLIC CAPITAL LETTER KJE
|
||||
0x00ae: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x00af: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
|
||||
0x00b0: 0x0410, # CYRILLIC CAPITAL LETTER A
|
||||
0x00b1: 0x0411, # CYRILLIC CAPITAL LETTER BE
|
||||
0x00b2: 0x0412, # CYRILLIC CAPITAL LETTER VE
|
||||
0x00b3: 0x0413, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x00b4: 0x0414, # CYRILLIC CAPITAL LETTER DE
|
||||
0x00b5: 0x0415, # CYRILLIC CAPITAL LETTER IE
|
||||
0x00b6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x00b7: 0x0417, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
|
||||
0x00b9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x00ba: 0x041a, # CYRILLIC CAPITAL LETTER KA
|
||||
0x00bb: 0x041b, # CYRILLIC CAPITAL LETTER EL
|
||||
0x00bc: 0x041c, # CYRILLIC CAPITAL LETTER EM
|
||||
0x00bd: 0x041d, # CYRILLIC CAPITAL LETTER EN
|
||||
0x00be: 0x041e, # CYRILLIC CAPITAL LETTER O
|
||||
0x00bf: 0x041f, # CYRILLIC CAPITAL LETTER PE
|
||||
0x00c0: 0x0420, # CYRILLIC CAPITAL LETTER ER
|
||||
0x00c1: 0x0421, # CYRILLIC CAPITAL LETTER ES
|
||||
0x00c2: 0x0422, # CYRILLIC CAPITAL LETTER TE
|
||||
0x00c3: 0x0423, # CYRILLIC CAPITAL LETTER U
|
||||
0x00c4: 0x0424, # CYRILLIC CAPITAL LETTER EF
|
||||
0x00c5: 0x0425, # CYRILLIC CAPITAL LETTER HA
|
||||
0x00c6: 0x0426, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x00c7: 0x0427, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x00c8: 0x0428, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x00c9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x00ca: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x00cb: 0x042b, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x00cc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x00cd: 0x042d, # CYRILLIC CAPITAL LETTER E
|
||||
0x00ce: 0x042e, # CYRILLIC CAPITAL LETTER YU
|
||||
0x00cf: 0x042f, # CYRILLIC CAPITAL LETTER YA
|
||||
0x00d0: 0x0430, # CYRILLIC SMALL LETTER A
|
||||
0x00d1: 0x0431, # CYRILLIC SMALL LETTER BE
|
||||
0x00d2: 0x0432, # CYRILLIC SMALL LETTER VE
|
||||
0x00d3: 0x0433, # CYRILLIC SMALL LETTER GHE
|
||||
0x00d4: 0x0434, # CYRILLIC SMALL LETTER DE
|
||||
0x00d5: 0x0435, # CYRILLIC SMALL LETTER IE
|
||||
0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE
|
||||
0x00d7: 0x0437, # CYRILLIC SMALL LETTER ZE
|
||||
0x00d8: 0x0438, # CYRILLIC SMALL LETTER I
|
||||
0x00d9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x00da: 0x043a, # CYRILLIC SMALL LETTER KA
|
||||
0x00db: 0x043b, # CYRILLIC SMALL LETTER EL
|
||||
0x00dc: 0x043c, # CYRILLIC SMALL LETTER EM
|
||||
0x00dd: 0x043d, # CYRILLIC SMALL LETTER EN
|
||||
0x00de: 0x043e, # CYRILLIC SMALL LETTER O
|
||||
0x00df: 0x043f, # CYRILLIC SMALL LETTER PE
|
||||
0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
|
||||
0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
|
||||
0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
|
||||
0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
|
||||
0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
|
||||
0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
|
||||
0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
|
||||
0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
|
||||
0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
|
||||
0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
|
||||
0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
|
||||
0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
|
||||
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
|
||||
0x00f0: 0x2116, # NUMERO SIGN
|
||||
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
|
||||
0x00f2: 0x0452, # CYRILLIC SMALL LETTER DJE
|
||||
0x00f3: 0x0453, # CYRILLIC SMALL LETTER GJE
|
||||
0x00f4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x00f5: 0x0455, # CYRILLIC SMALL LETTER DZE
|
||||
0x00f6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00f7: 0x0457, # CYRILLIC SMALL LETTER YI
|
||||
0x00f8: 0x0458, # CYRILLIC SMALL LETTER JE
|
||||
0x00f9: 0x0459, # CYRILLIC SMALL LETTER LJE
|
||||
0x00fa: 0x045a, # CYRILLIC SMALL LETTER NJE
|
||||
0x00fb: 0x045b, # CYRILLIC SMALL LETTER TSHE
|
||||
0x00fc: 0x045c, # CYRILLIC SMALL LETTER KJE
|
||||
0x00fd: 0x00a7, # SECTION SIGN
|
||||
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,94 @@
|
|||
""" Python Character Mapping Codec generated from '8859-6.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-6.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00ac: 0x060c, # ARABIC COMMA
|
||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x00c7: 0x0627, # ARABIC LETTER ALEF
|
||||
0x00c8: 0x0628, # ARABIC LETTER BEH
|
||||
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
|
||||
0x00ca: 0x062a, # ARABIC LETTER TEH
|
||||
0x00cb: 0x062b, # ARABIC LETTER THEH
|
||||
0x00cc: 0x062c, # ARABIC LETTER JEEM
|
||||
0x00cd: 0x062d, # ARABIC LETTER HAH
|
||||
0x00ce: 0x062e, # ARABIC LETTER KHAH
|
||||
0x00cf: 0x062f, # ARABIC LETTER DAL
|
||||
0x00d0: 0x0630, # ARABIC LETTER THAL
|
||||
0x00d1: 0x0631, # ARABIC LETTER REH
|
||||
0x00d2: 0x0632, # ARABIC LETTER ZAIN
|
||||
0x00d3: 0x0633, # ARABIC LETTER SEEN
|
||||
0x00d4: 0x0634, # ARABIC LETTER SHEEN
|
||||
0x00d5: 0x0635, # ARABIC LETTER SAD
|
||||
0x00d6: 0x0636, # ARABIC LETTER DAD
|
||||
0x00d7: 0x0637, # ARABIC LETTER TAH
|
||||
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
||||
0x00d9: 0x0639, # ARABIC LETTER AIN
|
||||
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||
0x00e1: 0x0641, # ARABIC LETTER FEH
|
||||
0x00e2: 0x0642, # ARABIC LETTER QAF
|
||||
0x00e3: 0x0643, # ARABIC LETTER KAF
|
||||
0x00e4: 0x0644, # ARABIC LETTER LAM
|
||||
0x00e5: 0x0645, # ARABIC LETTER MEEM
|
||||
0x00e6: 0x0646, # ARABIC LETTER NOON
|
||||
0x00e7: 0x0647, # ARABIC LETTER HEH
|
||||
0x00e8: 0x0648, # ARABIC LETTER WAW
|
||||
0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA
|
||||
0x00ea: 0x064a, # ARABIC LETTER YEH
|
||||
0x00eb: 0x064b, # ARABIC FATHATAN
|
||||
0x00ec: 0x064c, # ARABIC DAMMATAN
|
||||
0x00ed: 0x064d, # ARABIC KASRATAN
|
||||
0x00ee: 0x064e, # ARABIC FATHA
|
||||
0x00ef: 0x064f, # ARABIC DAMMA
|
||||
0x00f0: 0x0650, # ARABIC KASRA
|
||||
0x00f1: 0x0651, # ARABIC SHADDA
|
||||
0x00f2: 0x0652, # ARABIC SUKUN
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,120 @@
|
|||
""" Python Character Mapping Codec generated from '8859-7.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Charmap codec using the tables generated from '8859-7.TXT'.

        Both methods delegate to the charmap helpers of the codecs
        module and pass along this module's lookup tables.
    """

    def encode(self, input, errors='strict'):
        """ Run input through codecs.charmap_encode() with encoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # encoding_map is a module-level name looked up at call time.
        encoded = codecs.charmap_encode(input, errors, encoding_map)
        return encoded

    def decode(self, input, errors='strict'):
        """ Run input through codecs.charmap_decode() with decoding_map.

            errors is handed on unchanged and the helper's result is
            returned as-is.
        """
        # decoding_map is a module-level name looked up at call time.
        decoded = codecs.charmap_decode(input, errors, decoding_map)
        return decoded
|
||||
|
||||
class StreamWriter(Codec, codecs.StreamWriter):
    """ Stream writer combining Codec with codecs.StreamWriter.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
class StreamReader(Codec, codecs.StreamReader):
    """ Stream reader combining Codec with codecs.StreamReader.

        Defines nothing of its own; everything comes from the bases.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return this module's codec registry entry.

        The entry is the 4-tuple (encoder, decoder, stream reader
        class, stream writer class) that the encodings package
        search function asks each codec module for.
    """
    # Each bound method comes from its own fresh Codec instance.
    encoder = Codec().encode
    decoder = Codec().decode
    return (encoder, decoder, StreamReader, StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00af: 0x2015, # HORIZONTAL BAR
|
||||
0x00b4: 0x0384, # GREEK TONOS
|
||||
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
|
||||
0x00b6: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x00b8: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x00b9: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x00ba: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x00bc: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x00be: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x00bf: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x00c0: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x00c1: 0x0391, # GREEK CAPITAL LETTER ALPHA
|
||||
0x00c2: 0x0392, # GREEK CAPITAL LETTER BETA
|
||||
0x00c3: 0x0393, # GREEK CAPITAL LETTER GAMMA
|
||||
0x00c4: 0x0394, # GREEK CAPITAL LETTER DELTA
|
||||
0x00c5: 0x0395, # GREEK CAPITAL LETTER EPSILON
|
||||
0x00c6: 0x0396, # GREEK CAPITAL LETTER ZETA
|
||||
0x00c7: 0x0397, # GREEK CAPITAL LETTER ETA
|
||||
0x00c8: 0x0398, # GREEK CAPITAL LETTER THETA
|
||||
0x00c9: 0x0399, # GREEK CAPITAL LETTER IOTA
|
||||
0x00ca: 0x039a, # GREEK CAPITAL LETTER KAPPA
|
||||
0x00cb: 0x039b, # GREEK CAPITAL LETTER LAMDA
|
||||
0x00cc: 0x039c, # GREEK CAPITAL LETTER MU
|
||||
0x00cd: 0x039d, # GREEK CAPITAL LETTER NU
|
||||
0x00ce: 0x039e, # GREEK CAPITAL LETTER XI
|
||||
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
|
||||
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
|
||||
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
|
||||
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
|
||||
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x00d6: 0x03a6, # GREEK CAPITAL LETTER PHI
|
||||
0x00d7: 0x03a7, # GREEK CAPITAL LETTER CHI
|
||||
0x00d8: 0x03a8, # GREEK CAPITAL LETTER PSI
|
||||
0x00d9: 0x03a9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x00da: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x00db: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00dc: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x00dd: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x00de: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x00df: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x00e0: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
|
||||
0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
|
||||
0x00e3: 0x03b3, # GREEK SMALL LETTER GAMMA
|
||||
0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
|
||||
0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
|
||||
0x00e6: 0x03b6, # GREEK SMALL LETTER ZETA
|
||||
0x00e7: 0x03b7, # GREEK SMALL LETTER ETA
|
||||
0x00e8: 0x03b8, # GREEK SMALL LETTER THETA
|
||||
0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
|
||||
0x00ea: 0x03ba, # GREEK SMALL LETTER KAPPA
|
||||
0x00eb: 0x03bb, # GREEK SMALL LETTER LAMDA
|
||||
0x00ec: 0x03bc, # GREEK SMALL LETTER MU
|
||||
0x00ed: 0x03bd, # GREEK SMALL LETTER NU
|
||||
0x00ee: 0x03be, # GREEK SMALL LETTER XI
|
||||
0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
|
||||
0x00f0: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00f1: 0x03c1, # GREEK SMALL LETTER RHO
|
||||
0x00f2: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
|
||||
0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
|
||||
0x00f5: 0x03c5, # GREEK SMALL LETTER UPSILON
|
||||
0x00f6: 0x03c6, # GREEK SMALL LETTER PHI
|
||||
0x00f7: 0x03c7, # GREEK SMALL LETTER CHI
|
||||
0x00f8: 0x03c8, # GREEK SMALL LETTER PSI
|
||||
0x00f9: 0x03c9, # GREEK SMALL LETTER OMEGA
|
||||
0x00fa: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Build the reverse table by swapping every key/value pair found in
# decoding_map; only entries listed there end up in encoding_map.
encoding_map = {}
for k in decoding_map.keys():
    v = decoding_map[k]
    encoding_map[v] = k
|
|
@ -0,0 +1,77 @@
|
|||
""" Python Character Mapping Codec generated from '8859-8.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x00aa: 0x00d7, # MULTIPLICATION SIGN
    0x00af: 0x203e, # OVERLINE
    0x00ba: 0x00f7, # DIVISION SIGN
    0x00df: 0x2017, # DOUBLE LOW LINE
    0x00e0: 0x05d0, # HEBREW LETTER ALEF
    0x00e1: 0x05d1, # HEBREW LETTER BET
    0x00e2: 0x05d2, # HEBREW LETTER GIMEL
    0x00e3: 0x05d3, # HEBREW LETTER DALET
    0x00e4: 0x05d4, # HEBREW LETTER HE
    0x00e5: 0x05d5, # HEBREW LETTER VAV
    0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
    0x00e7: 0x05d7, # HEBREW LETTER HET
    0x00e8: 0x05d8, # HEBREW LETTER TET
    0x00e9: 0x05d9, # HEBREW LETTER YOD
    0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
    0x00eb: 0x05db, # HEBREW LETTER KAF
    0x00ec: 0x05dc, # HEBREW LETTER LAMED
    0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
    0x00ee: 0x05de, # HEBREW LETTER MEM
    0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
    0x00f0: 0x05e0, # HEBREW LETTER NUN
    0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
    0x00f2: 0x05e2, # HEBREW LETTER AYIN
    0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
    0x00f4: 0x05e4, # HEBREW LETTER PE
    0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
    0x00f6: 0x05e6, # HEBREW LETTER TSADI
    0x00f7: 0x05e7, # HEBREW LETTER QOF
    0x00f8: 0x05e8, # HEBREW LETTER RESH
    0x00f9: 0x05e9, # HEBREW LETTER SHIN
    0x00fa: 0x05ea, # HEBREW LETTER TAV
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  Targets that
# are None (undefined) are skipped so that None never becomes a key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,52 @@
|
|||
""" Python Character Mapping Codec generated from '8859-9.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
    0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
    0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
    0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
    0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
    0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  Targets that
# are None (undefined) are skipped so that None never becomes a key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,174 @@
|
|||
""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.  All byte
# values 0x80-0xff are listed.
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
    0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
    0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
    0x0083: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
    0x0084: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
    0x0085: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
    0x0086: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    0x0087: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
    0x0088: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    0x0089: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
    0x008a: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    0x008b: 0x2580, # UPPER HALF BLOCK
    0x008c: 0x2584, # LOWER HALF BLOCK
    0x008d: 0x2588, # FULL BLOCK
    0x008e: 0x258c, # LEFT HALF BLOCK
    0x008f: 0x2590, # RIGHT HALF BLOCK
    0x0090: 0x2591, # LIGHT SHADE
    0x0091: 0x2592, # MEDIUM SHADE
    0x0092: 0x2593, # DARK SHADE
    0x0093: 0x2320, # TOP HALF INTEGRAL
    0x0094: 0x25a0, # BLACK SQUARE
    0x0095: 0x2219, # BULLET OPERATOR
    0x0096: 0x221a, # SQUARE ROOT
    0x0097: 0x2248, # ALMOST EQUAL TO
    0x0098: 0x2264, # LESS-THAN OR EQUAL TO
    0x0099: 0x2265, # GREATER-THAN OR EQUAL TO
    0x009a: 0x00a0, # NO-BREAK SPACE
    0x009b: 0x2321, # BOTTOM HALF INTEGRAL
    0x009c: 0x00b0, # DEGREE SIGN
    0x009d: 0x00b2, # SUPERSCRIPT TWO
    0x009e: 0x00b7, # MIDDLE DOT
    0x009f: 0x00f7, # DIVISION SIGN
    0x00a0: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
    0x00a1: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
    0x00a2: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
    0x00a3: 0x0451, # CYRILLIC SMALL LETTER IO
    0x00a4: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0x00a5: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
    0x00a6: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
    0x00a7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0x00a8: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
    0x00a9: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0x00aa: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0x00ab: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
    0x00ac: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0x00ad: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0x00ae: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
    0x00af: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
    0x00b0: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0x00b1: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
    0x00b2: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
    0x00b3: 0x0401, # CYRILLIC CAPITAL LETTER IO
    0x00b4: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0x00b5: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
    0x00b6: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
    0x00b7: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0x00b8: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
    0x00b9: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0x00ba: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0x00bb: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
    0x00bc: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
    0x00bd: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0x00be: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
    0x00bf: 0x00a9, # COPYRIGHT SIGN
    0x00c0: 0x044e, # CYRILLIC SMALL LETTER YU
    0x00c1: 0x0430, # CYRILLIC SMALL LETTER A
    0x00c2: 0x0431, # CYRILLIC SMALL LETTER BE
    0x00c3: 0x0446, # CYRILLIC SMALL LETTER TSE
    0x00c4: 0x0434, # CYRILLIC SMALL LETTER DE
    0x00c5: 0x0435, # CYRILLIC SMALL LETTER IE
    0x00c6: 0x0444, # CYRILLIC SMALL LETTER EF
    0x00c7: 0x0433, # CYRILLIC SMALL LETTER GHE
    0x00c8: 0x0445, # CYRILLIC SMALL LETTER HA
    0x00c9: 0x0438, # CYRILLIC SMALL LETTER I
    0x00ca: 0x0439, # CYRILLIC SMALL LETTER SHORT I
    0x00cb: 0x043a, # CYRILLIC SMALL LETTER KA
    0x00cc: 0x043b, # CYRILLIC SMALL LETTER EL
    0x00cd: 0x043c, # CYRILLIC SMALL LETTER EM
    0x00ce: 0x043d, # CYRILLIC SMALL LETTER EN
    0x00cf: 0x043e, # CYRILLIC SMALL LETTER O
    0x00d0: 0x043f, # CYRILLIC SMALL LETTER PE
    0x00d1: 0x044f, # CYRILLIC SMALL LETTER YA
    0x00d2: 0x0440, # CYRILLIC SMALL LETTER ER
    0x00d3: 0x0441, # CYRILLIC SMALL LETTER ES
    0x00d4: 0x0442, # CYRILLIC SMALL LETTER TE
    0x00d5: 0x0443, # CYRILLIC SMALL LETTER U
    0x00d6: 0x0436, # CYRILLIC SMALL LETTER ZHE
    0x00d7: 0x0432, # CYRILLIC SMALL LETTER VE
    0x00d8: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
    0x00d9: 0x044b, # CYRILLIC SMALL LETTER YERU
    0x00da: 0x0437, # CYRILLIC SMALL LETTER ZE
    0x00db: 0x0448, # CYRILLIC SMALL LETTER SHA
    0x00dc: 0x044d, # CYRILLIC SMALL LETTER E
    0x00dd: 0x0449, # CYRILLIC SMALL LETTER SHCHA
    0x00de: 0x0447, # CYRILLIC SMALL LETTER CHE
    0x00df: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
    0x00e0: 0x042e, # CYRILLIC CAPITAL LETTER YU
    0x00e1: 0x0410, # CYRILLIC CAPITAL LETTER A
    0x00e2: 0x0411, # CYRILLIC CAPITAL LETTER BE
    0x00e3: 0x0426, # CYRILLIC CAPITAL LETTER TSE
    0x00e4: 0x0414, # CYRILLIC CAPITAL LETTER DE
    0x00e5: 0x0415, # CYRILLIC CAPITAL LETTER IE
    0x00e6: 0x0424, # CYRILLIC CAPITAL LETTER EF
    0x00e7: 0x0413, # CYRILLIC CAPITAL LETTER GHE
    0x00e8: 0x0425, # CYRILLIC CAPITAL LETTER HA
    0x00e9: 0x0418, # CYRILLIC CAPITAL LETTER I
    0x00ea: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
    0x00eb: 0x041a, # CYRILLIC CAPITAL LETTER KA
    0x00ec: 0x041b, # CYRILLIC CAPITAL LETTER EL
    0x00ed: 0x041c, # CYRILLIC CAPITAL LETTER EM
    0x00ee: 0x041d, # CYRILLIC CAPITAL LETTER EN
    0x00ef: 0x041e, # CYRILLIC CAPITAL LETTER O
    0x00f0: 0x041f, # CYRILLIC CAPITAL LETTER PE
    0x00f1: 0x042f, # CYRILLIC CAPITAL LETTER YA
    0x00f2: 0x0420, # CYRILLIC CAPITAL LETTER ER
    0x00f3: 0x0421, # CYRILLIC CAPITAL LETTER ES
    0x00f4: 0x0422, # CYRILLIC CAPITAL LETTER TE
    0x00f5: 0x0423, # CYRILLIC CAPITAL LETTER U
    0x00f6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
    0x00f7: 0x0412, # CYRILLIC CAPITAL LETTER VE
    0x00f8: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
    0x00f9: 0x042b, # CYRILLIC CAPITAL LETTER YERU
    0x00fa: 0x0417, # CYRILLIC CAPITAL LETTER ZE
    0x00fb: 0x0428, # CYRILLIC CAPITAL LETTER SHA
    0x00fc: 0x042d, # CYRILLIC CAPITAL LETTER E
    0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
    0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
    0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  Targets that
# are None (undefined) are skipped so that None never becomes a key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,35 @@
|
|||
""" Python 'latin-1' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Latin-1 codec whose encode/decode are the C-level codecs functions."""

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.latin_1_encode
    decode = codecs.latin_1_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode/decode are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode/decode are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamConverter(StreamWriter,StreamReader):
    """Combined stream class with the two codec directions exchanged.

    encode is bound to the latin-1 *decoder* and decode to the latin-1
    *encoder*.
    NOTE(review): the swap looks deliberate (a converter working in the
    opposite direction to the plain codec) -- confirm against the callers.
    """

    encode = codecs.latin_1_decode
    decode = codecs.latin_1_encode
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer).  encoder and decoder are taken straight from the
    Codec class, where they are bound to the C-level latin-1 functions.
    """
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
|
@ -0,0 +1,169 @@
|
|||
""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
    0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
    0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
    0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
    0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
    0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
    0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
    0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
    0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
    0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
    0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
    0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
    0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
    0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
    0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
    0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
    0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
    0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
    0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
    0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
    0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
    0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
    0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
    0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
    0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
    0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
    0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
    0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
    0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
    0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
    0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
    0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
    0x00a0: 0x2020, # DAGGER
    0x00a1: 0x00b0, # DEGREE SIGN
    0x00a4: 0x00a7, # SECTION SIGN
    0x00a5: 0x2022, # BULLET
    0x00a6: 0x00b6, # PILCROW SIGN
    0x00a7: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    0x00a8: 0x00ae, # REGISTERED SIGN
    0x00aa: 0x2122, # TRADE MARK SIGN
    0x00ab: 0x0402, # CYRILLIC CAPITAL LETTER DJE
    0x00ac: 0x0452, # CYRILLIC SMALL LETTER DJE
    0x00ad: 0x2260, # NOT EQUAL TO
    0x00ae: 0x0403, # CYRILLIC CAPITAL LETTER GJE
    0x00af: 0x0453, # CYRILLIC SMALL LETTER GJE
    0x00b0: 0x221e, # INFINITY
    0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
    0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
    0x00b4: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
    0x00b7: 0x0408, # CYRILLIC CAPITAL LETTER JE
    0x00b8: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
    0x00b9: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
    0x00ba: 0x0407, # CYRILLIC CAPITAL LETTER YI
    0x00bb: 0x0457, # CYRILLIC SMALL LETTER YI
    0x00bc: 0x0409, # CYRILLIC CAPITAL LETTER LJE
    0x00bd: 0x0459, # CYRILLIC SMALL LETTER LJE
    0x00be: 0x040a, # CYRILLIC CAPITAL LETTER NJE
    0x00bf: 0x045a, # CYRILLIC SMALL LETTER NJE
    0x00c0: 0x0458, # CYRILLIC SMALL LETTER JE
    0x00c1: 0x0405, # CYRILLIC CAPITAL LETTER DZE
    0x00c2: 0x00ac, # NOT SIGN
    0x00c3: 0x221a, # SQUARE ROOT
    0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
    0x00c5: 0x2248, # ALMOST EQUAL TO
    0x00c6: 0x2206, # INCREMENT
    0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
    0x00ca: 0x00a0, # NO-BREAK SPACE
    0x00cb: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
    0x00cc: 0x045b, # CYRILLIC SMALL LETTER TSHE
    0x00cd: 0x040c, # CYRILLIC CAPITAL LETTER KJE
    0x00ce: 0x045c, # CYRILLIC SMALL LETTER KJE
    0x00cf: 0x0455, # CYRILLIC SMALL LETTER DZE
    0x00d0: 0x2013, # EN DASH
    0x00d1: 0x2014, # EM DASH
    0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
    0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
    0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
    0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
    0x00d6: 0x00f7, # DIVISION SIGN
    0x00d7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
    0x00d8: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
    0x00d9: 0x045e, # CYRILLIC SMALL LETTER SHORT U
    0x00da: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
    0x00db: 0x045f, # CYRILLIC SMALL LETTER DZHE
    0x00dc: 0x2116, # NUMERO SIGN
    0x00dd: 0x0401, # CYRILLIC CAPITAL LETTER IO
    0x00de: 0x0451, # CYRILLIC SMALL LETTER IO
    0x00df: 0x044f, # CYRILLIC SMALL LETTER YA
    0x00e0: 0x0430, # CYRILLIC SMALL LETTER A
    0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE
    0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE
    0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE
    0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE
    0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE
    0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE
    0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE
    0x00e8: 0x0438, # CYRILLIC SMALL LETTER I
    0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
    0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA
    0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL
    0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM
    0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN
    0x00ee: 0x043e, # CYRILLIC SMALL LETTER O
    0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE
    0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER
    0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES
    0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE
    0x00f3: 0x0443, # CYRILLIC SMALL LETTER U
    0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF
    0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA
    0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE
    0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE
    0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA
    0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
    0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
    0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU
    0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
    0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
    0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
    0x00ff: 0x00a4, # CURRENCY SIGN
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  Targets that
# are None (undefined) are skipped so that None never becomes a key.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,172 @@
|
|||
""" Python Character Mapping Codec generated from 'GREEK.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.  A value of
# None marks a byte that is undefined in this encoding (see 0x00ff).
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x0081: 0x00b9, # SUPERSCRIPT ONE
    0x0082: 0x00b2, # SUPERSCRIPT TWO
    0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
    0x0084: 0x00b3, # SUPERSCRIPT THREE
    0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x0087: 0x0385, # GREEK DIALYTIKA TONOS
    0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
    0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
    0x008b: 0x0384, # GREEK TONOS
    0x008c: 0x00a8, # DIAERESIS
    0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
    0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
    0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
    0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
    0x0092: 0x00a3, # POUND SIGN
    0x0093: 0x2122, # TRADE MARK SIGN
    0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
    0x0096: 0x2022, # BULLET
    0x0097: 0x00bd, # VULGAR FRACTION ONE HALF
    0x0098: 0x2030, # PER MILLE SIGN
    0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
    0x009b: 0x00a6, # BROKEN BAR
    0x009c: 0x00ad, # SOFT HYPHEN
    0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
    0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
    0x00a0: 0x2020, # DAGGER
    0x00a1: 0x0393, # GREEK CAPITAL LETTER GAMMA
    0x00a2: 0x0394, # GREEK CAPITAL LETTER DELTA
    0x00a3: 0x0398, # GREEK CAPITAL LETTER THETA
    0x00a4: 0x039b, # GREEK CAPITAL LETTER LAMBDA
    0x00a5: 0x039e, # GREEK CAPITAL LETTER XI
    0x00a6: 0x03a0, # GREEK CAPITAL LETTER PI
    0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
    0x00a8: 0x00ae, # REGISTERED SIGN
    0x00aa: 0x03a3, # GREEK CAPITAL LETTER SIGMA
    0x00ab: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
    0x00ac: 0x00a7, # SECTION SIGN
    0x00ad: 0x2260, # NOT EQUAL TO
    0x00ae: 0x00b0, # DEGREE SIGN
    0x00af: 0x0387, # GREEK ANO TELEIA
    0x00b0: 0x0391, # GREEK CAPITAL LETTER ALPHA
    0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
    0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
    0x00b4: 0x00a5, # YEN SIGN
    0x00b5: 0x0392, # GREEK CAPITAL LETTER BETA
    0x00b6: 0x0395, # GREEK CAPITAL LETTER EPSILON
    0x00b7: 0x0396, # GREEK CAPITAL LETTER ZETA
    0x00b8: 0x0397, # GREEK CAPITAL LETTER ETA
    0x00b9: 0x0399, # GREEK CAPITAL LETTER IOTA
    0x00ba: 0x039a, # GREEK CAPITAL LETTER KAPPA
    0x00bb: 0x039c, # GREEK CAPITAL LETTER MU
    0x00bc: 0x03a6, # GREEK CAPITAL LETTER PHI
    0x00bd: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
    0x00be: 0x03a8, # GREEK CAPITAL LETTER PSI
    0x00bf: 0x03a9, # GREEK CAPITAL LETTER OMEGA
    0x00c0: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
    0x00c1: 0x039d, # GREEK CAPITAL LETTER NU
    0x00c2: 0x00ac, # NOT SIGN
    0x00c3: 0x039f, # GREEK CAPITAL LETTER OMICRON
    0x00c4: 0x03a1, # GREEK CAPITAL LETTER RHO
    0x00c5: 0x2248, # ALMOST EQUAL TO
    0x00c6: 0x03a4, # GREEK CAPITAL LETTER TAU
    0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
    0x00ca: 0x00a0, # NO-BREAK SPACE
    0x00cb: 0x03a5, # GREEK CAPITAL LETTER UPSILON
    0x00cc: 0x03a7, # GREEK CAPITAL LETTER CHI
    0x00cd: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
    0x00ce: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
    0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
    0x00d0: 0x2013, # EN DASH
    0x00d1: 0x2015, # HORIZONTAL BAR
    0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
    0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
    0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
    0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
    0x00d6: 0x00f7, # DIVISION SIGN
    0x00d7: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
    0x00d8: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
    0x00d9: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
    0x00da: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
    0x00db: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
    0x00dc: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
    0x00dd: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
    0x00de: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
    0x00df: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
    0x00e0: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
    0x00e1: 0x03b1, # GREEK SMALL LETTER ALPHA
    0x00e2: 0x03b2, # GREEK SMALL LETTER BETA
    0x00e3: 0x03c8, # GREEK SMALL LETTER PSI
    0x00e4: 0x03b4, # GREEK SMALL LETTER DELTA
    0x00e5: 0x03b5, # GREEK SMALL LETTER EPSILON
    0x00e6: 0x03c6, # GREEK SMALL LETTER PHI
    0x00e7: 0x03b3, # GREEK SMALL LETTER GAMMA
    0x00e8: 0x03b7, # GREEK SMALL LETTER ETA
    0x00e9: 0x03b9, # GREEK SMALL LETTER IOTA
    0x00ea: 0x03be, # GREEK SMALL LETTER XI
    0x00eb: 0x03ba, # GREEK SMALL LETTER KAPPA
    0x00ec: 0x03bb, # GREEK SMALL LETTER LAMBDA
    0x00ed: 0x03bc, # GREEK SMALL LETTER MU
    0x00ee: 0x03bd, # GREEK SMALL LETTER NU
    0x00ef: 0x03bf, # GREEK SMALL LETTER OMICRON
    0x00f0: 0x03c0, # GREEK SMALL LETTER PI
    0x00f1: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
    0x00f2: 0x03c1, # GREEK SMALL LETTER RHO
    0x00f3: 0x03c3, # GREEK SMALL LETTER SIGMA
    0x00f4: 0x03c4, # GREEK SMALL LETTER TAU
    0x00f5: 0x03b8, # GREEK SMALL LETTER THETA
    0x00f6: 0x03c9, # GREEK SMALL LETTER OMEGA
    0x00f7: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
    0x00f8: 0x03c7, # GREEK SMALL LETTER CHI
    0x00f9: 0x03c5, # GREEK SMALL LETTER UPSILON
    0x00fa: 0x03b6, # GREEK SMALL LETTER ZETA
    0x00fb: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
    0x00fc: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
    0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    0x00ff: None, # UNDEFINED
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  The
# undefined entry (0x00ff: None) must not be inverted, otherwise the map
# gains a meaningless None -> 0xff entry.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,168 @@
|
|||
""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """Character mapping codec driven by this module's mapping tables."""

    def encode(self,input,errors='strict'):
        """Encode input using the module-level encoding_map.

        input and errors are handed unchanged to codecs.charmap_encode();
        its result is returned as-is.
        """
        # encoding_map is looked up at call time; it is defined further
        # down in the module.
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """Decode input using the module-level decoding_map.

        input and errors are handed unchanged to codecs.charmap_decode();
        its result is returned as-is.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """Stream reader for this encoding.

    Codec is listed first so that its encode()/decode() are found before
    those of the codecs base class.
    """
    pass
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """Return the registry entry for this codec module.

    The result is the 4-tuple (encoder, decoder, stream_reader,
    stream_writer); encoder and decoder are bound methods of freshly
    created Codec instances.
    """
    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
# Maps a byte value (key) to the Unicode code point (value) it decodes to.
# The trailing comment on each entry is the character name.  A value of
# None marks a byte that is undefined in this encoding (see 0x00f0).
# NOTE(review): byte values without an entry are presumably left to the
# default handling of codecs.charmap_decode() -- confirm.
decoding_map = {

    0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
    0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
    0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
    0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
    0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
    0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
    0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
    0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
    0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
    0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
    0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
    0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
    0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
    0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
    0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
    0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
    0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
    0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
    0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
    0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
    0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
    0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
    0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
    0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
    0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
    0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
    0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
    0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
    0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
    0x00a0: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
    0x00a1: 0x00b0, # DEGREE SIGN
    0x00a4: 0x00a7, # SECTION SIGN
    0x00a5: 0x2022, # BULLET
    0x00a6: 0x00b6, # PILCROW SIGN
    0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
    0x00a8: 0x00ae, # REGISTERED SIGN
    0x00aa: 0x2122, # TRADE MARK SIGN
    0x00ab: 0x00b4, # ACUTE ACCENT
    0x00ac: 0x00a8, # DIAERESIS
    0x00ad: 0x2260, # NOT EQUAL TO
    0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
    0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
    0x00b0: 0x221e, # INFINITY
    0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
    0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
    0x00b4: 0x00a5, # YEN SIGN
    0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
    0x00b7: 0x2211, # N-ARY SUMMATION
    0x00b8: 0x220f, # N-ARY PRODUCT
    0x00b9: 0x03c0, # GREEK SMALL LETTER PI
    0x00ba: 0x222b, # INTEGRAL
    0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
    0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
    0x00bd: 0x2126, # OHM SIGN
    0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
    0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
    0x00c0: 0x00bf, # INVERTED QUESTION MARK
    0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
    0x00c2: 0x00ac, # NOT SIGN
    0x00c3: 0x221a, # SQUARE ROOT
    0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
    0x00c5: 0x2248, # ALMOST EQUAL TO
    0x00c6: 0x2206, # INCREMENT
    0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
    0x00ca: 0x00a0, # NO-BREAK SPACE
    0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
    0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
    0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
    0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
    0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
    0x00d0: 0x2013, # EN DASH
    0x00d1: 0x2014, # EM DASH
    0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
    0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
    0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
    0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
    0x00d6: 0x00f7, # DIVISION SIGN
    0x00d7: 0x25ca, # LOZENGE
    0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
    0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
    0x00da: 0x2044, # FRACTION SLASH
    0x00db: 0x00a4, # CURRENCY SIGN
    0x00dc: 0x00d0, # LATIN CAPITAL LETTER ETH
    0x00dd: 0x00f0, # LATIN SMALL LETTER ETH
    0x00df: 0x00fe, # LATIN SMALL LETTER THORN
    0x00e0: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
    0x00e1: 0x00b7, # MIDDLE DOT
    0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
    0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
    0x00e4: 0x2030, # PER MILLE SIGN
    0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
    0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
    0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
    0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
    0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
    0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
    0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
    0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x00f0: None, # UNDEFINED
    0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
    0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
    0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
    0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I
    0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x00f7: 0x02dc, # SMALL TILDE
    0x00f8: 0x00af, # MACRON
    0x00f9: 0x02d8, # BREVE
    0x00fa: 0x02d9, # DOT ABOVE
    0x00fb: 0x02da, # RING ABOVE
    0x00fc: 0x00b8, # CEDILLA
    0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
    0x00fe: 0x02db, # OGONEK
    0x00ff: 0x02c7, # CARON
}

### Encoding Map

# Inverse of decoding_map: Unicode code point -> byte value.  The
# undefined entry (0x00f0: None) must not be inverted, otherwise the map
# gains a meaningless None -> 0xf0 entry.
encoding_map = {}
for k,v in decoding_map.items():
    if v is not None:
        encoding_map[v] = k
|
|
@ -0,0 +1,172 @@
|
|||
""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Stateless charmap codec driven by this module's mapping tables. """

    def encode(self,input,errors='strict'):
        """ Encode input through encoding_map; returns the result of
            codecs.charmap_encode() unchanged.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """ Decode input through decoding_map; returns the result of
            codecs.charmap_decode() unchanged.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # One Codec instance serves both bound methods; the class keeps no
    # per-instance state.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON
|
||||
0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON
|
||||
0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON
|
||||
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00a0: 0x2020, # DAGGER
|
||||
0x00a1: 0x00b0, # DEGREE SIGN
|
||||
0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x00a4: 0x00a7, # SECTION SIGN
|
||||
0x00a5: 0x2022, # BULLET
|
||||
0x00a6: 0x00b6, # PILCROW SIGN
|
||||
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00a8: 0x00ae, # REGISTERED SIGN
|
||||
0x00aa: 0x2122, # TRADE MARK SIGN
|
||||
0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x00ac: 0x00a8, # DIAERESIS
|
||||
0x00ad: 0x2260, # NOT EQUAL TO
|
||||
0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
|
||||
0x00b7: 0x2211, # N-ARY SUMMATION
|
||||
0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON
|
||||
0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x00c2: 0x00ac, # NOT SIGN
|
||||
0x00c3: 0x221a, # SQUARE ROOT
|
||||
0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x00c6: 0x2206, # INCREMENT
|
||||
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x00ca: 0x00a0, # NO-BREAK SPACE
|
||||
0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON
|
||||
0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x00d0: 0x2013, # EN DASH
|
||||
0x00d1: 0x2014, # EM DASH
|
||||
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00d6: 0x00f7, # DIVISION SIGN
|
||||
0x00d7: 0x25ca, # LOZENGE
|
||||
0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON
|
||||
0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON
|
||||
0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON
|
||||
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so the encoder can map a Unicode ordinal back to
# the byte value it was decoded from.
encoding_map = {}
for k,v in decoding_map.items():
    # An undefined byte would decode to None; it has no Unicode ordinal,
    # so never store it as a key of the encoding table.
    if v is None:
        continue
    encoding_map[v] = k
|
|
@ -0,0 +1,169 @@
|
|||
""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Stateless charmap codec driven by this module's mapping tables. """

    def encode(self,input,errors='strict'):
        """ Encode input through encoding_map; returns the result of
            codecs.charmap_encode() unchanged.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """ Decode input through decoding_map; returns the result of
            codecs.charmap_decode() unchanged.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # One Codec instance serves both bound methods; the class keeps no
    # per-instance state.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00a0: 0x2020, # DAGGER
|
||||
0x00a1: 0x00b0, # DEGREE SIGN
|
||||
0x00a4: 0x00a7, # SECTION SIGN
|
||||
0x00a5: 0x2022, # BULLET
|
||||
0x00a6: 0x00b6, # PILCROW SIGN
|
||||
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00a8: 0x00ae, # REGISTERED SIGN
|
||||
0x00aa: 0x2122, # TRADE MARK SIGN
|
||||
0x00ab: 0x00b4, # ACUTE ACCENT
|
||||
0x00ac: 0x00a8, # DIAERESIS
|
||||
0x00ad: 0x2260, # NOT EQUAL TO
|
||||
0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00b0: 0x221e, # INFINITY
|
||||
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00b4: 0x00a5, # YEN SIGN
|
||||
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
|
||||
0x00b7: 0x2211, # N-ARY SUMMATION
|
||||
0x00b8: 0x220f, # N-ARY PRODUCT
|
||||
0x00b9: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00ba: 0x222b, # INTEGRAL
|
||||
0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x00bd: 0x2126, # OHM SIGN
|
||||
0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00c0: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00c2: 0x00ac, # NOT SIGN
|
||||
0x00c3: 0x221a, # SQUARE ROOT
|
||||
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x00c5: 0x2248, # ALMOST EQUAL TO
|
||||
0x00c6: 0x2206, # INCREMENT
|
||||
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x00ca: 0x00a0, # NO-BREAK SPACE
|
||||
0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x00d0: 0x2013, # EN DASH
|
||||
0x00d1: 0x2014, # EM DASH
|
||||
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00d6: 0x00f7, # DIVISION SIGN
|
||||
0x00d7: 0x25ca, # LOZENGE
|
||||
0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x00da: 0x2044, # FRACTION SLASH
|
||||
0x00db: 0x00a4, # CURRENCY SIGN
|
||||
0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x00de: 0xfb01, # LATIN SMALL LIGATURE FI
|
||||
0x00df: 0xfb02, # LATIN SMALL LIGATURE FL
|
||||
0x00e0: 0x2021, # DOUBLE DAGGER
|
||||
0x00e1: 0x00b7, # MIDDLE DOT
|
||||
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00e4: 0x2030, # PER MILLE SIGN
|
||||
0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00f0: None, # UNDEFINED
|
||||
0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00f5: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x00f7: 0x02dc, # SMALL TILDE
|
||||
0x00f8: 0x00af, # MACRON
|
||||
0x00f9: 0x02d8, # BREVE
|
||||
0x00fa: 0x02d9, # DOT ABOVE
|
||||
0x00fb: 0x02da, # RING ABOVE
|
||||
0x00fc: 0x00b8, # CEDILLA
|
||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00fe: 0x02db, # OGONEK
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so the encoder can map a Unicode ordinal back to
# the byte value it was decoded from.
encoding_map = {}
for k,v in decoding_map.items():
    # Bytes marked UNDEFINED decode to None; they have no Unicode ordinal,
    # so leave them out instead of storing a meaningless None key.
    if v is None:
        continue
    encoding_map[v] = k
|
|
@ -0,0 +1,169 @@
|
|||
""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ Stateless charmap codec driven by this module's mapping tables. """

    def encode(self,input,errors='strict'):
        """ Encode input through encoding_map; returns the result of
            codecs.charmap_encode() unchanged.
        """
        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):
        """ Decode input through decoding_map; returns the result of
            codecs.charmap_decode() unchanged.
        """
        return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # One Codec instance serves both bound methods; the class keeps no
    # per-instance state.
    codec = Codec()
    return (codec.encode,codec.decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = {
|
||||
|
||||
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x008b: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x008c: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x0093: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x0098: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00a0: 0x2020, # DAGGER
|
||||
0x00a1: 0x00b0, # DEGREE SIGN
|
||||
0x00a4: 0x00a7, # SECTION SIGN
|
||||
0x00a5: 0x2022, # BULLET
|
||||
0x00a6: 0x00b6, # PILCROW SIGN
|
||||
0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
|
||||
0x00a8: 0x00ae, # REGISTERED SIGN
|
||||
0x00aa: 0x2122, # TRADE MARK SIGN
|
||||
0x00ab: 0x00b4, # ACUTE ACCENT
|
||||
0x00ac: 0x00a8, # DIAERESIS
|
||||
0x00ad: 0x2260, # NOT EQUAL TO
|
||||
0x00ae: 0x00c6, # LATIN CAPITAL LIGATURE AE
|
||||
0x00af: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00b0: 0x221e, # INFINITY
|
||||
0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
|
||||
0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
|
||||
0x00b4: 0x00a5, # YEN SIGN
|
||||
0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
|
||||
0x00b7: 0x2211, # N-ARY SUMMATION
|
||||
0x00b8: 0x220f, # N-ARY PRODUCT
|
||||
0x00b9: 0x03c0, # GREEK SMALL LETTER PI
|
||||
0x00ba: 0x222b, # INTEGRAL
|
||||
0x00bb: 0x00aa, # FEMININE ORDINAL INDICATOR
|
||||
0x00bc: 0x00ba, # MASCULINE ORDINAL INDICATOR
|
||||
0x00bd: 0x2126, # OHM SIGN
|
||||
0x00be: 0x00e6, # LATIN SMALL LIGATURE AE
|
||||
0x00bf: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00c0: 0x00bf, # INVERTED QUESTION MARK
|
||||
0x00c1: 0x00a1, # INVERTED EXCLAMATION MARK
|
||||
0x00c2: 0x00ac, # NOT SIGN
|
||||
0x00c3: 0x221a, # SQUARE ROOT
|
||||
0x00c4: 0x0192, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x00c5: 0x2248, # ALMOST EQUAL TO
|
||||
0x00c6: 0x2206, # INCREMENT
|
||||
0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
|
||||
0x00ca: 0x00a0, # NO-BREAK SPACE
|
||||
0x00cb: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00cc: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00ce: 0x0152, # LATIN CAPITAL LIGATURE OE
|
||||
0x00cf: 0x0153, # LATIN SMALL LIGATURE OE
|
||||
0x00d0: 0x2013, # EN DASH
|
||||
0x00d1: 0x2014, # EM DASH
|
||||
0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
|
||||
0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
|
||||
0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
|
||||
0x00d6: 0x00f7, # DIVISION SIGN
|
||||
0x00d7: 0x25ca, # LOZENGE
|
||||
0x00d8: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x00d9: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x00da: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x00db: 0x011f, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x00dc: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x00dd: 0x0131, # LATIN SMALL LETTER DOTLESS I
|
||||
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x00df: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x00e0: 0x2021, # DOUBLE DAGGER
|
||||
0x00e1: 0x00b7, # MIDDLE DOT
|
||||
0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x00e4: 0x2030, # PER MILLE SIGN
|
||||
0x00e5: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00e6: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00e8: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00e9: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00eb: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00ec: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00ed: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00f0: None, # UNDEFINED
|
||||
0x00f1: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00f3: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00f4: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00f5: None, # UNDEFINED
|
||||
0x00f6: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x00f7: 0x02dc, # SMALL TILDE
|
||||
0x00f8: 0x00af, # MACRON
|
||||
0x00f9: 0x02d8, # BREVE
|
||||
0x00fa: 0x02d9, # DOT ABOVE
|
||||
0x00fb: 0x02da, # RING ABOVE
|
||||
0x00fc: 0x00b8, # CEDILLA
|
||||
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
|
||||
0x00fe: 0x02db, # OGONEK
|
||||
0x00ff: 0x02c7, # CARON
|
||||
}
|
||||
|
||||
### Encoding Map
|
||||
|
||||
# Invert decoding_map so the encoder can map a Unicode ordinal back to
# the byte value it was decoded from.
encoding_map = {}
for k,v in decoding_map.items():
    # This table marks more than one byte as UNDEFINED (None). Those bytes
    # have no Unicode ordinal and would all collide on a single None key,
    # so leave them out of the encoding table.
    if v is None:
        continue
    encoding_map[v] = k
|
|
@ -0,0 +1,30 @@
|
|||
""" Python 'raw-unicode-escape' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'raw-unicode-escape' codec; both directions are delegated to the
        functions exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.raw_unicode_escape_encode
    decode = codecs.raw_unicode_escape_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
|
@ -0,0 +1,30 @@
|
|||
""" Python 'unicode-escape' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'unicode-escape' codec; both directions are delegated to the
        functions exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.unicode_escape_encode
    decode = codecs.unicode_escape_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
|
@ -0,0 +1,30 @@
|
|||
""" Python 'unicode-internal' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'unicode-internal' codec; both directions are delegated to the
        functions exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.unicode_internal_encode
    decode = codecs.unicode_internal_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
|
@ -0,0 +1,31 @@
|
|||
""" Python 'utf-16' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'utf-16' codec; both directions are delegated to the functions
        exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.utf_16_encode
    decode = codecs.utf_16_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
""" Python 'utf-16-be' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'utf-16-be' codec; both directions are delegated to the functions
        exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.utf_16_be_encode
    decode = codecs.utf_16_be_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
""" Python 'utf-16-le' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'utf-16-le' codec; both directions are delegated to the functions
        exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.utf_16_le_encode
    decode = codecs.utf_16_le_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
""" Python 'utf-8' Codec
|
||||
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
    """ 'utf-8' codec; both directions are delegated to the functions
        exported by the codecs module.
    """

    # Note: Binding these as C functions will result in the class not
    # converting them to methods. This is intended.
    encode = codecs.utf_8_encode
    decode = codecs.utf_8_decode
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
    """ codecs.StreamWriter combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
class StreamReader(Codec,codecs.StreamReader):
    """ codecs.StreamReader combined with this module's Codec; adds
        nothing of its own.
    """
|
||||
|
||||
### encodings module API
|
||||
|
||||
def getregentry():
    """ Return the (encoder, decoder, stream_reader, stream_writer)
        tuple that the encodings package expects from a codec module.
    """
    # The encoder and decoder are taken from the class itself; no Codec
    # instance is created.
    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
|
||||
|
Loading…
Reference in New Issue