mirror of https://github.com/python/cpython
Merge the rest of the trunk.
Merged revisions 46490-46494,46496,46498,46500,46506,46521,46538,46558,46563-46567,46570-46571,46583,46593,46595-46598,46604,46606,46609-46753 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r46610 | martin.v.loewis | 2006-06-03 09:42:26 +0200 (Sat, 03 Jun 2006) | 2 lines Updated version (win32-icons2.zip) from #1490384. ........ r46612 | andrew.kuchling | 2006-06-03 20:09:41 +0200 (Sat, 03 Jun 2006) | 1 line [Bug #1472084] Fix description of do_tag ........ r46614 | andrew.kuchling | 2006-06-03 20:33:35 +0200 (Sat, 03 Jun 2006) | 1 line [Bug #1475554] Strengthen text to say 'must' instead of 'should' ........ r46616 | andrew.kuchling | 2006-06-03 20:41:28 +0200 (Sat, 03 Jun 2006) | 1 line [Bug #1441864] Clarify description of 'data' argument ........ r46617 | andrew.kuchling | 2006-06-03 20:43:24 +0200 (Sat, 03 Jun 2006) | 1 line Minor rewording ........ r46619 | andrew.kuchling | 2006-06-03 21:02:35 +0200 (Sat, 03 Jun 2006) | 9 lines [Bug #1497414] _self is a reserved word in the WATCOM 10.6 C compiler. Fix by renaming the variable. In a different module, Neal fixed it by renaming _self to self. There's already a variable named 'self' here, so I used selfptr. (I'm committing this on a Mac without Tk, but it's a simple search-and-replace. <crosses fingers>, so I'll watch the buildbots and see what happens.) ........ r46621 | fredrik.lundh | 2006-06-03 23:56:05 +0200 (Sat, 03 Jun 2006) | 5 lines "_self" is a said to be a reserved word in Watcom C 10.6. I'm not sure that's really standard compliant behaviour, but I guess we have to fix that anyway... ........ r46622 | andrew.kuchling | 2006-06-04 00:44:42 +0200 (Sun, 04 Jun 2006) | 1 line Update readme ........ r46623 | andrew.kuchling | 2006-06-04 00:59:23 +0200 (Sun, 04 Jun 2006) | 1 line Drop 0 parameter ........ r46624 | andrew.kuchling | 2006-06-04 00:59:59 +0200 (Sun, 04 Jun 2006) | 1 line Some code tidying; use curses.wrapper ........ 
r46625 | andrew.kuchling | 2006-06-04 01:02:15 +0200 (Sun, 04 Jun 2006) | 1 line Use True; value returned from main is unused ........ r46626 | andrew.kuchling | 2006-06-04 01:07:21 +0200 (Sun, 04 Jun 2006) | 1 line Use true division, and the True value ........ r46627 | andrew.kuchling | 2006-06-04 01:09:58 +0200 (Sun, 04 Jun 2006) | 1 line Docstring fix; use True ........ r46628 | andrew.kuchling | 2006-06-04 01:15:56 +0200 (Sun, 04 Jun 2006) | 1 line Put code in a main() function; loosen up the spacing to match current code style ........ r46629 | andrew.kuchling | 2006-06-04 01:39:07 +0200 (Sun, 04 Jun 2006) | 1 line Use functions; modernize code ........ r46630 | andrew.kuchling | 2006-06-04 01:43:22 +0200 (Sun, 04 Jun 2006) | 1 line This demo requires Medusa (not just asyncore); remove it ........ r46631 | andrew.kuchling | 2006-06-04 01:46:36 +0200 (Sun, 04 Jun 2006) | 2 lines Remove xmlrpc demo -- it duplicates the SimpleXMLRPCServer module. ........ r46632 | andrew.kuchling | 2006-06-04 01:47:22 +0200 (Sun, 04 Jun 2006) | 1 line Remove xmlrpc/ directory ........ r46633 | andrew.kuchling | 2006-06-04 01:51:21 +0200 (Sun, 04 Jun 2006) | 1 line Remove dangling reference ........ r46634 | andrew.kuchling | 2006-06-04 01:59:36 +0200 (Sun, 04 Jun 2006) | 1 line Add more whitespace; use a better socket name ........ r46635 | tim.peters | 2006-06-04 03:22:53 +0200 (Sun, 04 Jun 2006) | 2 lines Whitespace normalization. ........ r46637 | tim.peters | 2006-06-04 05:26:02 +0200 (Sun, 04 Jun 2006) | 16 lines In a PYMALLOC_DEBUG build obmalloc adds extra debugging info to each allocated block. This was using 4 bytes for each such piece of info regardless of platform. This didn't really matter before (proof: no bug reports, and the debug-build obmalloc would have assert-failed if it was ever asked for a chunk of memory >= 2**32 bytes), since container indices were plain ints. 
But after the Py_ssize_t changes, it's at least theoretically possible to allocate a list or string whose guts exceed 2**32 bytes, and the PYMALLOC_DEBUG routines would fail then (having only 4 bytes to record the originally requested size). Now we use sizeof(size_t) bytes for each of a PYMALLOC_DEBUG build's extra debugging fields. This won't make any difference on 32-bit boxes, but will add 16 bytes to each allocation in a debug build on a 64-bit box. ........ r46638 | tim.peters | 2006-06-04 05:38:04 +0200 (Sun, 04 Jun 2006) | 4 lines _PyObject_DebugMalloc(): The return value should add 2*sizeof(size_t) now, not 8. This probably accounts for current disasters on the 64-bit buildbot slaves. ........ r46639 | neal.norwitz | 2006-06-04 08:19:31 +0200 (Sun, 04 Jun 2006) | 1 line SF #1499797, Fix for memory leak in WindowsError_str ........ r46640 | andrew.macintyre | 2006-06-04 14:31:09 +0200 (Sun, 04 Jun 2006) | 2 lines Patch #1454481: Make thread stack size runtime tunable. ........ r46641 | andrew.macintyre | 2006-06-04 14:59:59 +0200 (Sun, 04 Jun 2006) | 2 lines clean up function declarations to conform to PEP-7 style. ........ r46642 | martin.blais | 2006-06-04 15:49:49 +0200 (Sun, 04 Jun 2006) | 15 lines Fixes in struct and socket from merge reviews. - Following Guido's comments, renamed * pack_to -> pack_into * recv_buf -> recv_into * recvfrom_buf -> recvfrom_into - Made fixes to _struct.c according to Neal Norwitz comments on the checkins list. - Converted some ints into the appropriate -- I hope -- ssize_t and size_t. ........ r46643 | ronald.oussoren | 2006-06-04 16:05:28 +0200 (Sun, 04 Jun 2006) | 3 lines "Import" LDFLAGS in Mac/OSX/Makefile.in to ensure pythonw gets build with the right compiler flags. ........ r46644 | ronald.oussoren | 2006-06-04 16:24:59 +0200 (Sun, 04 Jun 2006) | 2 lines Drop Mac wrappers for the WASTE library. ........ 
r46645 | tim.peters | 2006-06-04 17:49:07 +0200 (Sun, 04 Jun 2006) | 3 lines s_methods[]: Stop compiler warnings by casting s_unpack_from to PyCFunction. ........ r46646 | george.yoshida | 2006-06-04 19:04:12 +0200 (Sun, 04 Jun 2006) | 2 lines Remove a redundant word ........ r46647 | george.yoshida | 2006-06-04 19:17:25 +0200 (Sun, 04 Jun 2006) | 2 lines Markup fix ........ r46648 | martin.v.loewis | 2006-06-04 21:36:28 +0200 (Sun, 04 Jun 2006) | 2 lines Patch #1359618: Speed-up charmap encoder. ........ r46649 | georg.brandl | 2006-06-04 23:46:16 +0200 (Sun, 04 Jun 2006) | 3 lines Repair refleaks in unicodeobject. ........ r46650 | georg.brandl | 2006-06-04 23:56:52 +0200 (Sun, 04 Jun 2006) | 4 lines Patch #1346214: correctly optimize away "if 0"-style stmts (thanks to Neal for review) ........ r46651 | georg.brandl | 2006-06-05 00:15:37 +0200 (Mon, 05 Jun 2006) | 2 lines Bug #1500293: fix memory leaks in _subprocess module. ........ r46654 | tim.peters | 2006-06-05 01:43:53 +0200 (Mon, 05 Jun 2006) | 2 lines Whitespace normalization. ........ r46655 | tim.peters | 2006-06-05 01:52:47 +0200 (Mon, 05 Jun 2006) | 16 lines Revert revisions: 46640 Patch #1454481: Make thread stack size runtime tunable. 46647 Markup fix The first is causing many buildbots to fail test runs, and there are multiple causes with seemingly no immediate prospects for repairing them. See python-dev discussion. Note that a branch can (and should) be created for resolving these problems, like svn copy svn+ssh://svn.python.org/python/trunk -r46640 svn+ssh://svn.python.org/python/branches/NEW_BRANCH followed by merging rev 46647 to the new branch. ........ r46656 | andrew.kuchling | 2006-06-05 02:08:09 +0200 (Mon, 05 Jun 2006) | 1 line Mention second encoding speedup ........ 
r46657 | gregory.p.smith | 2006-06-05 02:31:01 +0200 (Mon, 05 Jun 2006) | 7 lines bugfix: when log_archive was called with the DB_ARCH_REMOVE flag present in BerkeleyDB >= 4.2 it tried to construct a list out of an uninitialized char **log_list. feature: export the DB_ARCH_REMOVE flag by name in the module on BerkeleyDB >= 4.2. ........ r46658 | gregory.p.smith | 2006-06-05 02:33:35 +0200 (Mon, 05 Jun 2006) | 5 lines fix a bug in the previous commit. don't leak empty list on error return and fix the additional rare (out of memory only) bug that it was supposed to fix of not freeing log_list when the python allocator failed. ........ r46660 | tim.peters | 2006-06-05 02:55:26 +0200 (Mon, 05 Jun 2006) | 9 lines "Flat is better than nested." Move the long-winded, multiply-nested -R support out of runtest() and into some module-level helper functions. This makes runtest() and the -R code easier to follow. That in turn allowed seeing some opportunities for code simplification, and made it obvious that reglog.txt never got closed. ........ r46661 | hyeshik.chang | 2006-06-05 02:59:54 +0200 (Mon, 05 Jun 2006) | 3 lines Fix a potentially invalid memory access of CJKCodecs' shift-jis decoder. (found by Neal Norwitz) ........ r46663 | gregory.p.smith | 2006-06-05 03:39:52 +0200 (Mon, 05 Jun 2006) | 3 lines * support DBEnv.log_stat() method on BerkeleyDB >= 4.0 [patch #1494885] ........ r46664 | tim.peters | 2006-06-05 03:43:03 +0200 (Mon, 05 Jun 2006) | 3 lines Remove doctest.testmod's deprecated (in 2.4) `isprivate` argument. A lot of hair went into supporting that! ........ r46665 | tim.peters | 2006-06-05 03:47:24 +0200 (Mon, 05 Jun 2006) | 2 lines Whitespace normalization. ........ r46666 | tim.peters | 2006-06-05 03:48:21 +0200 (Mon, 05 Jun 2006) | 2 lines Make doctest news more accurate. ........ r46667 | gregory.p.smith | 2006-06-05 03:56:15 +0200 (Mon, 05 Jun 2006) | 3 lines * support DBEnv.lsn_reset() method on BerkeleyDB >= 4.4 [patch #1494902] ........ 
r46668 | gregory.p.smith | 2006-06-05 04:02:25 +0200 (Mon, 05 Jun 2006) | 3 lines mention the just committed bsddb changes ........ r46671 | gregory.p.smith | 2006-06-05 19:38:04 +0200 (Mon, 05 Jun 2006) | 3 lines * add support for DBSequence objects [patch #1466734] ........ r46672 | gregory.p.smith | 2006-06-05 20:20:07 +0200 (Mon, 05 Jun 2006) | 3 lines forgot to add this file in previous commit ........ r46673 | tim.peters | 2006-06-05 20:36:12 +0200 (Mon, 05 Jun 2006) | 2 lines Whitespace normalization. ........ r46674 | tim.peters | 2006-06-05 20:36:54 +0200 (Mon, 05 Jun 2006) | 2 lines Add missing svn:eol-style property to text files. ........ r46675 | gregory.p.smith | 2006-06-05 20:48:21 +0200 (Mon, 05 Jun 2006) | 4 lines * fix DBCursor.pget() bug with keyword argument names when no data= is supplied [SF pybsddb bug #1477863] ........ r46676 | andrew.kuchling | 2006-06-05 21:05:32 +0200 (Mon, 05 Jun 2006) | 1 line Remove use of Trove name, which isn't very helpful to users ........ r46677 | andrew.kuchling | 2006-06-05 21:08:25 +0200 (Mon, 05 Jun 2006) | 1 line [Bug #1470026] Include link to list of classifiers ........ r46679 | tim.peters | 2006-06-05 22:48:49 +0200 (Mon, 05 Jun 2006) | 10 lines Access _struct attributes directly instead of mucking with getattr. string_reverse(): Simplify. assertRaises(): Raise TestFailed on failure. test_unpack_from(), test_pack_into(), test_pack_into_fn(): never use `assert` to test for an expected result (it doesn't test anything when Python is run with -O). ........ r46680 | tim.peters | 2006-06-05 22:49:27 +0200 (Mon, 05 Jun 2006) | 2 lines Add missing svn:eol-style property to text files. ........ r46681 | gregory.p.smith | 2006-06-06 01:38:06 +0200 (Tue, 06 Jun 2006) | 3 lines add depends = ['md5.h'] to the _md5 module extension for correctness sake. ........ 
r46682 | brett.cannon | 2006-06-06 01:51:55 +0200 (Tue, 06 Jun 2006) | 4 lines Add 3 more bytes to a buffer to cover constants in string and null byte on top of 10 possible digits for an int. Closes bug #1501223. ........ r46684 | gregory.p.smith | 2006-06-06 01:59:37 +0200 (Tue, 06 Jun 2006) | 5 lines - bsddb: the __len__ method of a DB object has been fixed to return correct results. It could previously incorrectly return 0 in some cases. Fixes SF bug 1493322 (pybsddb bug 1184012). ........ r46686 | tim.peters | 2006-06-06 02:25:07 +0200 (Tue, 06 Jun 2006) | 7 lines _PySys_Init(): It's rarely a good idea to size a buffer to the exact maximum size someone guesses is needed. In this case, if we're really worried about extreme integers, then "cp%d" can actually need 14 bytes (2 for "cp" + 1 for \0 at the end + 11 for -(2**31-1)). So reserve 128 bytes instead -- nothing is actually saved by making a stack-local buffer tiny. ........ r46687 | neal.norwitz | 2006-06-06 09:22:08 +0200 (Tue, 06 Jun 2006) | 1 line Remove unused variable (and stop compiler warning) ........ r46688 | neal.norwitz | 2006-06-06 09:23:01 +0200 (Tue, 06 Jun 2006) | 1 line Fix a bunch of parameter strings ........ r46689 | thomas.heller | 2006-06-06 13:34:33 +0200 (Tue, 06 Jun 2006) | 6 lines Convert CFieldObject tp_members to tp_getset, since there is no structmember typecode for Py_ssize_t fields. This should fix some of the errors on the PPC64 debian machine (64-bit, big endian). Assigning to readonly fields now raises AttributeError instead of TypeError, so the testcase has to be changed as well. ........ r46690 | thomas.heller | 2006-06-06 13:54:32 +0200 (Tue, 06 Jun 2006) | 1 line Damn - the sentinel was missing. And fix another silly mistake. ........ r46691 | martin.blais | 2006-06-06 14:46:55 +0200 (Tue, 06 Jun 2006) | 13 lines Normalized a few cases of whitespace in function declarations. Found them using:: find . 
-name '*.py' | while read i ; do grep 'def[^(]*( ' $i /dev/null ; done find . -name '*.py' | while read i ; do grep ' ):' $i /dev/null ; done (I was doing this all over my own code anyway, because I'd been using spaces in all defs, so I thought I'd make a run on the Python code as well. If you need to do such fixes in your own code, you can use xx-rename or parenregu.el within emacs.) ........ r46693 | thomas.heller | 2006-06-06 17:34:18 +0200 (Tue, 06 Jun 2006) | 1 line Specify argtypes for all test functions. Maybe that helps on strange ;-) architectures ........ r46694 | tim.peters | 2006-06-06 17:50:17 +0200 (Tue, 06 Jun 2006) | 5 lines BSequence_set_range(): Rev 46688 ("Fix a bunch of parameter strings") changed this function's signature seemingly by mistake, which is causing buildbots to fail test_bsddb3. Restored the pre-46688 signature. ........ r46695 | tim.peters | 2006-06-06 17:52:35 +0200 (Tue, 06 Jun 2006) | 4 lines On python-dev Thomas Heller said these were committed by mistake in rev 46693, so reverting this part of rev 46693. ........ r46696 | andrew.kuchling | 2006-06-06 19:10:41 +0200 (Tue, 06 Jun 2006) | 1 line Fix comment typo ........ r46697 | brett.cannon | 2006-06-06 20:08:16 +0200 (Tue, 06 Jun 2006) | 2 lines Fix coding style guide bug. ........ r46698 | thomas.heller | 2006-06-06 20:50:46 +0200 (Tue, 06 Jun 2006) | 2 lines Add a hack so that foreign functions returning float now do work on 64-bit big endian platforms. ........ r46699 | thomas.heller | 2006-06-06 21:25:13 +0200 (Tue, 06 Jun 2006) | 3 lines Use the same big-endian hack as in _ctypes/callproc.c for callback functions. This fixes the callback function tests that return float. ........ 
r46700 | ronald.oussoren | 2006-06-06 21:50:24 +0200 (Tue, 06 Jun 2006) | 5 lines * Ensure that "make altinstall" works when the tree was configured with --enable-framework * Also for --enable-framework: allow users to use --prefix to specify the location of the compatibility symlinks (such as /usr/local/bin/python) ........ r46701 | ronald.oussoren | 2006-06-06 21:56:00 +0200 (Tue, 06 Jun 2006) | 3 lines A quick hack to ensure the right key-bindings for IDLE on osx: install patched configuration files during a framework install. ........ r46702 | tim.peters | 2006-06-07 03:04:59 +0200 (Wed, 07 Jun 2006) | 4 lines dash_R_cleanup(): Clear filecmp._cache. This accounts for different results across -R runs (at least on Windows) of test_filecmp. ........ r46705 | tim.peters | 2006-06-07 08:57:51 +0200 (Wed, 07 Jun 2006) | 17 lines SF patch 1501987: Remove randomness from test_exceptions, from ?iga Seilnacht (sorry about the name, but Firefox on my box can't display the first character of the name -- the SF "Unix name" is zseil). This appears to cure the oddball intermittent leaks across runs when running test_exceptions under -R. I'm not sure why, but I'm too sleepy to care ;-) The thrust of the SF patch was to remove randomness in the pickle protocol used. I changed the patch to use range(pickle.HIGHEST_PROTOCOL + 1), to try both pickle and cPickle, and randomly mucked with other test lines to put statements on their own lines. Not a bugfix candidate (this is fiddling new-in-2.5 code). ........ r46706 | andrew.kuchling | 2006-06-07 15:55:33 +0200 (Wed, 07 Jun 2006) | 1 line Add an SQLite introduction, taken from the 'What's New' text ........ r46708 | andrew.kuchling | 2006-06-07 19:02:52 +0200 (Wed, 07 Jun 2006) | 1 line Mention other placeholders ........ r46709 | andrew.kuchling | 2006-06-07 19:03:46 +0200 (Wed, 07 Jun 2006) | 1 line Add an item; also, escape % ........ 
r46710 | andrew.kuchling | 2006-06-07 19:04:01 +0200 (Wed, 07 Jun 2006) | 1 line Mention other placeholders ........ r46716 | ronald.oussoren | 2006-06-07 20:57:44 +0200 (Wed, 07 Jun 2006) | 2 lines Move Mac/OSX/Tools one level up ........ r46717 | ronald.oussoren | 2006-06-07 20:58:01 +0200 (Wed, 07 Jun 2006) | 2 lines Move Mac/OSX/PythonLauncher one level up ........ r46718 | ronald.oussoren | 2006-06-07 20:58:42 +0200 (Wed, 07 Jun 2006) | 2 lines mv Mac/OSX/BuildScript one level up ........ r46719 | ronald.oussoren | 2006-06-07 21:02:03 +0200 (Wed, 07 Jun 2006) | 2 lines Move Mac/OSX/* one level up ........ r46720 | ronald.oussoren | 2006-06-07 21:06:01 +0200 (Wed, 07 Jun 2006) | 2 lines And the last bit: move IDLE one level up and adjust makefiles ........ r46723 | ronald.oussoren | 2006-06-07 21:38:53 +0200 (Wed, 07 Jun 2006) | 4 lines - Patch the correct version of python in the Info.plists at build time, instead of relying on a maintainer to update them before releases. - Remove the now empty Mac/OSX directory ........ r46727 | ronald.oussoren | 2006-06-07 22:18:44 +0200 (Wed, 07 Jun 2006) | 7 lines * If BuildApplet.py is used as an applet it starts with a version of sys.executable that isn't usable on an #!-line. That results in generated applets that don't actually work. Work around this problem by resetting sys.executable. * argvemulator.py didn't work on intel macs. This patch fixes this (bug #1491468) ........ r46728 | tim.peters | 2006-06-07 22:40:06 +0200 (Wed, 07 Jun 2006) | 2 lines Whitespace normalization. ........ r46729 | tim.peters | 2006-06-07 22:40:54 +0200 (Wed, 07 Jun 2006) | 2 lines Add missing svn:eol-style property to text files. ........ r46730 | thomas.heller | 2006-06-07 22:43:06 +0200 (Wed, 07 Jun 2006) | 7 lines Fix for foreign functions returning small structures on 64-bit big endian machines. Should fix the remaining failure in the PPC64 Debian buildbot. 
Thanks to Matthias Klose for providing access to a machine to debug and test this. ........ r46731 | brett.cannon | 2006-06-07 23:48:17 +0200 (Wed, 07 Jun 2006) | 2 lines Clarify documentation for bf_getcharbuffer. ........ r46735 | neal.norwitz | 2006-06-08 07:12:45 +0200 (Thu, 08 Jun 2006) | 1 line Fix a refleak in recvfrom_into ........ r46736 | gregory.p.smith | 2006-06-08 07:17:08 +0200 (Thu, 08 Jun 2006) | 9 lines - bsddb: the bsddb.dbtables Modify method now raises the proper error and aborts the db transaction safely when a modifier callback fails. Fixes SF python patch/bug #1408584. Also cleans up the bsddb.dbtables docstrings since that's the only documentation that exists for that unadvertised module. (people really should just use sqlite3) ........ r46737 | gregory.p.smith | 2006-06-08 07:38:11 +0200 (Thu, 08 Jun 2006) | 4 lines * Turn the deadlock situation described in SF bug #775414 into a DBDeadLockError exception. * add the test case for my previous dbtables commit. ........ r46738 | gregory.p.smith | 2006-06-08 07:39:54 +0200 (Thu, 08 Jun 2006) | 2 lines pasted set_lk_detect line in wrong spot in previous commit. fixed. passes tests this time. ........ r46739 | armin.rigo | 2006-06-08 12:56:24 +0200 (Thu, 08 Jun 2006) | 6 lines (arre, arigo) SF bug #1350060 Give a consistent behavior for comparison and hashing of method objects (both user- and built-in methods). Now compares the 'self' recursively. The hash was already asking for the hash of 'self'. ........ r46740 | andrew.kuchling | 2006-06-08 13:56:44 +0200 (Thu, 08 Jun 2006) | 1 line Typo fix ........ r46741 | georg.brandl | 2006-06-08 14:45:01 +0200 (Thu, 08 Jun 2006) | 2 lines Bug #1502750: Fix getargs "i" format to use LONG_MIN and LONG_MAX for bounds checking. ........ r46743 | georg.brandl | 2006-06-08 14:54:13 +0200 (Thu, 08 Jun 2006) | 2 lines Bug #1502728: Correctly link against librt library on HP-UX. ........ 
r46745 | georg.brandl | 2006-06-08 14:55:47 +0200 (Thu, 08 Jun 2006) | 3 lines Add news for recent bugfix. ........ r46746 | georg.brandl | 2006-06-08 15:31:07 +0200 (Thu, 08 Jun 2006) | 4 lines Argh. "integer" is a very confusing word ;) Actually, checking for INT_MAX and INT_MIN is correct since the format code explicitly handles a C "int". ........ r46748 | nick.coghlan | 2006-06-08 15:54:49 +0200 (Thu, 08 Jun 2006) | 1 line Add functools.update_wrapper() and functools.wraps() as described in PEP 356 ........ r46751 | georg.brandl | 2006-06-08 16:50:21 +0200 (Thu, 08 Jun 2006) | 4 lines Bug #1502805: don't alias file.__exit__ to file.close since the latter can return something that's true. ........ r46752 | georg.brandl | 2006-06-08 16:50:53 +0200 (Thu, 08 Jun 2006) | 3 lines Convert test_file to unittest. ........
This commit is contained in:
parent
1ba5b3b425
commit
73e5a5b65d
|
@ -57,8 +57,5 @@ tkinter Demos using the Tk interface (including Matt Conway's
|
|||
|
||||
xml Some XML demos.
|
||||
|
||||
xmlrpc XML-RPC server framework (but see the standard library
|
||||
module SimpleXMLRPCServer.py for a replacement).
|
||||
|
||||
zlib Some demos for the zlib module (see also the standard
|
||||
library module gzip.py).
|
||||
|
|
|
@ -59,32 +59,32 @@ del dbm, dim
|
|||
|
||||
_INT_TYPES = type(1), type(1L)
|
||||
|
||||
def _is_leap( year ): # 1 if leap year, else 0
|
||||
def _is_leap(year): # 1 if leap year, else 0
|
||||
if year % 4 != 0: return 0
|
||||
if year % 400 == 0: return 1
|
||||
return year % 100 != 0
|
||||
|
||||
def _days_in_year( year ): # number of days in year
|
||||
def _days_in_year(year): # number of days in year
|
||||
return 365 + _is_leap(year)
|
||||
|
||||
def _days_before_year( year ): # number of days before year
|
||||
def _days_before_year(year): # number of days before year
|
||||
return year*365L + (year+3)/4 - (year+99)/100 + (year+399)/400
|
||||
|
||||
def _days_in_month( month, year ): # number of days in month of year
|
||||
def _days_in_month(month, year): # number of days in month of year
|
||||
if month == 2 and _is_leap(year): return 29
|
||||
return _DAYS_IN_MONTH[month-1]
|
||||
|
||||
def _days_before_month( month, year ): # number of days in year before month
|
||||
def _days_before_month(month, year): # number of days in year before month
|
||||
return _DAYS_BEFORE_MONTH[month-1] + (month > 2 and _is_leap(year))
|
||||
|
||||
def _date2num( date ): # compute ordinal of date.month,day,year
|
||||
return _days_before_year( date.year ) + \
|
||||
_days_before_month( date.month, date.year ) + \
|
||||
def _date2num(date): # compute ordinal of date.month,day,year
|
||||
return _days_before_year(date.year) + \
|
||||
_days_before_month(date.month, date.year) + \
|
||||
date.day
|
||||
|
||||
_DI400Y = _days_before_year( 400 ) # number of days in 400 years
|
||||
_DI400Y = _days_before_year(400) # number of days in 400 years
|
||||
|
||||
def _num2date( n ): # return date with ordinal n
|
||||
def _num2date(n): # return date with ordinal n
|
||||
if type(n) not in _INT_TYPES:
|
||||
raise TypeError, 'argument must be integer: %r' % type(n)
|
||||
|
||||
|
@ -95,53 +95,53 @@ def _num2date( n ): # return date with ordinal n
|
|||
n400 = (n-1)/_DI400Y # # of 400-year blocks preceding
|
||||
year, n = 400 * n400, n - _DI400Y * n400
|
||||
more = n / 365
|
||||
dby = _days_before_year( more )
|
||||
dby = _days_before_year(more)
|
||||
if dby >= n:
|
||||
more = more - 1
|
||||
dby = dby - _days_in_year( more )
|
||||
dby = dby - _days_in_year(more)
|
||||
year, n = year + more, int(n - dby)
|
||||
|
||||
try: year = int(year) # chop to int, if it fits
|
||||
except (ValueError, OverflowError): pass
|
||||
|
||||
month = min( n/29 + 1, 12 )
|
||||
dbm = _days_before_month( month, year )
|
||||
month = min(n/29 + 1, 12)
|
||||
dbm = _days_before_month(month, year)
|
||||
if dbm >= n:
|
||||
month = month - 1
|
||||
dbm = dbm - _days_in_month( month, year )
|
||||
dbm = dbm - _days_in_month(month, year)
|
||||
|
||||
ans.month, ans.day, ans.year = month, n-dbm, year
|
||||
return ans
|
||||
|
||||
def _num2day( n ): # return weekday name of day with ordinal n
|
||||
def _num2day(n): # return weekday name of day with ordinal n
|
||||
return _DAY_NAMES[ int(n % 7) ]
|
||||
|
||||
|
||||
class Date:
|
||||
def __init__( self, month, day, year ):
|
||||
def __init__(self, month, day, year):
|
||||
if not 1 <= month <= 12:
|
||||
raise ValueError, 'month must be in 1..12: %r' % (month,)
|
||||
dim = _days_in_month( month, year )
|
||||
dim = _days_in_month(month, year)
|
||||
if not 1 <= day <= dim:
|
||||
raise ValueError, 'day must be in 1..%r: %r' % (dim, day)
|
||||
self.month, self.day, self.year = month, day, year
|
||||
self.ord = _date2num( self )
|
||||
self.ord = _date2num(self)
|
||||
|
||||
# don't allow setting existing attributes
|
||||
def __setattr__( self, name, value ):
|
||||
def __setattr__(self, name, value):
|
||||
if self.__dict__.has_key(name):
|
||||
raise AttributeError, 'read-only attribute ' + name
|
||||
self.__dict__[name] = value
|
||||
|
||||
def __cmp__( self, other ):
|
||||
return cmp( self.ord, other.ord )
|
||||
def __cmp__(self, other):
|
||||
return cmp(self.ord, other.ord)
|
||||
|
||||
# define a hash function so dates can be used as dictionary keys
|
||||
def __hash__( self ):
|
||||
return hash( self.ord )
|
||||
def __hash__(self):
|
||||
return hash(self.ord)
|
||||
|
||||
# print as, e.g., Mon 16 Aug 1993
|
||||
def __repr__( self ):
|
||||
def __repr__(self):
|
||||
return '%.3s %2d %.3s %r' % (
|
||||
self.weekday(),
|
||||
self.day,
|
||||
|
@ -149,33 +149,33 @@ class Date:
|
|||
self.year)
|
||||
|
||||
# Python 1.1 coerces neither int+date nor date+int
|
||||
def __add__( self, n ):
|
||||
def __add__(self, n):
|
||||
if type(n) not in _INT_TYPES:
|
||||
raise TypeError, 'can\'t add %r to date' % type(n)
|
||||
return _num2date( self.ord + n )
|
||||
return _num2date(self.ord + n)
|
||||
__radd__ = __add__ # handle int+date
|
||||
|
||||
# Python 1.1 coerces neither date-int nor date-date
|
||||
def __sub__( self, other ):
|
||||
def __sub__(self, other):
|
||||
if type(other) in _INT_TYPES: # date-int
|
||||
return _num2date( self.ord - other )
|
||||
return _num2date(self.ord - other)
|
||||
else:
|
||||
return self.ord - other.ord # date-date
|
||||
|
||||
# complain about int-date
|
||||
def __rsub__( self, other ):
|
||||
def __rsub__(self, other):
|
||||
raise TypeError, 'Can\'t subtract date from integer'
|
||||
|
||||
def weekday( self ):
|
||||
return _num2day( self.ord )
|
||||
def weekday(self):
|
||||
return _num2day(self.ord)
|
||||
|
||||
def today():
|
||||
import time
|
||||
local = time.localtime(time.time())
|
||||
return Date( local[1], local[2], local[0] )
|
||||
return Date(local[1], local[2], local[0])
|
||||
|
||||
DateTestError = 'DateTestError'
|
||||
def test( firstyear, lastyear ):
|
||||
def test(firstyear, lastyear):
|
||||
a = Date(9,30,1913)
|
||||
b = Date(9,30,1914)
|
||||
if repr(a) != 'Tue 30 Sep 1913':
|
||||
|
@ -207,7 +207,7 @@ def test( firstyear, lastyear ):
|
|||
# verify date<->number conversions for first and last days for
|
||||
# all years in firstyear .. lastyear
|
||||
|
||||
lord = _days_before_year( firstyear )
|
||||
lord = _days_before_year(firstyear)
|
||||
y = firstyear
|
||||
while y <= lastyear:
|
||||
ford = lord + 1
|
||||
|
|
|
@ -11,14 +11,11 @@ I wouldn't mind someone else making an effort in that direction, of
|
|||
course.
|
||||
|
||||
ncurses.py -- currently only a panels demo
|
||||
XXX this won't work until panel support is checked in
|
||||
rain.py -- raindrops keep falling on my desktop
|
||||
tclock.py -- ASCII clock, by Howard Jones
|
||||
xmas.py -- I'm dreaming of an ASCII christmas
|
||||
|
||||
Please send bugfixes and new contributions to me or, even better,
|
||||
submit them to the Python Bug Tracker on SourceForge
|
||||
(<URL:http://sourceforge.net/bugs/?group_id=5470>).
|
||||
Please submit bugfixes and new contributions to the Python bug tracker.
|
||||
|
||||
|
||||
Other demos
|
||||
|
|
|
@ -44,14 +44,15 @@ class LifeBoard:
|
|||
scr -- curses screen object to use for display
|
||||
char -- character used to render live cells (default: '*')
|
||||
"""
|
||||
self.state={} ; self.scr=scr
|
||||
self.state = {}
|
||||
self.scr = scr
|
||||
Y, X = self.scr.getmaxyx()
|
||||
self.X, self.Y = X-2, Y-2-1
|
||||
self.char = char
|
||||
self.scr.clear()
|
||||
|
||||
# Draw a border around the board
|
||||
border_line='+'+(self.X*'-')+'+'
|
||||
border_line = '+'+(self.X*'-')+'+'
|
||||
self.scr.addstr(0, 0, border_line)
|
||||
self.scr.addstr(self.Y+1,0, border_line)
|
||||
for y in range(0, self.Y):
|
||||
|
@ -73,16 +74,16 @@ class LifeBoard:
|
|||
del self.state[x,y]
|
||||
self.scr.addch(y+1, x+1, ' ')
|
||||
else:
|
||||
self.state[x,y]=1
|
||||
self.state[x,y] = 1
|
||||
self.scr.addch(y+1, x+1, self.char)
|
||||
self.scr.refresh()
|
||||
|
||||
def erase(self):
|
||||
"""Clear the entire board and update the board display"""
|
||||
self.state={}
|
||||
self.display(update_board=0)
|
||||
self.state = {}
|
||||
self.display(update_board=False)
|
||||
|
||||
def display(self, update_board=1):
|
||||
def display(self, update_board=True):
|
||||
"""Display the whole board, optionally computing one generation"""
|
||||
M,N = self.X, self.Y
|
||||
if not update_board:
|
||||
|
@ -95,42 +96,46 @@ class LifeBoard:
|
|||
self.scr.refresh()
|
||||
return
|
||||
|
||||
d={} ; self.boring=1
|
||||
d = {}
|
||||
self.boring = 1
|
||||
for i in range(0, M):
|
||||
L=range( max(0, i-1), min(M, i+2) )
|
||||
L = range( max(0, i-1), min(M, i+2) )
|
||||
for j in range(0, N):
|
||||
s=0
|
||||
live=self.state.has_key( (i,j) )
|
||||
s = 0
|
||||
live = self.state.has_key( (i,j) )
|
||||
for k in range( max(0, j-1), min(N, j+2) ):
|
||||
for l in L:
|
||||
if self.state.has_key( (l,k) ):
|
||||
s=s+1
|
||||
s=s-live
|
||||
if s==3:
|
||||
s += 1
|
||||
s -= live
|
||||
if s == 3:
|
||||
# Birth
|
||||
d[i,j]=1
|
||||
d[i,j] = 1
|
||||
self.scr.addch(j+1, i+1, self.char)
|
||||
if not live: self.boring=0
|
||||
elif s==2 and live: d[i,j]=1 # Survival
|
||||
if not live: self.boring = 0
|
||||
elif s == 2 and live: d[i,j] = 1 # Survival
|
||||
elif live:
|
||||
# Death
|
||||
self.scr.addch(j+1, i+1, ' ')
|
||||
self.boring=0
|
||||
self.state=d
|
||||
self.boring = 0
|
||||
self.state = d
|
||||
self.scr.refresh()
|
||||
|
||||
def makeRandom(self):
|
||||
"Fill the board with a random pattern"
|
||||
self.state={}
|
||||
self.state = {}
|
||||
for i in range(0, self.X):
|
||||
for j in range(0, self.Y):
|
||||
if random.random() > 0.5: self.set(j,i)
|
||||
if random.random() > 0.5:
|
||||
self.set(j,i)
|
||||
|
||||
|
||||
def erase_menu(stdscr, menu_y):
|
||||
"Clear the space where the menu resides"
|
||||
stdscr.move(menu_y, 0) ; stdscr.clrtoeol()
|
||||
stdscr.move(menu_y+1, 0) ; stdscr.clrtoeol()
|
||||
stdscr.move(menu_y, 0)
|
||||
stdscr.clrtoeol()
|
||||
stdscr.move(menu_y+1, 0)
|
||||
stdscr.clrtoeol()
|
||||
|
||||
def display_menu(stdscr, menu_y):
|
||||
"Display the menu of possible keystroke commands"
|
||||
|
@ -140,18 +145,17 @@ def display_menu(stdscr, menu_y):
|
|||
stdscr.addstr(menu_y+1, 4,
|
||||
'E)rase the board, R)andom fill, S)tep once or C)ontinuously, Q)uit')
|
||||
|
||||
def main(stdscr):
|
||||
|
||||
def keyloop(stdscr):
|
||||
# Clear the screen and display the menu of keys
|
||||
stdscr.clear()
|
||||
stdscr_y, stdscr_x = stdscr.getmaxyx()
|
||||
menu_y=(stdscr_y-3)-1
|
||||
menu_y = (stdscr_y-3)-1
|
||||
display_menu(stdscr, menu_y)
|
||||
|
||||
# Allocate a subwindow for the Life board and create the board object
|
||||
subwin=stdscr.subwin(stdscr_y-3, stdscr_x, 0, 0)
|
||||
board=LifeBoard(subwin, char=ord('*'))
|
||||
board.display(update_board=0)
|
||||
subwin = stdscr.subwin(stdscr_y-3, stdscr_x, 0, 0)
|
||||
board = LifeBoard(subwin, char=ord('*'))
|
||||
board.display(update_board=False)
|
||||
|
||||
# xpos, ypos are the cursor's position
|
||||
xpos, ypos = board.X/2, board.Y/2
|
||||
|
@ -159,9 +163,9 @@ def main(stdscr):
|
|||
# Main loop:
|
||||
while (1):
|
||||
stdscr.move(1+ypos, 1+xpos) # Move the cursor
|
||||
c=stdscr.getch() # Get a keystroke
|
||||
c = stdscr.getch() # Get a keystroke
|
||||
if 0<c<256:
|
||||
c=chr(c)
|
||||
c = chr(c)
|
||||
if c in ' \n':
|
||||
board.toggle(ypos, xpos)
|
||||
elif c in 'Cc':
|
||||
|
@ -173,50 +177,40 @@ def main(stdscr):
|
|||
# if no keystroke is available, instead of waiting.
|
||||
stdscr.nodelay(1)
|
||||
while (1):
|
||||
c=stdscr.getch()
|
||||
if c!=-1: break
|
||||
stdscr.addstr(0,0, '/'); stdscr.refresh()
|
||||
c = stdscr.getch()
|
||||
if c != -1:
|
||||
break
|
||||
stdscr.addstr(0,0, '/')
|
||||
stdscr.refresh()
|
||||
board.display()
|
||||
stdscr.addstr(0,0, '+'); stdscr.refresh()
|
||||
stdscr.addstr(0,0, '+')
|
||||
stdscr.refresh()
|
||||
|
||||
stdscr.nodelay(0) # Disable nodelay mode
|
||||
display_menu(stdscr, menu_y)
|
||||
|
||||
elif c in 'Ee': board.erase()
|
||||
elif c in 'Qq': break
|
||||
elif c in 'Ee':
|
||||
board.erase()
|
||||
elif c in 'Qq':
|
||||
break
|
||||
elif c in 'Rr':
|
||||
board.makeRandom()
|
||||
board.display(update_board=0)
|
||||
board.display(update_board=False)
|
||||
elif c in 'Ss':
|
||||
board.display()
|
||||
else: pass # Ignore incorrect keys
|
||||
elif c==curses.KEY_UP and ypos>0: ypos=ypos-1
|
||||
elif c==curses.KEY_DOWN and ypos<board.Y-1: ypos=ypos+1
|
||||
elif c==curses.KEY_LEFT and xpos>0: xpos=xpos-1
|
||||
elif c==curses.KEY_RIGHT and xpos<board.X-1: xpos=xpos+1
|
||||
else: pass # Ignore incorrect keys
|
||||
elif c == curses.KEY_UP and ypos>0: ypos -= 1
|
||||
elif c == curses.KEY_DOWN and ypos<board.Y-1: ypos += 1
|
||||
elif c == curses.KEY_LEFT and xpos>0: xpos -= 1
|
||||
elif c == curses.KEY_RIGHT and xpos<board.X-1: xpos += 1
|
||||
else:
|
||||
# Ignore incorrect keys
|
||||
pass
|
||||
|
||||
if __name__=='__main__':
|
||||
try:
|
||||
# Initialize curses
|
||||
stdscr=curses.initscr()
|
||||
# Turn off echoing of keys, and enter cbreak mode,
|
||||
# where no buffering is performed on keyboard input
|
||||
curses.noecho() ; curses.cbreak()
|
||||
|
||||
# In keypad mode, escape sequences for special keys
|
||||
# (like the cursor keys) will be interpreted and
|
||||
# a special value like curses.KEY_LEFT will be returned
|
||||
stdscr.keypad(1)
|
||||
main(stdscr) # Enter the main loop
|
||||
# Set everything back to normal
|
||||
stdscr.keypad(0)
|
||||
curses.echo() ; curses.nocbreak()
|
||||
curses.endwin() # Terminate curses
|
||||
except:
|
||||
# In the event of an error, restore the terminal
|
||||
# to a sane state.
|
||||
stdscr.keypad(0)
|
||||
curses.echo() ; curses.nocbreak()
|
||||
curses.endwin()
|
||||
traceback.print_exc() # Print the exception
|
||||
def main(stdscr):
|
||||
keyloop(stdscr) # Enter the main loop
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
curses.wrapper(main)
|
||||
|
|
|
@ -48,7 +48,7 @@ def main(win):
|
|||
ypos[j] = randrange(0, r) + 2
|
||||
|
||||
j = 0
|
||||
while 1:
|
||||
while True:
|
||||
x = randrange(0, c) + 2
|
||||
y = randrange(0, r) + 2
|
||||
|
||||
|
@ -83,7 +83,7 @@ def main(win):
|
|||
|
||||
ch = stdscr.getch()
|
||||
if ch == ord('q') or ch == ord('Q'):
|
||||
return 0
|
||||
return
|
||||
elif ch == ord('s'):
|
||||
stdscr.nodelay(0)
|
||||
elif ch == ord(' '):
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
"""repeat <shell-command>
|
||||
|
||||
This simple program repeatedly (with 1-second intervals) executes the
|
||||
This simple program repeatedly (at 1-second intervals) executes the
|
||||
shell command given on the command line and displays the output (or as
|
||||
much of it as fits on the screen). It uses curses to paint each new
|
||||
output on top of the old output, so that if nothing changes, the
|
||||
|
@ -38,7 +38,7 @@ def main():
|
|||
sys.exit(sts)
|
||||
w = curses.initscr()
|
||||
try:
|
||||
while 1:
|
||||
while True:
|
||||
w.erase()
|
||||
try:
|
||||
w.addstr(text)
|
||||
|
|
|
@ -14,7 +14,8 @@ def sign(_x):
|
|||
return 1
|
||||
|
||||
def A2XY(angle, radius):
|
||||
return int(round(ASPECT * radius * sin(angle))), int(round(radius * cos(angle)))
|
||||
return (int(round(ASPECT * radius * sin(angle))),
|
||||
int(round(radius * cos(angle))))
|
||||
|
||||
def plot(x, y, col):
|
||||
stdscr.addch(y, x, col)
|
||||
|
@ -37,9 +38,9 @@ def dline(pair, from_x, from_y, x2, y2, ch):
|
|||
y = from_y
|
||||
|
||||
if ax > ay:
|
||||
d = ay - ax / 2
|
||||
d = ay - ax // 2
|
||||
|
||||
while 1:
|
||||
while True:
|
||||
plot(x, y, ch)
|
||||
if x == x2:
|
||||
return
|
||||
|
@ -50,9 +51,9 @@ def dline(pair, from_x, from_y, x2, y2, ch):
|
|||
x += sx
|
||||
d += ay
|
||||
else:
|
||||
d = ax - ay / 2
|
||||
d = ax - ay // 2
|
||||
|
||||
while 1:
|
||||
while True:
|
||||
plot(x, y, ch)
|
||||
if y == y2:
|
||||
return
|
||||
|
@ -78,12 +79,12 @@ def main(win):
|
|||
curses.init_pair(2, curses.COLOR_MAGENTA, my_bg)
|
||||
curses.init_pair(3, curses.COLOR_GREEN, my_bg)
|
||||
|
||||
cx = (curses.COLS - 1) / 2
|
||||
cy = curses.LINES / 2
|
||||
ch = min( cy-1, int(cx / ASPECT) - 1)
|
||||
mradius = (3 * ch) / 4
|
||||
hradius = ch / 2
|
||||
sradius = 5 * ch / 6
|
||||
cx = (curses.COLS - 1) // 2
|
||||
cy = curses.LINES // 2
|
||||
ch = min( cy-1, int(cx // ASPECT) - 1)
|
||||
mradius = (3 * ch) // 4
|
||||
hradius = ch // 2
|
||||
sradius = 5 * ch // 6
|
||||
|
||||
for i in range(0, 12):
|
||||
sangle = (i + 1) * 2.0 * pi / 12.0
|
||||
|
@ -96,7 +97,7 @@ def main(win):
|
|||
|
||||
sradius = max(sradius-4, 8)
|
||||
|
||||
while 1:
|
||||
while True:
|
||||
curses.napms(1000)
|
||||
|
||||
tim = time.time()
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# $Id$
|
||||
#
|
||||
# I'm dreaming of an ascii character-based monochrome Christmas,
|
||||
# Just like the one's I used to know!
|
||||
# Just like the ones I used to know!
|
||||
# Via a full duplex communications channel,
|
||||
# At 9600 bits per second,
|
||||
# Even though it's kinda slow.
|
||||
|
@ -272,7 +272,7 @@ def strng5():
|
|||
def blinkit():
|
||||
treescrn8.touchwin()
|
||||
|
||||
for cycle in range(0, 5):
|
||||
for cycle in range(5):
|
||||
if cycle == 0:
|
||||
treescrn3.overlay(treescrn8)
|
||||
treescrn8.refresh()
|
||||
|
@ -380,7 +380,7 @@ def reindeer():
|
|||
middeer0.refresh()
|
||||
w_del_msg.refresh()
|
||||
|
||||
for looper in range(0, 2):
|
||||
for looper in range(2):
|
||||
deer_step(middeer3, y_pos, x_pos)
|
||||
deer_step(middeer2, y_pos, x_pos)
|
||||
deer_step(middeer1, y_pos, x_pos)
|
||||
|
|
|
@ -19,4 +19,3 @@ mcast.py A Python translation of
|
|||
/usr/people/4Dgifts/examples/network/mcast.c
|
||||
(Note that IN.py is in ../../lib/sgi.)
|
||||
|
||||
See also ../../lib/nntp.py for another example of socket code.
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# Echo client demo using Unix sockets
|
||||
# Piet van Oostrum
|
||||
|
||||
from socket import *
|
||||
FILE = 'blabla'
|
||||
|
||||
FILE = 'unix-socket'
|
||||
s = socket(AF_UNIX, SOCK_STREAM)
|
||||
s.connect(FILE)
|
||||
s.send('Hello, world')
|
||||
|
|
|
@ -1,17 +1,24 @@
|
|||
# Echo server demo using Unix sockets (handles one connection only)
|
||||
# Piet van Oostrum
|
||||
|
||||
import os
|
||||
from socket import *
|
||||
FILE = 'blabla'
|
||||
|
||||
FILE = 'unix-socket'
|
||||
s = socket(AF_UNIX, SOCK_STREAM)
|
||||
s.bind(FILE)
|
||||
|
||||
print 'Sock name is: ['+s.getsockname()+']'
|
||||
|
||||
# Wait for a connection
|
||||
s.listen(1)
|
||||
conn, addr = s.accept()
|
||||
print 'Connected by', addr
|
||||
while 1:
|
||||
|
||||
while True:
|
||||
data = conn.recv(1024)
|
||||
if not data: break
|
||||
if not data:
|
||||
break
|
||||
conn.send(data)
|
||||
|
||||
conn.close()
|
||||
os.unlink(FILE)
|
||||
|
|
|
@ -4,14 +4,14 @@ from Coroutine import *
|
|||
|
||||
# fringe visits a nested list in inorder, and detaches for each non-list
|
||||
# element; raises EarlyExit after the list is exhausted
|
||||
def fringe( co, list ):
|
||||
def fringe(co, list):
|
||||
for x in list:
|
||||
if type(x) is type([]):
|
||||
fringe(co, x)
|
||||
else:
|
||||
co.back(x)
|
||||
|
||||
def printinorder( list ):
|
||||
def printinorder(list):
|
||||
co = Coroutine()
|
||||
f = co.create(fringe, co, list)
|
||||
try:
|
||||
|
@ -27,7 +27,7 @@ x = [0, 1, [2, [3]], [4,5], [[[6]]] ]
|
|||
printinorder(x) # 0 1 2 3 4 5 6
|
||||
|
||||
# fcmp lexicographically compares the fringes of two nested lists
|
||||
def fcmp( l1, l2 ):
|
||||
def fcmp(l1, l2):
|
||||
co1 = Coroutine(); f1 = co1.create(fringe, co1, l1)
|
||||
co2 = Coroutine(); f2 = co2.create(fringe, co2, l2)
|
||||
while 1:
|
||||
|
|
|
@ -1,104 +0,0 @@
|
|||
#
|
||||
# XML-RPC SERVER
|
||||
# $Id$
|
||||
#
|
||||
# an asynchronous XML-RPC server for Medusa
|
||||
#
|
||||
# written by Sam Rushing
|
||||
#
|
||||
# Based on "xmlrpcserver.py" by Fredrik Lundh (fredrik@pythonware.com)
|
||||
#
|
||||
|
||||
import http_server
|
||||
import xmlrpclib
|
||||
|
||||
import sys
|
||||
|
||||
class xmlrpc_handler:
|
||||
|
||||
def match (self, request):
|
||||
# Note: /RPC2 is not required by the spec, so you may override this method.
|
||||
if request.uri[:5] == '/RPC2':
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def handle_request (self, request):
|
||||
[path, params, query, fragment] = request.split_uri()
|
||||
|
||||
if request.command.lower() in ('post', 'put'):
|
||||
request.collector = collector (self, request)
|
||||
else:
|
||||
request.error (400)
|
||||
|
||||
def continue_request (self, data, request):
|
||||
params, method = xmlrpclib.loads (data)
|
||||
try:
|
||||
# generate response
|
||||
try:
|
||||
response = self.call (method, params)
|
||||
response = (response,)
|
||||
except:
|
||||
# report exception back to server
|
||||
response = xmlrpclib.dumps (
|
||||
xmlrpclib.Fault (1, "%s:%s" % sys.exc_info()[:2])
|
||||
)
|
||||
else:
|
||||
response = xmlrpclib.dumps (response, methodresponse=1)
|
||||
except:
|
||||
# internal error, report as HTTP server error
|
||||
request.error (500)
|
||||
else:
|
||||
# got a valid XML RPC response
|
||||
request['Content-Type'] = 'text/xml'
|
||||
request.push (response)
|
||||
request.done()
|
||||
|
||||
def call (self, method, params):
|
||||
# override this method to implement RPC methods
|
||||
raise "NotYetImplemented"
|
||||
|
||||
class collector:
|
||||
|
||||
"gathers input for POST and PUT requests"
|
||||
|
||||
def __init__ (self, handler, request):
|
||||
|
||||
self.handler = handler
|
||||
self.request = request
|
||||
self.data = ''
|
||||
|
||||
# make sure there's a content-length header
|
||||
cl = request.get_header ('content-length')
|
||||
|
||||
if not cl:
|
||||
request.error (411)
|
||||
else:
|
||||
cl = int (cl)
|
||||
# using a 'numeric' terminator
|
||||
self.request.channel.set_terminator (cl)
|
||||
|
||||
def collect_incoming_data (self, data):
|
||||
self.data = self.data + data
|
||||
|
||||
def found_terminator (self):
|
||||
# set the terminator back to the default
|
||||
self.request.channel.set_terminator ('\r\n\r\n')
|
||||
self.handler.continue_request (self.data, self.request)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
class rpc_demo (xmlrpc_handler):
|
||||
|
||||
def call (self, method, params):
|
||||
print 'method="%s" params=%s' % (method, params)
|
||||
return "Sure, that works"
|
||||
|
||||
import asyncore
|
||||
import http_server
|
||||
|
||||
hs = http_server.http_server ('', 8000)
|
||||
rpc = rpc_demo()
|
||||
hs.install_handler (rpc)
|
||||
|
||||
asyncore.loop()
|
|
@ -1,75 +0,0 @@
|
|||
#
|
||||
# XML-RPC SERVER
|
||||
# $Id$
|
||||
#
|
||||
# a simple XML-RPC server for Python
|
||||
#
|
||||
# History:
|
||||
# 1999-02-01 fl added to xmlrpclib distribution
|
||||
#
|
||||
# written by Fredrik Lundh, January 1999.
|
||||
#
|
||||
# Copyright (c) 1999 by Secret Labs AB.
|
||||
# Copyright (c) 1999 by Fredrik Lundh.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted. This software is provided as is.
|
||||
# --------------------------------------------------------------------
|
||||
#
|
||||
|
||||
import SocketServer, BaseHTTPServer
|
||||
import xmlrpclib
|
||||
import sys
|
||||
|
||||
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
|
||||
def do_POST(self):
|
||||
try:
|
||||
# get arguments
|
||||
data = self.rfile.read(int(self.headers["content-length"]))
|
||||
params, method = xmlrpclib.loads(data)
|
||||
|
||||
# generate response
|
||||
try:
|
||||
response = self.call(method, params)
|
||||
# wrap response in a singleton tuple
|
||||
response = (response,)
|
||||
except:
|
||||
# report exception back to server
|
||||
response = xmlrpclib.dumps(
|
||||
xmlrpclib.Fault(1, "%s:%s" % sys.exc_info()[:2])
|
||||
)
|
||||
else:
|
||||
response = xmlrpclib.dumps(
|
||||
response,
|
||||
methodresponse=1
|
||||
)
|
||||
except:
|
||||
# internal error, report as HTTP server error
|
||||
self.send_response(500)
|
||||
self.end_headers()
|
||||
else:
|
||||
# got a valid XML RPC response
|
||||
self.send_response(200)
|
||||
self.send_header("Content-type", "text/xml")
|
||||
self.send_header("Content-length", str(len(response)))
|
||||
self.end_headers()
|
||||
self.wfile.write(response)
|
||||
|
||||
# shut down the connection (from Skip Montanaro)
|
||||
self.wfile.flush()
|
||||
self.connection.shutdown(1)
|
||||
|
||||
def call(self, method, params):
|
||||
# override this method to implement RPC methods
|
||||
print "CALL", method, params
|
||||
return params
|
||||
|
||||
if __name__ == '__main__':
|
||||
server = SocketServer.TCPServer(('', 8000), RequestHandler)
|
||||
server.serve_forever()
|
|
@ -1,106 +1,133 @@
|
|||
#!/usr/bin/env python
|
||||
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
|
||||
# delete the original. This doesn't support all of gzip's options.
|
||||
#
|
||||
# The 'gzip' module in the standard library provides a more complete
|
||||
# implementation of gzip-format files.
|
||||
|
||||
import zlib, sys, os
|
||||
|
||||
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
|
||||
|
||||
def write32(output, value):
|
||||
output.write(chr(value & 255)) ; value=value / 256
|
||||
output.write(chr(value & 255)) ; value=value / 256
|
||||
output.write(chr(value & 255)) ; value=value / 256
|
||||
output.write(chr(value & 255)) ; value=value // 256
|
||||
output.write(chr(value & 255)) ; value=value // 256
|
||||
output.write(chr(value & 255)) ; value=value // 256
|
||||
output.write(chr(value & 255))
|
||||
|
||||
def read32(input):
|
||||
v=ord(input.read(1))
|
||||
v=v+ (ord(input.read(1))<<8 )
|
||||
v=v+ (ord(input.read(1))<<16)
|
||||
v=v+ (ord(input.read(1))<<24)
|
||||
v = ord(input.read(1))
|
||||
v += (ord(input.read(1)) << 8 )
|
||||
v += (ord(input.read(1)) << 16)
|
||||
v += (ord(input.read(1)) << 24)
|
||||
return v
|
||||
|
||||
import zlib, sys
|
||||
if len(sys.argv)!=2:
|
||||
print 'Usage: minigzip.py <filename>'
|
||||
print ' The file will be compressed or decompressed.'
|
||||
sys.exit(0)
|
||||
|
||||
filename=sys.argv[1]
|
||||
compressing=1 ; outputname=filename+'.gz'
|
||||
if filename[-3:]=='.gz':
|
||||
compressing=0 ; outputname=filename[:-3]
|
||||
input=open(filename) ; output=open(outputname, 'w')
|
||||
|
||||
if compressing:
|
||||
def compress (filename, input, output):
|
||||
output.write('\037\213\010') # Write the header, ...
|
||||
output.write(chr(FNAME)) # ... flag byte ...
|
||||
|
||||
import os # ... modification time ...
|
||||
statval=os.stat(filename)
|
||||
mtime=statval[8]
|
||||
statval = os.stat(filename) # ... modification time ...
|
||||
mtime = statval[8]
|
||||
write32(output, mtime)
|
||||
output.write('\002') # ... slowest compression alg. ...
|
||||
output.write('\377') # ... OS (=unknown) ...
|
||||
output.write(filename+'\000') # ... original filename ...
|
||||
|
||||
crcval=zlib.crc32("")
|
||||
compobj=zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
|
||||
crcval = zlib.crc32("")
|
||||
compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
|
||||
zlib.DEF_MEM_LEVEL, 0)
|
||||
while (1):
|
||||
data=input.read(1024)
|
||||
if data=="": break
|
||||
crcval=zlib.crc32(data, crcval)
|
||||
while True:
|
||||
data = input.read(1024)
|
||||
if data == "":
|
||||
break
|
||||
crcval = zlib.crc32(data, crcval)
|
||||
output.write(compobj.compress(data))
|
||||
output.write(compobj.flush())
|
||||
write32(output, crcval) # ... the CRC ...
|
||||
write32(output, statval[6]) # and the file size.
|
||||
|
||||
else:
|
||||
magic=input.read(2)
|
||||
if magic!='\037\213':
|
||||
print 'Not a gzipped file' ; sys.exit(0)
|
||||
if ord(input.read(1))!=8:
|
||||
print 'Unknown compression method' ; sys.exit(0)
|
||||
flag=ord(input.read(1))
|
||||
def decompress (input, output):
|
||||
magic = input.read(2)
|
||||
if magic != '\037\213':
|
||||
print 'Not a gzipped file'
|
||||
sys.exit(0)
|
||||
if ord(input.read(1)) != 8:
|
||||
print 'Unknown compression method'
|
||||
sys.exit(0)
|
||||
flag = ord(input.read(1))
|
||||
input.read(4+1+1) # Discard modification time,
|
||||
# extra flags, and OS byte.
|
||||
if flag & FEXTRA:
|
||||
# Read & discard the extra field, if present
|
||||
xlen=ord(input.read(1))
|
||||
xlen=xlen+256*ord(input.read(1))
|
||||
xlen = ord(input.read(1))
|
||||
xlen += 256*ord(input.read(1))
|
||||
input.read(xlen)
|
||||
if flag & FNAME:
|
||||
# Read and discard a null-terminated string containing the filename
|
||||
while (1):
|
||||
s=input.read(1)
|
||||
if s=='\000': break
|
||||
while True:
|
||||
s = input.read(1)
|
||||
if s == '\0': break
|
||||
if flag & FCOMMENT:
|
||||
# Read and discard a null-terminated string containing a comment
|
||||
while (1):
|
||||
while True:
|
||||
s=input.read(1)
|
||||
if s=='\000': break
|
||||
if s=='\0': break
|
||||
if flag & FHCRC:
|
||||
input.read(2) # Read & discard the 16-bit header CRC
|
||||
decompobj=zlib.decompressobj(-zlib.MAX_WBITS)
|
||||
crcval=zlib.crc32("")
|
||||
length=0
|
||||
while (1):
|
||||
|
||||
decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
|
||||
crcval = zlib.crc32("")
|
||||
length = 0
|
||||
while True:
|
||||
data=input.read(1024)
|
||||
if data=="": break
|
||||
decompdata=decompobj.decompress(data)
|
||||
print len(decompdata)
|
||||
output.write(decompdata) ; length=length+len(decompdata)
|
||||
crcval=zlib.crc32(decompdata, crcval)
|
||||
decompdata=decompobj.flush()
|
||||
output.write(decompdata) ; length=length+len(decompdata)
|
||||
crcval=zlib.crc32(decompdata, crcval)
|
||||
if data == "":
|
||||
break
|
||||
decompdata = decompobj.decompress(data)
|
||||
output.write(decompdata)
|
||||
length += len(decompdata)
|
||||
crcval = zlib.crc32(decompdata, crcval)
|
||||
|
||||
decompdata = decompobj.flush()
|
||||
output.write(decompdata)
|
||||
length += len(decompdata)
|
||||
crcval = zlib.crc32(decompdata, crcval)
|
||||
|
||||
# We've read to the end of the file, so we have to rewind in order
|
||||
# to reread the 8 bytes containing the CRC and the file size. The
|
||||
# decompressor is smart and knows when to stop, so feeding it
|
||||
# extra data is harmless.
|
||||
input.seek(-8, 2)
|
||||
crc32=read32(input)
|
||||
isize=read32(input)
|
||||
if crc32!=crcval: print 'CRC check failed.'
|
||||
if isize!=length: print 'Incorrect length of data produced'
|
||||
crc32 = read32(input)
|
||||
isize = read32(input)
|
||||
if crc32 != crcval:
|
||||
print 'CRC check failed.'
|
||||
if isize != length:
|
||||
print 'Incorrect length of data produced'
|
||||
|
||||
input.close() ; output.close()
|
||||
def main():
|
||||
if len(sys.argv)!=2:
|
||||
print 'Usage: minigzip.py <filename>'
|
||||
print ' The file will be compressed or decompressed.'
|
||||
sys.exit(0)
|
||||
|
||||
filename = sys.argv[1]
|
||||
if filename.endswith('.gz'):
|
||||
compressing = False
|
||||
outputname = filename[:-3]
|
||||
else:
|
||||
compressing = True
|
||||
outputname = filename + '.gz'
|
||||
|
||||
input = open(filename, 'rb')
|
||||
output = open(outputname, 'wb')
|
||||
|
||||
if compressing:
|
||||
compress(filename, input, output)
|
||||
else:
|
||||
decompress(input, output)
|
||||
|
||||
input.close()
|
||||
output.close()
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1,35 +1,48 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# Takes an optional filename, defaulting to this file itself.
|
||||
# Reads the file and compresses the content using level 1 and level 9
|
||||
# compression, printing a summary of the results.
|
||||
|
||||
import zlib, sys
|
||||
if len(sys.argv)>1: filename=sys.argv[1]
|
||||
else: filename='zlibdemo.py'
|
||||
print 'Reading', filename
|
||||
f=open(filename, 'r') # Get the data to compress
|
||||
s=f.read()
|
||||
f.close()
|
||||
|
||||
# First, we'll compress the string in one step
|
||||
comptext=zlib.compress(s, 1)
|
||||
decomp=zlib.decompress(comptext)
|
||||
def main():
|
||||
if len(sys.argv) > 1:
|
||||
filename = sys.argv[1]
|
||||
else:
|
||||
filename = sys.argv[0]
|
||||
print 'Reading', filename
|
||||
|
||||
print '1-step compression: (level 1)'
|
||||
print ' Original:', len(s), 'Compressed:', len(comptext),
|
||||
print 'Uncompressed:', len(decomp)
|
||||
f = open(filename, 'rb') # Get the data to compress
|
||||
s = f.read()
|
||||
f.close()
|
||||
|
||||
# Now, let's compress the string in stages; set chunk to work in smaller steps
|
||||
# First, we'll compress the string in one step
|
||||
comptext = zlib.compress(s, 1)
|
||||
decomp = zlib.decompress(comptext)
|
||||
|
||||
chunk=256
|
||||
compressor=zlib.compressobj(9)
|
||||
decompressor=zlib.decompressobj()
|
||||
comptext=decomp=''
|
||||
for i in range(0, len(s), chunk):
|
||||
comptext=comptext+compressor.compress(s[i:i+chunk])
|
||||
comptext=comptext+compressor.flush() # Don't forget to call flush()!!
|
||||
print '1-step compression: (level 1)'
|
||||
print ' Original:', len(s), 'Compressed:', len(comptext),
|
||||
print 'Uncompressed:', len(decomp)
|
||||
|
||||
for i in range(0, len(comptext), chunk):
|
||||
decomp=decomp+decompressor.decompress(comptext[i:i+chunk])
|
||||
decomp=decomp+decompressor.flush()
|
||||
# Now, let's compress the string in stages; set chunk to work in smaller steps
|
||||
|
||||
print 'Progressive compression (level 9):'
|
||||
print ' Original:', len(s), 'Compressed:', len(comptext),
|
||||
print 'Uncompressed:', len(decomp)
|
||||
chunk = 256
|
||||
compressor = zlib.compressobj(9)
|
||||
decompressor = zlib.decompressobj()
|
||||
comptext = decomp = ''
|
||||
for i in range(0, len(s), chunk):
|
||||
comptext = comptext+compressor.compress(s[i:i+chunk])
|
||||
# Don't forget to call flush()!!
|
||||
comptext = comptext + compressor.flush()
|
||||
|
||||
for i in range(0, len(comptext), chunk):
|
||||
decomp = decomp + decompressor.decompress(comptext[i:i+chunk])
|
||||
decomp=decomp+decompressor.flush()
|
||||
|
||||
print 'Progressive compression (level 9):'
|
||||
print ' Original:', len(s), 'Compressed:', len(comptext),
|
||||
print 'Uncompressed:', len(decomp)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1549,7 +1549,9 @@ to be able to test for its presence before using it.}
|
|||
Before using this slot, the caller should test whether it is present
|
||||
by using the
|
||||
\cfunction{PyType_HasFeature()}\ttindex{PyType_HasFeature()}
|
||||
function. If present, it may be \NULL, indicating that the object's
|
||||
function. If the flag is present, \member{bf_getcharbuffer} may be
|
||||
\NULL,
|
||||
indicating that the object's
|
||||
contents cannot be used as \emph{8-bit characters}.
|
||||
The slot function may also raise an error if the object's contents
|
||||
cannot be interpreted as 8-bit characters. For example, if the
|
||||
|
@ -1574,12 +1576,13 @@ to be able to test for its presence before using it.}
|
|||
|
||||
\begin{ctypedesc}[getreadbufferproc]{Py_ssize_t (*readbufferproc)
|
||||
(PyObject *self, Py_ssize_t segment, void **ptrptr)}
|
||||
Return a pointer to a readable segment of the buffer. This function
|
||||
Return a pointer to a readable segment of the buffer in
|
||||
\code{*\var{ptrptr}}. This function
|
||||
is allowed to raise an exception, in which case it must return
|
||||
\code{-1}. The \var{segment} which is passed must be zero or
|
||||
\code{-1}. The \var{segment} which is specified must be zero or
|
||||
positive, and strictly less than the number of segments returned by
|
||||
the \member{bf_getsegcount} slot function. On success, it returns
|
||||
the length of the buffer memory, and sets \code{*\var{ptrptr}} to a
|
||||
the length of the segment, and sets \code{*\var{ptrptr}} to a
|
||||
pointer to that memory.
|
||||
\end{ctypedesc}
|
||||
|
||||
|
@ -1608,8 +1611,9 @@ to be able to test for its presence before using it.}
|
|||
|
||||
\begin{ctypedesc}[getcharbufferproc]{Py_ssize_t (*charbufferproc)
|
||||
(PyObject *self, Py_ssize_t segment, const char **ptrptr)}
|
||||
Return the size of the memory buffer in \var{ptrptr} for segment
|
||||
\var{segment}. \code{*\var{ptrptr}} is set to the memory buffer.
|
||||
Return the size of the segment \var{segment} that \var{ptrptr}
|
||||
is set to. \code{*\var{ptrptr}} is set to the memory buffer.
|
||||
Returns \code{-1} on error.
|
||||
\end{ctypedesc}
|
||||
|
||||
|
||||
|
|
|
@ -849,7 +849,7 @@ version. This information includes:
|
|||
{long string}{}
|
||||
\lineiv{download_url}{location where the package may be downloaded}
|
||||
{URL}{(4)}
|
||||
\lineiv{classifiers}{a list of Trove classifiers}
|
||||
\lineiv{classifiers}{a list of classifiers}
|
||||
{list of strings}{(4)}
|
||||
\end{tableiv}
|
||||
|
||||
|
@ -2251,7 +2251,7 @@ are laid out in the following table.
|
|||
\lineiii{scripts}{A list of standalone script files to be built and installed}{a list of strings}
|
||||
\lineiii{ext_modules}{A list of Python extensions to be built}{A list of
|
||||
instances of \class{distutils.core.Extension}}
|
||||
\lineiii{classifiers}{A list of Trove categories for the package}{XXX link to better definition}
|
||||
\lineiii{classifiers}{A list of categories for the package}{The list of available categorizations is at \url{http://cheeseshop.python.org/pypi?:action=list_classifiers}.}
|
||||
\lineiii{distclass}{the \class{Distribution} class to use}{A subclass of \class{distutils.core.Distribution}}
|
||||
% What on earth is the use case for script_name?
|
||||
\lineiii{script_name}{The name of the setup.py script - defaults to \code{sys.argv[0]}}{a string}
|
||||
|
|
|
@ -952,7 +952,7 @@ sections \ref{doctest-simple-testmod} and
|
|||
|
||||
\begin{funcdesc}{testmod}{\optional{m}\optional{, name}\optional{,
|
||||
globs}\optional{, verbose}\optional{,
|
||||
isprivate}\optional{, report}\optional{,
|
||||
report}\optional{,
|
||||
optionflags}\optional{, extraglobs}\optional{,
|
||||
raise_on_error}\optional{, exclude_empty}}
|
||||
|
||||
|
@ -990,19 +990,14 @@ sections \ref{doctest-simple-testmod} and
|
|||
for function \function{testfile()} above, except that \var{globs}
|
||||
defaults to \code{\var{m}.__dict__}.
|
||||
|
||||
Optional argument \var{isprivate} specifies a function used to
|
||||
determine whether a name is private. The default function treats
|
||||
all names as public. \var{isprivate} can be set to
|
||||
\code{doctest.is_private} to skip over names that are
|
||||
private according to Python's underscore naming convention.
|
||||
\deprecated{2.4}{\var{isprivate} was a stupid idea -- don't use it.
|
||||
If you need to skip tests based on name, filter the list returned by
|
||||
\code{DocTestFinder.find()} instead.}
|
||||
|
||||
\versionchanged[The parameter \var{optionflags} was added]{2.3}
|
||||
|
||||
\versionchanged[The parameters \var{extraglobs}, \var{raise_on_error}
|
||||
and \var{exclude_empty} were added]{2.4}
|
||||
|
||||
\versionchanged[The optional argument \var{isprivate}, deprecated
|
||||
in 2.4, was removed]{2.5}
|
||||
|
||||
\end{funcdesc}
|
||||
|
||||
There's also a function to run the doctests associated with a single object.
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
\moduleauthor{Peter Harris}{scav@blueyonder.co.uk}
|
||||
\moduleauthor{Raymond Hettinger}{python@rcn.com}
|
||||
\moduleauthor{Nick Coghlan}{ncoghlan@gmail.com}
|
||||
\sectionauthor{Peter Harris}{scav@blueyonder.co.uk}
|
||||
|
||||
\modulesynopsis{Higher-order functions and operations on callable objects.}
|
||||
|
@ -50,6 +51,51 @@ two:
|
|||
\end{verbatim}
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{update_wrapper}
|
||||
{wrapper, wrapped\optional{, assigned}\optional{, updated}}
|
||||
Update a wrapper function to look like the wrapped function. The optional
|
||||
arguments are tuples to specify which attributes of the original
|
||||
function are assigned directly to the matching attributes on the wrapper
|
||||
function and which attributes of the wrapper function are updated with
|
||||
the corresponding attributes from the original function. The default
|
||||
values for these arguments are the module level constants
|
||||
\var{WRAPPER_ASSIGNMENTS} (which assigns to the wrapper function's name,
|
||||
module and documentation string) and \var{WRAPPER_UPDATES} (which
|
||||
updates the wrapper function's instance dictionary).
|
||||
|
||||
The main intended use for this function is in decorator functions
|
||||
which wrap the decorated function and return the wrapper. If the
|
||||
wrapper function is not updated, the metadata of the returned function
|
||||
will reflect the wrapper definition rather than the original function
|
||||
definition, which is typically less than helpful.
|
||||
\end{funcdesc}
|
||||
|
||||
\begin{funcdesc}{wraps}
|
||||
{wrapped\optional{, assigned}\optional{, updated}}
|
||||
This is a convenience function for invoking
|
||||
\code{partial(update_wrapper, wrapped=wrapped, assigned=assigned, updated=updated)}
|
||||
as a function decorator when defining a wrapper function. For example:
|
||||
\begin{verbatim}
|
||||
>>> def my_decorator(f):
|
||||
... @wraps(f)
|
||||
... def wrapper(*args, **kwds):
|
||||
... print 'Calling decorated function'
|
||||
... return f(*args, **kwds)
|
||||
... return wrapper
|
||||
...
|
||||
>>> @my_decorator
|
||||
... def example():
|
||||
... print 'Called example function'
|
||||
...
|
||||
>>> example()
|
||||
Calling decorated function
|
||||
Called example function
|
||||
>>> example.__name__
|
||||
'example'
|
||||
\end{verbatim}
|
||||
Without the use of this decorator factory, the name of the example
|
||||
function would have been \code{'wrapper'}.
|
||||
\end{funcdesc}
|
||||
|
||||
|
||||
\subsection{\class{partial} Objects \label{partial-objects}}
|
||||
|
|
|
@ -218,8 +218,9 @@ preference over \method{do_\var{tag}()}. The
|
|||
\end{methoddescni}
|
||||
|
||||
\begin{methoddescni}{do_\var{tag}}{attributes}
|
||||
This method is called to process an opening tag \var{tag} that does
|
||||
not come with a matching closing tag. The \var{attributes} argument
|
||||
This method is called to process an opening tag \var{tag}
|
||||
for which no \method{start_\var{tag}} method is defined.
|
||||
The \var{attributes} argument
|
||||
has the same meaning as described for \method{handle_starttag()} above.
|
||||
\end{methoddescni}
|
||||
|
||||
|
|
|
@ -548,7 +548,7 @@ are described in \ref{bltin-file-objects}, ``File Objects.'')
|
|||
The file object references a \cfunction{dup()}ped version of the
|
||||
socket file descriptor, so the file object and socket object may be
|
||||
closed or garbage-collected independently.
|
||||
The socket should be in blocking mode.
|
||||
The socket must be in blocking mode.
|
||||
\index{I/O control!buffering}The optional \var{mode}
|
||||
and \var{bufsize} arguments are interpreted the same way as by the
|
||||
built-in \function{file()} function; see ``Built-in Functions''
|
||||
|
@ -647,7 +647,7 @@ Timeout mode internally sets the socket in non-blocking mode. The
|
|||
blocking and timeout modes are shared between file descriptors and
|
||||
socket objects that refer to the same network endpoint. A consequence
|
||||
of this is that file objects returned by the \method{makefile()}
|
||||
method should only be used when the socket is in blocking mode; in
|
||||
method must only be used when the socket is in blocking mode; in
|
||||
timeout or non-blocking mode file operations that cannot be completed
|
||||
immediately will fail.
|
||||
|
||||
|
|
|
@ -6,6 +6,105 @@
|
|||
\sectionauthor{Gerhard H\"aring}{gh@ghaering.de}
|
||||
\versionadded{2.5}
|
||||
|
||||
SQLite is a C library that provides a SQL-language database that
|
||||
stores data in disk files without requiring a separate server process.
|
||||
pysqlite was written by Gerhard H\"aring and provides a SQL interface
|
||||
compliant with the DB-API 2.0 specification described by
|
||||
\pep{249}. This means that it should be possible to write the first
|
||||
version of your applications using SQLite for data storage. If
|
||||
switching to a larger database such as PostgreSQL or Oracle is
|
||||
later necessary, the switch should be relatively easy.
|
||||
|
||||
To use the module, you must first create a \class{Connection} object
|
||||
that represents the database. Here the data will be stored in the
|
||||
\file{/tmp/example} file:
|
||||
|
||||
\begin{verbatim}
|
||||
conn = sqlite3.connect('/tmp/example')
|
||||
\end{verbatim}
|
||||
|
||||
You can also supply the special name \samp{:memory:} to create
|
||||
a database in RAM.
|
||||
|
||||
Once you have a \class{Connection}, you can create a \class{Cursor}
|
||||
object and call its \method{execute()} method to perform SQL commands:
|
||||
|
||||
\begin{verbatim}
|
||||
c = conn.cursor()
|
||||
|
||||
# Create table
|
||||
c.execute('''create table stocks
|
||||
(date timestamp, trans varchar, symbol varchar,
|
||||
qty decimal, price decimal)''')
|
||||
|
||||
# Insert a row of data
|
||||
c.execute("""insert into stocks
|
||||
values ('2006-01-05','BUY','RHAT',100,35.14)""")
|
||||
\end{verbatim}
|
||||
|
||||
Usually your SQL operations will need to use values from Python
|
||||
variables. You shouldn't assemble your query using Python's string
|
||||
operations because doing so is insecure; it makes your program
|
||||
vulnerable to an SQL injection attack.
|
||||
|
||||
Instead, use the DB-API's parameter substitution. Put \samp{?} as a
|
||||
placeholder wherever you want to use a value, and then provide a tuple
|
||||
of values as the second argument to the cursor's \method{execute()}
|
||||
method. (Other database modules may use a different placeholder,
|
||||
such as \samp{\%s} or \samp{:1}.) For example:
|
||||
|
||||
\begin{verbatim}
|
||||
# Never do this -- insecure!
|
||||
symbol = 'IBM'
|
||||
c.execute("... where symbol = '%s'" % symbol)
|
||||
|
||||
# Do this instead
|
||||
t = (symbol,)
|
||||
c.execute('select * from stocks where symbol=?', t)
|
||||
|
||||
# Larger example
|
||||
for t in (('2006-03-28', 'BUY', 'IBM', 1000, 45.00),
|
||||
('2006-04-05', 'BUY', 'MSOFT', 1000, 72.00),
|
||||
('2006-04-06', 'SELL', 'IBM', 500, 53.00),
|
||||
):
|
||||
c.execute('insert into stocks values (?,?,?,?,?)', t)
|
||||
\end{verbatim}
|
||||
|
||||
To retrieve data after executing a SELECT statement, you can either
|
||||
treat the cursor as an iterator, call the cursor's \method{fetchone()}
|
||||
method to retrieve a single matching row,
|
||||
or call \method{fetchall()} to get a list of the matching rows.
|
||||
|
||||
This example uses the iterator form:
|
||||
|
||||
\begin{verbatim}
|
||||
>>> c = conn.cursor()
|
||||
>>> c.execute('select * from stocks order by price')
|
||||
>>> for row in c:
|
||||
... print row
|
||||
...
|
||||
(u'2006-01-05', u'BUY', u'RHAT', 100, 35.140000000000001)
|
||||
(u'2006-03-28', u'BUY', u'IBM', 1000, 45.0)
|
||||
(u'2006-04-06', u'SELL', u'IBM', 500, 53.0)
|
||||
(u'2006-04-05', u'BUY', u'MSOFT', 1000, 72.0)
|
||||
>>>
|
||||
\end{verbatim}
|
||||
|
||||
\begin{seealso}
|
||||
|
||||
\seeurl{http://www.pysqlite.org}
|
||||
{The pysqlite web page.}
|
||||
|
||||
\seeurl{http://www.sqlite.org}
|
||||
{The SQLite web page; the documentation describes the syntax and the
|
||||
available data types for the supported SQL dialect.}
|
||||
|
||||
\seepep{249}{Database API Specification 2.0}{PEP written by
|
||||
Marc-Andr\'e Lemburg.}
|
||||
|
||||
\end{seealso}
|
||||
|
||||
|
||||
\subsection{Module functions and constants\label{sqlite3-Module-Contents}}
|
||||
|
||||
\begin{datadesc}{PARSE_DECLTYPES}
|
||||
|
@ -467,7 +566,7 @@ connections.
|
|||
|
||||
If you want \strong{autocommit mode}, then set \member{isolation_level} to None.
|
||||
|
||||
Otherwise leave it at it's default, which will result in a plain "BEGIN"
|
||||
Otherwise leave it at its default, which will result in a plain "BEGIN"
|
||||
statement, or set it to one of SQLite's supported isolation levels: DEFERRED,
|
||||
IMMEDIATE or EXCLUSIVE.
|
||||
|
||||
|
|
|
@ -18,11 +18,13 @@ The \module{urllib2} module defines the following functions:
|
|||
Open the URL \var{url}, which can be either a string or a \class{Request}
|
||||
object.
|
||||
|
||||
\var{data} should be a string, which specifies additional data to
|
||||
send to the server. In HTTP requests, which are the only ones that
|
||||
support \var{data}, it should be a buffer in the format of
|
||||
\mimetype{application/x-www-form-urlencoded}, for example one returned
|
||||
from \function{urllib.urlencode()}.
|
||||
\var{data} may be a string specifying additional data to send to the
|
||||
server. Currently HTTP requests are the only ones that use \var{data};
|
||||
the HTTP request will be a POST instead of a GET when the \var{data}
|
||||
parameter is provided. \var{data} should be a buffer in the standard
|
||||
\mimetype{application/x-www-form-urlencoded} format. The
|
||||
\function{urllib.urlencode()} function takes a mapping or sequence of
|
||||
2-tuples and returns a string in this format.
|
||||
|
||||
This function returns a file-like object with two additional methods:
|
||||
|
||||
|
|
|
@ -95,14 +95,3 @@ audio tracks.
|
|||
|
||||
The \module{W} widgets are used extensively in the \program{IDE}.
|
||||
|
||||
\section{\module{waste} --- non-Apple \program{TextEdit} replacement}
|
||||
\declaremodule{standard}{waste}
|
||||
\platform{Mac}
|
||||
\modulesynopsis{Interface to the ``WorldScript-Aware Styled Text Engine.''}
|
||||
|
||||
\begin{seealso}
|
||||
\seetitle[http://www.merzwaren.com/waste/]{About WASTE}{Information
|
||||
about the WASTE widget and library, including
|
||||
documentation and downloads.}
|
||||
\end{seealso}
|
||||
|
||||
|
|
|
@ -1166,12 +1166,13 @@ In 2.5 the internal data structure has been customized for implementing sets,
|
|||
and as a result sets will use a third less memory and are somewhat faster.
|
||||
(Implemented by Raymond Hettinger.)
|
||||
|
||||
\item The speed of some Unicode operations, such as
|
||||
finding substrings, string splitting, and character map decoding, has
|
||||
been improved. (Substring search and splitting improvements were
|
||||
\item The speed of some Unicode operations, such as finding
|
||||
substrings, string splitting, and character map encoding and decoding,
|
||||
has been improved. (Substring search and splitting improvements were
|
||||
added by Fredrik Lundh and Andrew Dalke at the NeedForSpeed
|
||||
sprint. Character map decoding was improved by Walter D\"orwald.)
|
||||
% Patch 1313939
|
||||
sprint. Character maps were improved by Walter D\"orwald and
|
||||
Martin von~L\"owis.)
|
||||
% Patch 1313939, 1359618
|
||||
|
||||
\item The \function{long(\var{str}, \var{base})} function is now
|
||||
faster on long digit strings because fewer intermediate results are
|
||||
|
@ -1185,6 +1186,11 @@ strings into an internal representation and caches this
|
|||
representation, yielding a 20\% speedup. (Contributed by Bob Ippolito
|
||||
at the NeedForSpeed sprint.)
|
||||
|
||||
\item The \module{re} module got a 1 or 2\% speedup by switching to
|
||||
Python's allocator functions instead of the system's
|
||||
\cfunction{malloc()} and \cfunction{free()}.
|
||||
(Contributed by Jack Diederich at the NeedForSpeed sprint.)
|
||||
|
||||
\item The code generator's peephole optimizer now performs
|
||||
simple constant folding in expressions. If you write something like
|
||||
\code{a = 2+3}, the code generator will do the arithmetic and produce
|
||||
|
@ -1358,7 +1364,6 @@ to specify which generation to collect.
|
|||
now support a \code{key} keyword parameter similar to the one
|
||||
provided by the \function{min()}/\function{max()} functions
|
||||
and the \method{sort()} methods. For example:
|
||||
Example:
|
||||
|
||||
\begin{verbatim}
|
||||
>>> import heapq
|
||||
|
@ -1923,10 +1928,11 @@ variables. You shouldn't assemble your query using Python's string
|
|||
operations because doing so is insecure; it makes your program
|
||||
vulnerable to an SQL injection attack.
|
||||
|
||||
Instead, use SQLite's parameter substitution. Put \samp{?} as a
|
||||
Instead, use the DB-API's parameter substitution. Put \samp{?} as a
|
||||
placeholder wherever you want to use a value, and then provide a tuple
|
||||
of values as the second argument to the cursor's \method{execute()}
|
||||
method. For example:
|
||||
method. (Other database modules may use a different placeholder,
|
||||
such as \samp{\%s} or \samp{:1}.) For example:
|
||||
|
||||
\begin{verbatim}
|
||||
# Never do this -- insecure!
|
||||
|
|
|
@ -39,6 +39,8 @@ typedef struct _symtable_entry {
|
|||
unsigned ste_generator : 1; /* true if namespace is a generator */
|
||||
unsigned ste_varargs : 1; /* true if block has varargs */
|
||||
unsigned ste_varkeywords : 1; /* true if block has varkeywords */
|
||||
unsigned ste_returns_value : 1; /* true if namespace uses return with
|
||||
an argument */
|
||||
int ste_lineno; /* first line of block */
|
||||
int ste_opt_lineno; /* lineno of last exec or import * */
|
||||
int ste_tmpname; /* counter for listcomp temp vars */
|
||||
|
|
|
@ -650,6 +650,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
|
|||
const char *errors /* error handling */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
|
||||
PyObject* string /* 256 character map */
|
||||
);
|
||||
|
||||
|
||||
/* --- UTF-7 Codecs ------------------------------------------------------- */
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
|
||||
|
|
|
@ -343,6 +343,7 @@ def _openDBEnv(cachesize):
|
|||
e.set_cachesize(0, cachesize)
|
||||
else:
|
||||
raise error, "cachesize must be >= 20480"
|
||||
e.set_lk_detect(db.DB_LOCK_DEFAULT)
|
||||
e.open('.', db.DB_PRIVATE | db.DB_CREATE | db.DB_THREAD | db.DB_INIT_LOCK | db.DB_INIT_MPOOL)
|
||||
return e
|
||||
|
||||
|
|
|
@ -91,9 +91,14 @@ class DBEnv:
|
|||
return self._cobj.lock_stat(*args, **kwargs)
|
||||
def log_archive(self, *args, **kwargs):
|
||||
return self._cobj.log_archive(*args, **kwargs)
|
||||
|
||||
def set_get_returns_none(self, *args, **kwargs):
|
||||
return self._cobj.set_get_returns_none(*args, **kwargs)
|
||||
|
||||
if db.version() >= (4,0):
|
||||
def log_stat(self, *args, **kwargs):
|
||||
return apply(self._cobj.log_stat, args, kwargs)
|
||||
|
||||
if db.version() >= (4,1):
|
||||
def dbremove(self, *args, **kwargs):
|
||||
return self._cobj.dbremove(*args, **kwargs)
|
||||
|
@ -102,6 +107,10 @@ class DBEnv:
|
|||
def set_encrypt(self, *args, **kwargs):
|
||||
return self._cobj.set_encrypt(*args, **kwargs)
|
||||
|
||||
if db.version() >= (4,4):
|
||||
def lsn_reset(self, *args, **kwargs):
|
||||
return apply(self._cobj.lsn_reset, args, kwargs)
|
||||
|
||||
|
||||
class DB(DictMixin):
|
||||
def __init__(self, dbenv, *args, **kwargs):
|
||||
|
@ -208,3 +217,38 @@ class DB(DictMixin):
|
|||
if db.version() >= (4,1):
|
||||
def set_encrypt(self, *args, **kwargs):
|
||||
return self._cobj.set_encrypt(*args, **kwargs)
|
||||
|
||||
|
||||
class DBSequence:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._cobj = apply(db.DBSequence, args, kwargs)
|
||||
|
||||
def close(self, *args, **kwargs):
|
||||
return apply(self._cobj.close, args, kwargs)
|
||||
def get(self, *args, **kwargs):
|
||||
return apply(self._cobj.get, args, kwargs)
|
||||
def get_dbp(self, *args, **kwargs):
|
||||
return apply(self._cobj.get_dbp, args, kwargs)
|
||||
def get_key(self, *args, **kwargs):
|
||||
return apply(self._cobj.get_key, args, kwargs)
|
||||
def init_value(self, *args, **kwargs):
|
||||
return apply(self._cobj.init_value, args, kwargs)
|
||||
def open(self, *args, **kwargs):
|
||||
return apply(self._cobj.open, args, kwargs)
|
||||
def remove(self, *args, **kwargs):
|
||||
return apply(self._cobj.remove, args, kwargs)
|
||||
def stat(self, *args, **kwargs):
|
||||
return apply(self._cobj.stat, args, kwargs)
|
||||
def set_cachesize(self, *args, **kwargs):
|
||||
return apply(self._cobj.set_cachesize, args, kwargs)
|
||||
def set_flags(self, *args, **kwargs):
|
||||
return apply(self._cobj.set_flags, args, kwargs)
|
||||
def set_range(self, *args, **kwargs):
|
||||
return apply(self._cobj.set_range, args, kwargs)
|
||||
def get_cachesize(self, *args, **kwargs):
|
||||
return apply(self._cobj.get_cachesize, args, kwargs)
|
||||
def get_flags(self, *args, **kwargs):
|
||||
return apply(self._cobj.get_flags, args, kwargs)
|
||||
def get_range(self, *args, **kwargs):
|
||||
return apply(self._cobj.get_range, args, kwargs)
|
||||
|
||||
|
|
|
@ -131,7 +131,8 @@ def contains_metastrings(s) :
|
|||
class bsdTableDB :
|
||||
def __init__(self, filename, dbhome, create=0, truncate=0, mode=0600,
|
||||
recover=0, dbflags=0):
|
||||
"""bsdTableDB.open(filename, dbhome, create=0, truncate=0, mode=0600)
|
||||
"""bsdTableDB(filename, dbhome, create=0, truncate=0, mode=0600)
|
||||
|
||||
Open database name in the dbhome BerkeleyDB directory.
|
||||
Use keyword arguments when calling this constructor.
|
||||
"""
|
||||
|
@ -218,7 +219,8 @@ class bsdTableDB :
|
|||
|
||||
|
||||
def CreateTable(self, table, columns):
|
||||
"""CreateTable(table, columns) - Create a new table in the database
|
||||
"""CreateTable(table, columns) - Create a new table in the database.
|
||||
|
||||
raises TableDBError if it already exists or for other DB errors.
|
||||
"""
|
||||
assert isinstance(columns, ListType)
|
||||
|
@ -286,7 +288,8 @@ class bsdTableDB :
|
|||
def CreateOrExtendTable(self, table, columns):
|
||||
"""CreateOrExtendTable(table, columns)
|
||||
|
||||
- Create a new table in the database.
|
||||
Create a new table in the database.
|
||||
|
||||
If a table of this name already exists, extend it to have any
|
||||
additional columns present in the given list as well as
|
||||
all of its current columns.
|
||||
|
@ -411,14 +414,15 @@ class bsdTableDB :
|
|||
|
||||
|
||||
def Modify(self, table, conditions={}, mappings={}):
|
||||
"""Modify(table, conditions) - Modify in rows matching 'conditions'
|
||||
using mapping functions in 'mappings'
|
||||
* conditions is a dictionary keyed on column names
|
||||
containing condition functions expecting the data string as an
|
||||
argument and returning a boolean.
|
||||
* mappings is a dictionary keyed on column names containint condition
|
||||
functions expecting the data string as an argument and returning the
|
||||
new string for that column.
|
||||
"""Modify(table, conditions={}, mappings={}) - Modify items in rows matching 'conditions' using mapping functions in 'mappings'
|
||||
|
||||
* table - the table name
|
||||
* conditions - a dictionary keyed on column names containing
|
||||
a condition callable expecting the data string as an
|
||||
argument and returning a boolean.
|
||||
* mappings - a dictionary keyed on column names containing a
|
||||
mapping callable expecting the data string as an argument and
|
||||
returning the new string for that column.
|
||||
"""
|
||||
try:
|
||||
matching_rowids = self.__Select(table, [], conditions)
|
||||
|
@ -450,7 +454,8 @@ class bsdTableDB :
|
|||
txn.commit()
|
||||
txn = None
|
||||
|
||||
except DBError, dberror:
|
||||
# catch all exceptions here since we call unknown callables
|
||||
except:
|
||||
if txn:
|
||||
txn.abort()
|
||||
raise
|
||||
|
@ -461,9 +466,10 @@ class bsdTableDB :
|
|||
def Delete(self, table, conditions={}):
|
||||
"""Delete(table, conditions) - Delete items matching the given
|
||||
conditions from the table.
|
||||
* conditions is a dictionary keyed on column names
|
||||
containing condition functions expecting the data string as an
|
||||
argument and returning a boolean.
|
||||
|
||||
* conditions - a dictionary keyed on column names containing
|
||||
condition functions expecting the data string as an
|
||||
argument and returning a boolean.
|
||||
"""
|
||||
try:
|
||||
matching_rowids = self.__Select(table, [], conditions)
|
||||
|
@ -499,11 +505,12 @@ class bsdTableDB :
|
|||
|
||||
|
||||
def Select(self, table, columns, conditions={}):
|
||||
"""Select(table, conditions) - retrieve specific row data
|
||||
"""Select(table, columns, conditions) - retrieve specific row data
|
||||
Returns a list of row column->value mapping dictionaries.
|
||||
* columns is a list of which column data to return. If
|
||||
|
||||
* columns - a list of which column data to return. If
|
||||
columns is None, all columns will be returned.
|
||||
* conditions is a dictionary keyed on column names
|
||||
* conditions - a dictionary keyed on column names
|
||||
containing callable conditions expecting the data string as an
|
||||
argument and returning a boolean.
|
||||
"""
|
||||
|
|
|
@ -4,6 +4,12 @@
|
|||
import sys
|
||||
import os
|
||||
import unittest
|
||||
try:
|
||||
# For Pythons w/distutils pybsddb
|
||||
from bsddb3 import db
|
||||
except ImportError:
|
||||
# For Python 2.3
|
||||
from bsddb import db
|
||||
|
||||
verbose = 0
|
||||
if 'verbose' in sys.argv:
|
||||
|
@ -16,12 +22,6 @@ if 'silent' in sys.argv: # take care of old flag, just in case
|
|||
|
||||
|
||||
def print_versions():
|
||||
try:
|
||||
# For Pythons w/distutils pybsddb
|
||||
from bsddb3 import db
|
||||
except ImportError:
|
||||
# For Python 2.3
|
||||
from bsddb import db
|
||||
print
|
||||
print '-=' * 38
|
||||
print db.DB_VERSION_STRING
|
||||
|
@ -69,6 +69,8 @@ def suite():
|
|||
'test_queue',
|
||||
'test_recno',
|
||||
'test_thread',
|
||||
'test_sequence',
|
||||
'test_cursor_pget_bug',
|
||||
]
|
||||
|
||||
alltests = unittest.TestSuite()
|
||||
|
|
|
@ -659,12 +659,22 @@ class BasicTransactionTestCase(BasicTestCase):
|
|||
except db.DBIncompleteError:
|
||||
pass
|
||||
|
||||
if db.version() >= (4,0):
|
||||
statDict = self.env.log_stat(0);
|
||||
assert statDict.has_key('magic')
|
||||
assert statDict.has_key('version')
|
||||
assert statDict.has_key('cur_file')
|
||||
assert statDict.has_key('region_nowait')
|
||||
|
||||
# must have at least one log file present:
|
||||
logs = self.env.log_archive(db.DB_ARCH_ABS | db.DB_ARCH_LOG)
|
||||
assert logs != None
|
||||
for log in logs:
|
||||
if verbose:
|
||||
print 'log file: ' + log
|
||||
if db.version >= (4,2):
|
||||
logs = self.env.log_archive(db.DB_ARCH_REMOVE)
|
||||
assert not logs
|
||||
|
||||
self.txn = self.env.txn_begin()
|
||||
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
import unittest
|
||||
import sys, os, glob
|
||||
|
||||
try:
|
||||
# For Pythons w/distutils pybsddb
|
||||
from bsddb3 import db
|
||||
except ImportError:
|
||||
# For Python 2.3
|
||||
from bsddb import db
|
||||
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
|
||||
class pget_bugTestCase(unittest.TestCase):
|
||||
"""Verify that cursor.pget works properly"""
|
||||
db_name = 'test-cursor_pget.db'
|
||||
|
||||
def setUp(self):
|
||||
self.homeDir = os.path.join(os.path.dirname(sys.argv[0]), 'db_home')
|
||||
try:
|
||||
os.mkdir(self.homeDir)
|
||||
except os.error:
|
||||
pass
|
||||
self.env = db.DBEnv()
|
||||
self.env.open(self.homeDir, db.DB_CREATE | db.DB_INIT_MPOOL)
|
||||
self.primary_db = db.DB(self.env)
|
||||
self.primary_db.open(self.db_name, 'primary', db.DB_BTREE, db.DB_CREATE)
|
||||
self.secondary_db = db.DB(self.env)
|
||||
self.secondary_db.set_flags(db.DB_DUP)
|
||||
self.secondary_db.open(self.db_name, 'secondary', db.DB_BTREE, db.DB_CREATE)
|
||||
self.primary_db.associate(self.secondary_db, lambda key, data: data)
|
||||
self.primary_db.put('salad', 'eggs')
|
||||
self.primary_db.put('spam', 'ham')
|
||||
self.primary_db.put('omelet', 'eggs')
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
self.secondary_db.close()
|
||||
self.primary_db.close()
|
||||
self.env.close()
|
||||
del self.secondary_db
|
||||
del self.primary_db
|
||||
del self.env
|
||||
for file in glob.glob(os.path.join(self.homeDir, '*')):
|
||||
os.remove(file)
|
||||
os.removedirs(self.homeDir)
|
||||
|
||||
def test_pget(self):
|
||||
cursor = self.secondary_db.cursor()
|
||||
|
||||
self.assertEquals(('eggs', 'salad', 'eggs'), cursor.pget(key='eggs', flags=db.DB_SET))
|
||||
self.assertEquals(('eggs', 'omelet', 'eggs'), cursor.pget(db.DB_NEXT_DUP))
|
||||
self.assertEquals(None, cursor.pget(db.DB_NEXT_DUP))
|
||||
|
||||
self.assertEquals(('ham', 'spam', 'ham'), cursor.pget('ham', 'spam', flags=db.DB_SET))
|
||||
self.assertEquals(None, cursor.pget(db.DB_NEXT_DUP))
|
||||
|
||||
cursor.close()
|
||||
|
||||
|
||||
def test_suite():
|
||||
return unittest.makeSuite(pget_bugTestCase)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(defaultTest='test_suite')
|
|
@ -339,6 +339,16 @@ class TableDBTestCase(unittest.TestCase):
|
|||
conditions={'Name': dbtables.LikeCond('%')},
|
||||
mappings={'Access': increment_access})
|
||||
|
||||
try:
|
||||
self.tdb.Modify(tabname,
|
||||
conditions={'Name': dbtables.LikeCond('%')},
|
||||
mappings={'Access': 'What is your quest?'})
|
||||
except TypeError:
|
||||
# success, the string value in mappings isn't callable
|
||||
pass
|
||||
else:
|
||||
raise RuntimeError, "why was TypeError not raised for bad callable?"
|
||||
|
||||
# Delete key in select conditions
|
||||
values = self.tdb.Select(
|
||||
tabname, None,
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
import unittest
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import glob
|
||||
|
||||
try:
|
||||
# For Pythons w/distutils pybsddb
|
||||
from bsddb3 import db
|
||||
except ImportError:
|
||||
from bsddb import db
|
||||
|
||||
from test_all import verbose
|
||||
|
||||
|
||||
class DBSequenceTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.int_32_max = 0x100000000
|
||||
self.homeDir = os.path.join(os.path.dirname(sys.argv[0]), 'db_home')
|
||||
try:
|
||||
os.mkdir(self.homeDir)
|
||||
except os.error:
|
||||
pass
|
||||
tempfile.tempdir = self.homeDir
|
||||
self.filename = os.path.split(tempfile.mktemp())[1]
|
||||
tempfile.tempdir = None
|
||||
|
||||
self.dbenv = db.DBEnv()
|
||||
self.dbenv.open(self.homeDir, db.DB_CREATE | db.DB_INIT_MPOOL, 0666)
|
||||
self.d = db.DB(self.dbenv)
|
||||
self.d.open(self.filename, db.DB_BTREE, db.DB_CREATE, 0666)
|
||||
|
||||
def tearDown(self):
|
||||
if hasattr(self, 'seq'):
|
||||
self.seq.close()
|
||||
del self.seq
|
||||
if hasattr(self, 'd'):
|
||||
self.d.close()
|
||||
del self.d
|
||||
if hasattr(self, 'dbenv'):
|
||||
self.dbenv.close()
|
||||
del self.dbenv
|
||||
|
||||
files = glob.glob(os.path.join(self.homeDir, '*'))
|
||||
for file in files:
|
||||
os.remove(file)
|
||||
|
||||
def test_get(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
start_value = 10 * self.int_32_max
|
||||
self.assertEqual(0xA00000000, start_value)
|
||||
self.assertEquals(None, self.seq.init_value(start_value))
|
||||
self.assertEquals(None, self.seq.open(key='id', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(start_value, self.seq.get(5))
|
||||
self.assertEquals(start_value + 5, self.seq.get())
|
||||
|
||||
def test_remove(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(None, self.seq.remove(txn=None, flags=0))
|
||||
del self.seq
|
||||
|
||||
def test_get_key(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
key = 'foo'
|
||||
self.assertEquals(None, self.seq.open(key=key, txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(key, self.seq.get_key())
|
||||
|
||||
def test_get_dbp(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(self.d, self.seq.get_dbp())
|
||||
|
||||
def test_cachesize(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
cashe_size = 10
|
||||
self.assertEquals(None, self.seq.set_cachesize(cashe_size))
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(cashe_size, self.seq.get_cachesize())
|
||||
|
||||
def test_flags(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
flag = db.DB_SEQ_WRAP;
|
||||
self.assertEquals(None, self.seq.set_flags(flag))
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(flag, self.seq.get_flags() & flag)
|
||||
|
||||
def test_range(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
seq_range = (10 * self.int_32_max, 11 * self.int_32_max - 1)
|
||||
self.assertEquals(None, self.seq.set_range(seq_range))
|
||||
self.seq.init_value(seq_range[0])
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
self.assertEquals(seq_range, self.seq.get_range())
|
||||
|
||||
def test_stat(self):
|
||||
self.seq = db.DBSequence(self.d, flags=0)
|
||||
self.assertEquals(None, self.seq.open(key='foo', txn=None, flags=db.DB_CREATE))
|
||||
stat = self.seq.stat()
|
||||
for param in ('nowait', 'min', 'max', 'value', 'current',
|
||||
'flags', 'cache_size', 'last_value', 'wait'):
|
||||
self.assertTrue(param in stat, "parameter %s isn't in stat info" % param)
|
||||
|
||||
def test_suite():
|
||||
suite = unittest.TestSuite()
|
||||
if db.version() >= (4,3):
|
||||
suite.addTest(unittest.makeSuite(DBSequenceTest))
|
||||
return suite
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(defaultTest='test_suite')
|
|
@ -40,41 +40,49 @@ class CFunctions(unittest.TestCase):
|
|||
|
||||
def test_short(self):
|
||||
self._dll.tf_h.restype = c_short
|
||||
self._dll.tf_h.argtypes = (c_short,)
|
||||
self.failUnlessEqual(self._dll.tf_h(-32766), -10922)
|
||||
self.failUnlessEqual(self.S(), -32766)
|
||||
|
||||
def test_short_plus(self):
|
||||
self._dll.tf_bh.restype = c_short
|
||||
self._dll.tf_bh.argtypes = (c_byte, c_short)
|
||||
self.failUnlessEqual(self._dll.tf_bh(0, -32766), -10922)
|
||||
self.failUnlessEqual(self.S(), -32766)
|
||||
|
||||
def test_ushort(self):
|
||||
self._dll.tf_H.restype = c_ushort
|
||||
self._dll.tf_H.argtypes = (c_ushort,)
|
||||
self.failUnlessEqual(self._dll.tf_H(65535), 21845)
|
||||
self.failUnlessEqual(self.U(), 65535)
|
||||
|
||||
def test_ushort_plus(self):
|
||||
self._dll.tf_bH.restype = c_ushort
|
||||
self._dll.tf_bH.argtypes = (c_byte, c_ushort)
|
||||
self.failUnlessEqual(self._dll.tf_bH(0, 65535), 21845)
|
||||
self.failUnlessEqual(self.U(), 65535)
|
||||
|
||||
def test_int(self):
|
||||
self._dll.tf_i.restype = c_int
|
||||
self._dll.tf_i.argtypes = (c_int,)
|
||||
self.failUnlessEqual(self._dll.tf_i(-2147483646), -715827882)
|
||||
self.failUnlessEqual(self.S(), -2147483646)
|
||||
|
||||
def test_int_plus(self):
|
||||
self._dll.tf_bi.restype = c_int
|
||||
self._dll.tf_bi.argtypes = (c_byte, c_int)
|
||||
self.failUnlessEqual(self._dll.tf_bi(0, -2147483646), -715827882)
|
||||
self.failUnlessEqual(self.S(), -2147483646)
|
||||
|
||||
def test_uint(self):
|
||||
self._dll.tf_I.restype = c_uint
|
||||
self._dll.tf_I.argtypes = (c_uint,)
|
||||
self.failUnlessEqual(self._dll.tf_I(4294967295), 1431655765)
|
||||
self.failUnlessEqual(self.U(), 4294967295)
|
||||
|
||||
def test_uint_plus(self):
|
||||
self._dll.tf_bI.restype = c_uint
|
||||
self._dll.tf_bI.argtypes = (c_byte, c_uint)
|
||||
self.failUnlessEqual(self._dll.tf_bI(0, 4294967295), 1431655765)
|
||||
self.failUnlessEqual(self.U(), 4294967295)
|
||||
|
||||
|
|
|
@ -133,7 +133,7 @@ class PointersTestCase(unittest.TestCase):
|
|||
self.failUnlessEqual(p[0], 42)
|
||||
self.failUnlessEqual(p.contents.value, 42)
|
||||
|
||||
def test_charpp( self ):
|
||||
def test_charpp(self):
|
||||
"""Test that a character pointer-to-pointer is correctly passed"""
|
||||
dll = CDLL(_ctypes_test.__file__)
|
||||
func = dll._testfunc_c_p_p
|
||||
|
|
|
@ -138,8 +138,8 @@ class StructureTestCase(unittest.TestCase):
|
|||
self.failUnlessEqual(X.y.size, sizeof(c_char))
|
||||
|
||||
# readonly
|
||||
self.assertRaises(TypeError, setattr, X.x, "offset", 92)
|
||||
self.assertRaises(TypeError, setattr, X.x, "size", 92)
|
||||
self.assertRaises(AttributeError, setattr, X.x, "offset", 92)
|
||||
self.assertRaises(AttributeError, setattr, X.x, "size", 92)
|
||||
|
||||
class X(Union):
|
||||
_fields_ = [("x", c_int),
|
||||
|
@ -152,8 +152,8 @@ class StructureTestCase(unittest.TestCase):
|
|||
self.failUnlessEqual(X.y.size, sizeof(c_char))
|
||||
|
||||
# readonly
|
||||
self.assertRaises(TypeError, setattr, X.x, "offset", 92)
|
||||
self.assertRaises(TypeError, setattr, X.x, "size", 92)
|
||||
self.assertRaises(AttributeError, setattr, X.x, "offset", 92)
|
||||
self.assertRaises(AttributeError, setattr, X.x, "size", 92)
|
||||
|
||||
# XXX Should we check nested data types also?
|
||||
# offset is always relative to the class...
|
||||
|
|
|
@ -63,7 +63,6 @@ __all__ = [
|
|||
'REPORT_ONLY_FIRST_FAILURE',
|
||||
'REPORTING_FLAGS',
|
||||
# 1. Utility Functions
|
||||
'is_private',
|
||||
# 2. Example & DocTest
|
||||
'Example',
|
||||
'DocTest',
|
||||
|
@ -101,11 +100,6 @@ import unittest, difflib, pdb, tempfile
|
|||
import warnings
|
||||
from StringIO import StringIO
|
||||
|
||||
# Don't whine about the deprecated is_private function in this
|
||||
# module's tests.
|
||||
warnings.filterwarnings("ignore", "is_private", DeprecationWarning,
|
||||
__name__, 0)
|
||||
|
||||
# There are 4 basic classes:
|
||||
# - Example: a <source, want> pair, plus an intra-docstring line number.
|
||||
# - DocTest: a collection of examples, parsed from a docstring, plus
|
||||
|
@ -178,35 +172,6 @@ ELLIPSIS_MARKER = '...'
|
|||
## 1. Utility Functions
|
||||
######################################################################
|
||||
|
||||
def is_private(prefix, base):
|
||||
"""prefix, base -> true iff name prefix + "." + base is "private".
|
||||
|
||||
Prefix may be an empty string, and base does not contain a period.
|
||||
Prefix is ignored (although functions you write conforming to this
|
||||
protocol may make use of it).
|
||||
Return true iff base begins with an (at least one) underscore, but
|
||||
does not both begin and end with (at least) two underscores.
|
||||
|
||||
>>> is_private("a.b", "my_func")
|
||||
False
|
||||
>>> is_private("____", "_my_func")
|
||||
True
|
||||
>>> is_private("someclass", "__init__")
|
||||
False
|
||||
>>> is_private("sometypo", "__init_")
|
||||
True
|
||||
>>> is_private("x.y.z", "_")
|
||||
True
|
||||
>>> is_private("_x.y.z", "__")
|
||||
False
|
||||
>>> is_private("", "") # senseless but consistent
|
||||
False
|
||||
"""
|
||||
warnings.warn("is_private is deprecated; it wasn't useful; "
|
||||
"examine DocTestFinder.find() lists instead",
|
||||
DeprecationWarning, stacklevel=2)
|
||||
return base[:1] == "_" and not base[:2] == "__" == base[-2:]
|
||||
|
||||
def _extract_future_flags(globs):
|
||||
"""
|
||||
Return the compiler-flags associated with the future features that
|
||||
|
@ -759,7 +724,7 @@ class DocTestFinder:
|
|||
"""
|
||||
|
||||
def __init__(self, verbose=False, parser=DocTestParser(),
|
||||
recurse=True, _namefilter=None, exclude_empty=True):
|
||||
recurse=True, exclude_empty=True):
|
||||
"""
|
||||
Create a new doctest finder.
|
||||
|
||||
|
@ -779,12 +744,8 @@ class DocTestFinder:
|
|||
self._verbose = verbose
|
||||
self._recurse = recurse
|
||||
self._exclude_empty = exclude_empty
|
||||
# _namefilter is undocumented, and exists only for temporary backward-
|
||||
# compatibility support of testmod's deprecated isprivate mess.
|
||||
self._namefilter = _namefilter
|
||||
|
||||
def find(self, obj, name=None, module=None, globs=None,
|
||||
extraglobs=None):
|
||||
def find(self, obj, name=None, module=None, globs=None, extraglobs=None):
|
||||
"""
|
||||
Return a list of the DocTests that are defined by the given
|
||||
object's docstring, or by any of its contained objects'
|
||||
|
@ -862,13 +823,6 @@ class DocTestFinder:
|
|||
self._find(tests, obj, name, module, source_lines, globs, {})
|
||||
return tests
|
||||
|
||||
def _filter(self, obj, prefix, base):
|
||||
"""
|
||||
Return true if the given object should not be examined.
|
||||
"""
|
||||
return (self._namefilter is not None and
|
||||
self._namefilter(prefix, base))
|
||||
|
||||
def _from_module(self, module, object):
|
||||
"""
|
||||
Return true if the given object is defined in the given
|
||||
|
@ -910,9 +864,6 @@ class DocTestFinder:
|
|||
# Look for tests in a module's contained objects.
|
||||
if inspect.ismodule(obj) and self._recurse:
|
||||
for valname, val in obj.__dict__.items():
|
||||
# Check if this contained object should be ignored.
|
||||
if self._filter(val, name, valname):
|
||||
continue
|
||||
valname = '%s.%s' % (name, valname)
|
||||
# Recurse to functions & classes.
|
||||
if ((inspect.isfunction(val) or inspect.isclass(val)) and
|
||||
|
@ -941,9 +892,6 @@ class DocTestFinder:
|
|||
# Look for tests in a class's contained objects.
|
||||
if inspect.isclass(obj) and self._recurse:
|
||||
for valname, val in obj.__dict__.items():
|
||||
# Check if this contained object should be ignored.
|
||||
if self._filter(val, name, valname):
|
||||
continue
|
||||
# Special handling for staticmethod/classmethod.
|
||||
if isinstance(val, staticmethod):
|
||||
val = getattr(obj, valname)
|
||||
|
@ -1751,17 +1699,16 @@ class DebugRunner(DocTestRunner):
|
|||
# class, updated by testmod.
|
||||
master = None
|
||||
|
||||
def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None,
|
||||
def testmod(m=None, name=None, globs=None, verbose=None,
|
||||
report=True, optionflags=0, extraglobs=None,
|
||||
raise_on_error=False, exclude_empty=False):
|
||||
"""m=None, name=None, globs=None, verbose=None, isprivate=None,
|
||||
report=True, optionflags=0, extraglobs=None, raise_on_error=False,
|
||||
"""m=None, name=None, globs=None, verbose=None, report=True,
|
||||
optionflags=0, extraglobs=None, raise_on_error=False,
|
||||
exclude_empty=False
|
||||
|
||||
Test examples in docstrings in functions and classes reachable
|
||||
from module m (or the current module if m is not supplied), starting
|
||||
with m.__doc__. Unless isprivate is specified, private names
|
||||
are not skipped.
|
||||
with m.__doc__.
|
||||
|
||||
Also test examples reachable from dict m.__test__ if it exists and is
|
||||
not None. m.__test__ maps names to functions, classes and strings;
|
||||
|
@ -1810,13 +1757,6 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None,
|
|||
first unexpected exception or failure. This allows failures to be
|
||||
post-mortem debugged.
|
||||
|
||||
Deprecated in Python 2.4:
|
||||
Optional keyword arg "isprivate" specifies a function used to
|
||||
determine whether a name is private. The default function is
|
||||
treat all functions as public. Optionally, "isprivate" can be
|
||||
set to doctest.is_private to skip over functions marked as private
|
||||
using the underscore naming convention; see its docs for details.
|
||||
|
||||
Advanced tomfoolery: testmod runs methods of a local instance of
|
||||
class doctest.Tester, then merges the results into (or creates)
|
||||
global Tester instance doctest.master. Methods of doctest.master
|
||||
|
@ -1827,11 +1767,6 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None,
|
|||
"""
|
||||
global master
|
||||
|
||||
if isprivate is not None:
|
||||
warnings.warn("the isprivate argument is deprecated; "
|
||||
"examine DocTestFinder.find() lists instead",
|
||||
DeprecationWarning)
|
||||
|
||||
# If no module was given, then use __main__.
|
||||
if m is None:
|
||||
# DWA - m will still be None if this wasn't invoked from the command
|
||||
|
@ -1848,7 +1783,7 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None,
|
|||
name = m.__name__
|
||||
|
||||
# Find, parse, and run all tests in the given module.
|
||||
finder = DocTestFinder(_namefilter=isprivate, exclude_empty=exclude_empty)
|
||||
finder = DocTestFinder(exclude_empty=exclude_empty)
|
||||
|
||||
if raise_on_error:
|
||||
runner = DebugRunner(verbose=verbose, optionflags=optionflags)
|
||||
|
@ -2021,8 +1956,7 @@ def run_docstring_examples(f, globs, verbose=False, name="NoName",
|
|||
# actually used in any way.
|
||||
|
||||
class Tester:
|
||||
def __init__(self, mod=None, globs=None, verbose=None,
|
||||
isprivate=None, optionflags=0):
|
||||
def __init__(self, mod=None, globs=None, verbose=None, optionflags=0):
|
||||
|
||||
warnings.warn("class Tester is deprecated; "
|
||||
"use class doctest.DocTestRunner instead",
|
||||
|
@ -2037,9 +1971,8 @@ class Tester:
|
|||
self.globs = globs
|
||||
|
||||
self.verbose = verbose
|
||||
self.isprivate = isprivate
|
||||
self.optionflags = optionflags
|
||||
self.testfinder = DocTestFinder(_namefilter=isprivate)
|
||||
self.testfinder = DocTestFinder()
|
||||
self.testrunner = DocTestRunner(verbose=verbose,
|
||||
optionflags=optionflags)
|
||||
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x3F, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x5A, # EXCLAMATION MARK
|
||||
0x0022: 0x7F, # QUOTATION MARK
|
||||
0x0023: 0x7B, # NUMBER SIGN
|
||||
0x0024: 0x5B, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0x7C, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0xBA, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xE0, # REVERSE SOLIDUS
|
||||
0x005D: 0xBB, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0xB0, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0xC0, # LEFT CURLY BRACKET
|
||||
0x007C: 0x4F, # VERTICAL LINE
|
||||
0x007D: 0xD0, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xA1, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008A: 0x2A, # CONTROL
|
||||
0x008B: 0x2B, # CONTROL
|
||||
0x008C: 0x2C, # CONTROL
|
||||
0x008D: 0x09, # CONTROL
|
||||
0x008E: 0x0A, # CONTROL
|
||||
0x008F: 0x1B, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1A, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009A: 0x3A, # CONTROL
|
||||
0x009B: 0x3B, # CONTROL
|
||||
0x009C: 0x04, # CONTROL
|
||||
0x009D: 0x14, # CONTROL
|
||||
0x009E: 0x3E, # CONTROL
|
||||
0x009F: 0xFF, # CONTROL
|
||||
0x00A0: 0x41, # NO-BREAK SPACE
|
||||
0x00A1: 0xAA, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0x4A, # CENT SIGN
|
||||
0x00A3: 0xB1, # POUND SIGN
|
||||
0x00A4: 0x9F, # CURRENCY SIGN
|
||||
0x00A5: 0xB2, # YEN SIGN
|
||||
0x00A6: 0x6A, # BROKEN BAR
|
||||
0x00A7: 0xB5, # SECTION SIGN
|
||||
0x00A8: 0xBD, # DIAERESIS
|
||||
0x00A9: 0xB4, # COPYRIGHT SIGN
|
||||
0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0x5F, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00AE: 0xAF, # REGISTERED SIGN
|
||||
0x00AF: 0xBC, # MACRON
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0x8F, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xBE, # ACUTE ACCENT
|
||||
0x00B5: 0xA0, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB3, # MIDDLE DOT
|
||||
0x00B8: 0x9D, # CEDILLA
|
||||
0x00B9: 0xDA, # SUPERSCRIPT ONE
|
||||
0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xB8, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xAB, # INVERTED QUESTION MARK
|
||||
0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE
|
||||
0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xBF, # MULTIPLICATION SIGN
|
||||
0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0x9C, # LATIN SMALL LIGATURE AE
|
||||
0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xE1, # DIVISION SIGN
|
||||
0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,262 +303,5 @@ decoding_table = (
|
|||
u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x060C: 0xAB, # ARABIC COMMA
|
||||
0x061B: 0xAC, # ARABIC SEMICOLON
|
||||
0x061F: 0xAE, # ARABIC QUESTION MARK
|
||||
0x06F0: 0xA1, # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
0x06F1: 0xA2, # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
0x06F2: 0xA3, # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
0x06F3: 0xA4, # EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
0x06F4: 0xA5, # EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
0x06F5: 0xA6, # EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
0x06F6: 0xA7, # EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
0x06F7: 0xA8, # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
0x06F8: 0xA9, # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
0x06F9: 0xAA, # EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
0xFB56: 0xB5, # ARABIC LETTER PEH ISOLATED FORM
|
||||
0xFB58: 0xB6, # ARABIC LETTER PEH INITIAL FORM
|
||||
0xFB66: 0xBA, # ARABIC LETTER TTEH ISOLATED FORM
|
||||
0xFB68: 0xBB, # ARABIC LETTER TTEH INITIAL FORM
|
||||
0xFB7A: 0xC0, # ARABIC LETTER TCHEH ISOLATED FORM
|
||||
0xFB7C: 0xC1, # ARABIC LETTER TCHEH INITIAL FORM
|
||||
0xFB84: 0xC7, # ARABIC LETTER DAHAL ISOLATED FORMN
|
||||
0xFB8A: 0xCC, # ARABIC LETTER JEH ISOLATED FORM
|
||||
0xFB8C: 0xCA, # ARABIC LETTER RREH ISOLATED FORM
|
||||
0xFB92: 0xE5, # ARABIC LETTER GAF ISOLATED FORM
|
||||
0xFB94: 0xE6, # ARABIC LETTER GAF INITIAL FORM
|
||||
0xFB9E: 0xEC, # ARABIC LETTER NOON GHUNNA ISOLATED FORM
|
||||
0xFBA6: 0xF1, # ARABIC LETTER HEH GOAL ISOLATED FORM
|
||||
0xFBA8: 0xF2, # ARABIC LETTER HEH GOAL INITIAL FORM
|
||||
0xFBA9: 0xF3, # ARABIC LETTER HEH GOAL MEDIAL FORM
|
||||
0xFBAA: 0xF4, # ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
|
||||
0xFBAE: 0xFD, # ARABIC LETTER YEH BARREE ISOLATED FORM
|
||||
0xFBB0: 0xFC, # ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
|
||||
0xFE7C: 0xFE, # ARABIC SHADDA ISOLATED FORM
|
||||
0xFE7D: 0xFF, # ARABIC SHADDA MEDIAL FORM
|
||||
0xFE80: 0xF5, # ARABIC LETTER HAMZA ISOLATED FORM
|
||||
0xFE81: 0xAF, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
|
||||
0xFE85: 0xEF, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
|
||||
0xFE89: 0xF6, # ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
|
||||
0xFE8A: 0xF7, # ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
|
||||
0xFE8B: 0xF8, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
|
||||
0xFE8D: 0xB0, # ARABIC LETTER ALEF ISOLATED FORM
|
||||
0xFE8E: None, # ARABIC LETTER ALEF FINAL FORM
|
||||
0xFE8F: 0xB3, # ARABIC LETTER BEH ISOLATED FORM
|
||||
0xFE91: 0xB4, # ARABIC LETTER BEH INITIAL FORM
|
||||
0xFE93: 0xB7, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
|
||||
0xFE95: 0xB8, # ARABIC LETTER TEH ISOLATED FORM
|
||||
0xFE97: 0xB9, # ARABIC LETTER TEH INITIAL FORM
|
||||
0xFE99: 0xBC, # ARABIC LETTER THEH ISOLATED FORM
|
||||
0xFE9B: 0xBD, # ARABIC LETTER THEH INITIAL FORM
|
||||
0xFE9D: 0xBE, # ARABIC LETTER JEEM ISOLATED FORM
|
||||
0xFE9F: 0xBF, # ARABIC LETTER JEEM INITIAL FORM
|
||||
0xFEA1: 0xC2, # ARABIC LETTER HAH ISOLATED FORM
|
||||
0xFEA3: 0xC3, # ARABIC LETTER HAH INITIAL FORM
|
||||
0xFEA5: 0xC4, # ARABIC LETTER KHAH ISOLATED FORM
|
||||
0xFEA7: 0xC5, # ARABIC LETTER KHAH INITIAL FORM
|
||||
0xFEA9: 0xC6, # ARABIC LETTER DAL ISOLATED FORM
|
||||
0xFEAB: 0xC8, # ARABIC LETTER THAL ISOLATED FORM
|
||||
0xFEAD: 0xC9, # ARABIC LETTER REH ISOLATED FORM
|
||||
0xFEAF: 0xCB, # ARABIC LETTER ZAIN ISOLATED FORM
|
||||
0xFEB1: 0xCD, # ARABIC LETTER SEEN ISOLATED FORM
|
||||
0xFEB3: 0xCE, # ARABIC LETTER SEEN INITIAL FORM
|
||||
0xFEB5: 0xCF, # ARABIC LETTER SHEEN ISOLATED FORM
|
||||
0xFEB7: 0xD0, # ARABIC LETTER SHEEN INITIAL FORM
|
||||
0xFEB9: 0xD1, # ARABIC LETTER SAD ISOLATED FORM
|
||||
0xFEBB: 0xD2, # ARABIC LETTER SAD INITIAL FORM
|
||||
0xFEBD: 0xD3, # ARABIC LETTER DAD ISOLATED FORM
|
||||
0xFEBF: 0xD4, # ARABIC LETTER DAD INITIAL FORM
|
||||
0xFEC1: 0xD5, # ARABIC LETTER TAH ISOLATED FORM
|
||||
0xFEC5: 0xD6, # ARABIC LETTER ZAH ISOLATED FORM
|
||||
0xFEC9: 0xD7, # ARABIC LETTER AIN ISOLATED FORM
|
||||
0xFECA: 0xD8, # ARABIC LETTER AIN FINAL FORM
|
||||
0xFECB: 0xD9, # ARABIC LETTER AIN INITIAL FORM
|
||||
0xFECC: 0xDA, # ARABIC LETTER AIN MEDIAL FORM
|
||||
0xFECD: 0xDB, # ARABIC LETTER GHAIN ISOLATED FORM
|
||||
0xFECE: 0xDC, # ARABIC LETTER GHAIN FINAL FORM
|
||||
0xFECF: 0xDD, # ARABIC LETTER GHAIN INITIAL FORM
|
||||
0xFED0: 0xDE, # ARABIC LETTER GHAIN MEDIAL FORM
|
||||
0xFED1: 0xDF, # ARABIC LETTER FEH ISOLATED FORM
|
||||
0xFED3: 0xE0, # ARABIC LETTER FEH INITIAL FORM
|
||||
0xFED5: 0xE1, # ARABIC LETTER QAF ISOLATED FORM
|
||||
0xFED7: 0xE2, # ARABIC LETTER QAF INITIAL FORM
|
||||
0xFED9: 0xE3, # ARABIC LETTER KAF ISOLATED FORM
|
||||
0xFEDB: 0xE4, # ARABIC LETTER KAF INITIAL FORM
|
||||
0xFEDD: 0xE7, # ARABIC LETTER LAM ISOLATED FORM
|
||||
0xFEDF: 0xE8, # ARABIC LETTER LAM INITIAL FORM
|
||||
0xFEE0: 0xE9, # ARABIC LETTER LAM MEDIAL FORM
|
||||
0xFEE1: 0xEA, # ARABIC LETTER MEEM ISOLATED FORM
|
||||
0xFEE3: 0xEB, # ARABIC LETTER MEEM INITIAL FORM
|
||||
0xFEE5: 0xED, # ARABIC LETTER NOON ISOLATED FORM
|
||||
0xFEE7: 0xEE, # ARABIC LETTER NOON INITIAL FORM
|
||||
0xFEED: 0xF0, # ARABIC LETTER WAW ISOLATED FORM
|
||||
0xFEF1: 0xF9, # ARABIC LETTER YEH ISOLATED FORM
|
||||
0xFEF2: 0xFA, # ARABIC LETTER YEH FINAL FORM
|
||||
0xFEF3: 0xFB, # ARABIC LETTER YEH INITIAL FORM
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x3F, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x4F, # EXCLAMATION MARK
|
||||
0x0022: 0xFC, # QUOTATION MARK
|
||||
0x0023: 0xEC, # NUMBER SIGN
|
||||
0x0024: 0xAD, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0xAE, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x68, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xDC, # REVERSE SOLIDUS
|
||||
0x005D: 0xAC, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5F, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x8D, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x48, # LEFT CURLY BRACKET
|
||||
0x007C: 0xBB, # VERTICAL LINE
|
||||
0x007D: 0x8C, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xCC, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008A: 0x2A, # CONTROL
|
||||
0x008B: 0x2B, # CONTROL
|
||||
0x008C: 0x2C, # CONTROL
|
||||
0x008D: 0x09, # CONTROL
|
||||
0x008E: 0x0A, # CONTROL
|
||||
0x008F: 0x1B, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1A, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009A: 0x3A, # CONTROL
|
||||
0x009B: 0x3B, # CONTROL
|
||||
0x009C: 0x04, # CONTROL
|
||||
0x009D: 0x14, # CONTROL
|
||||
0x009E: 0x3E, # CONTROL
|
||||
0x009F: 0xFF, # CONTROL
|
||||
0x00A0: 0x41, # NO-BREAK SPACE
|
||||
0x00A1: 0xAA, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xB0, # CENT SIGN
|
||||
0x00A3: 0xB1, # POUND SIGN
|
||||
0x00A4: 0x9F, # CURRENCY SIGN
|
||||
0x00A5: 0xB2, # YEN SIGN
|
||||
0x00A6: 0x8E, # BROKEN BAR
|
||||
0x00A7: 0xB5, # SECTION SIGN
|
||||
0x00A8: 0xBD, # DIAERESIS
|
||||
0x00A9: 0xB4, # COPYRIGHT SIGN
|
||||
0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xBA, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00AE: 0xAF, # REGISTERED SIGN
|
||||
0x00AF: 0xBC, # MACRON
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0x8F, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xBE, # ACUTE ACCENT
|
||||
0x00B5: 0xA0, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB3, # MIDDLE DOT
|
||||
0x00B8: 0x9D, # CEDILLA
|
||||
0x00B9: 0xDA, # SUPERSCRIPT ONE
|
||||
0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xB8, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xAB, # INVERTED QUESTION MARK
|
||||
0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE
|
||||
0x00C7: 0x4A, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x7B, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xBF, # MULTIPLICATION SIGN
|
||||
0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x7F, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0x9C, # LATIN SMALL LIGATURE AE
|
||||
0x00E7: 0xC0, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xA1, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xE1, # DIVISION SIGN
|
||||
0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xE0, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x011E: 0x5A, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x011F: 0xD0, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x0130: 0x5B, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x0131: 0x79, # LATIN SMALL LETTER DOTLESS I
|
||||
0x015E: 0x7C, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0x6A, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x3F, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x5A, # EXCLAMATION MARK
|
||||
0x0022: 0x7F, # QUOTATION MARK
|
||||
0x0023: 0x7B, # NUMBER SIGN
|
||||
0x0024: 0x5B, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0x7C, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0xBA, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xE0, # REVERSE SOLIDUS
|
||||
0x005D: 0xBB, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0xB0, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0xC0, # LEFT CURLY BRACKET
|
||||
0x007C: 0x4F, # VERTICAL LINE
|
||||
0x007D: 0xD0, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xA1, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008A: 0x2A, # CONTROL
|
||||
0x008B: 0x2B, # CONTROL
|
||||
0x008C: 0x2C, # CONTROL
|
||||
0x008D: 0x09, # CONTROL
|
||||
0x008E: 0x0A, # CONTROL
|
||||
0x008F: 0x1B, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1A, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009A: 0x3A, # CONTROL
|
||||
0x009B: 0x3B, # CONTROL
|
||||
0x009C: 0x04, # CONTROL
|
||||
0x009D: 0x14, # CONTROL
|
||||
0x009E: 0x3E, # CONTROL
|
||||
0x009F: 0xFF, # CONTROL
|
||||
0x00A0: 0x41, # NO-BREAK SPACE
|
||||
0x00A1: 0xAA, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0x4A, # CENT SIGN
|
||||
0x00A3: 0xB1, # POUND SIGN
|
||||
0x00A5: 0xB2, # YEN SIGN
|
||||
0x00A6: 0x6A, # BROKEN BAR
|
||||
0x00A7: 0xB5, # SECTION SIGN
|
||||
0x00A8: 0xBD, # DIAERESIS
|
||||
0x00A9: 0xB4, # COPYRIGHT SIGN
|
||||
0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0x5F, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00AE: 0xAF, # REGISTERED SIGN
|
||||
0x00AF: 0xBC, # MACRON
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0x8F, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xBE, # ACUTE ACCENT
|
||||
0x00B5: 0xA0, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB3, # MIDDLE DOT
|
||||
0x00B8: 0x9D, # CEDILLA
|
||||
0x00B9: 0xDA, # SUPERSCRIPT ONE
|
||||
0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xB8, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xAB, # INVERTED QUESTION MARK
|
||||
0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE
|
||||
0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xBF, # MULTIPLICATION SIGN
|
||||
0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0x9C, # LATIN SMALL LIGATURE AE
|
||||
0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xE1, # DIVISION SIGN
|
||||
0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x20AC: 0x9F, # EURO SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,258 +303,5 @@ decoding_table = (
|
|||
u'\u02d9' # 0xFF -> DOT ABOVE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x0104: 0xA5, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xB9, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON
|
||||
0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x013D: 0xBC, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x013E: 0xBE, # LATIN SMALL LETTER L WITH CARON
|
||||
0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON
|
||||
0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON
|
||||
0x015A: 0x8C, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0x9C, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x0164: 0x8D, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x0165: 0x9D, # LATIN SMALL LETTER T WITH CARON
|
||||
0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0x9F, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x02C7: 0xA1, # CARON
|
||||
0x02D8: 0xA2, # BREVE
|
||||
0x02D9: 0xFF, # DOT ABOVE
|
||||
0x02DB: 0xB2, # OGONEK
|
||||
0x02DD: 0xBD, # DOUBLE ACUTE ACCENT
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,262 +303,5 @@ decoding_table = (
|
|||
u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x0401: 0xA8, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0402: 0x80, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0403: 0x81, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x0404: 0xAA, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x0405: 0xBD, # CYRILLIC CAPITAL LETTER DZE
|
||||
0x0406: 0xB2, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0407: 0xAF, # CYRILLIC CAPITAL LETTER YI
|
||||
0x0408: 0xA3, # CYRILLIC CAPITAL LETTER JE
|
||||
0x0409: 0x8A, # CYRILLIC CAPITAL LETTER LJE
|
||||
0x040A: 0x8C, # CYRILLIC CAPITAL LETTER NJE
|
||||
0x040B: 0x8E, # CYRILLIC CAPITAL LETTER TSHE
|
||||
0x040C: 0x8D, # CYRILLIC CAPITAL LETTER KJE
|
||||
0x040E: 0xA1, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x040F: 0x8F, # CYRILLIC CAPITAL LETTER DZHE
|
||||
0x0410: 0xC0, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0xC1, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0xC2, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0xC3, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0xC4, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0xC5, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0xC6, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0xC7, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0xC8, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0xC9, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041A: 0xCA, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041B: 0xCB, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041C: 0xCC, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041D: 0xCD, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041E: 0xCE, # CYRILLIC CAPITAL LETTER O
|
||||
0x041F: 0xCF, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0xD0, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0xD1, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0xD2, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0xD3, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0xD4, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0xD5, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0xD6, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0xD7, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0xD8, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0xD9, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042A: 0xDA, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042B: 0xDB, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042C: 0xDC, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042D: 0xDD, # CYRILLIC CAPITAL LETTER E
|
||||
0x042E: 0xDE, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042F: 0xDF, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xE0, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xE1, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xE2, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xE4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xE5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xE8, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043A: 0xEA, # CYRILLIC SMALL LETTER KA
|
||||
0x043B: 0xEB, # CYRILLIC SMALL LETTER EL
|
||||
0x043C: 0xEC, # CYRILLIC SMALL LETTER EM
|
||||
0x043D: 0xED, # CYRILLIC SMALL LETTER EN
|
||||
0x043E: 0xEE, # CYRILLIC SMALL LETTER O
|
||||
0x043F: 0xEF, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xF0, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xF1, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xF2, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xF3, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xF4, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xF5, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU
|
||||
0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044D: 0xFD, # CYRILLIC SMALL LETTER E
|
||||
0x044E: 0xFE, # CYRILLIC SMALL LETTER YU
|
||||
0x044F: 0xFF, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xB8, # CYRILLIC SMALL LETTER IO
|
||||
0x0452: 0x90, # CYRILLIC SMALL LETTER DJE
|
||||
0x0453: 0x83, # CYRILLIC SMALL LETTER GJE
|
||||
0x0454: 0xBA, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x0455: 0xBE, # CYRILLIC SMALL LETTER DZE
|
||||
0x0456: 0xB3, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0457: 0xBF, # CYRILLIC SMALL LETTER YI
|
||||
0x0458: 0xBC, # CYRILLIC SMALL LETTER JE
|
||||
0x0459: 0x9A, # CYRILLIC SMALL LETTER LJE
|
||||
0x045A: 0x9C, # CYRILLIC SMALL LETTER NJE
|
||||
0x045B: 0x9E, # CYRILLIC SMALL LETTER TSHE
|
||||
0x045C: 0x9D, # CYRILLIC SMALL LETTER KJE
|
||||
0x045E: 0xA2, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x045F: 0x9F, # CYRILLIC SMALL LETTER DZHE
|
||||
0x0490: 0xA5, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0x0491: 0xB4, # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x88, # EURO SIGN
|
||||
0x2116: 0xB9, # NUMERO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,258 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0xF0, # LATIN SMALL LETTER ETH
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0xFE, # LATIN SMALL LETTER THORN
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0x9C, # LATIN SMALL LIGATURE OE
|
||||
0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x017D: 0x8E, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0x9E, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02DC: 0x98, # SMALL TILDE
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,246 +303,5 @@ decoding_table = (
|
|||
u'\ufffe' # 0xFF -> UNDEFINED
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0384: 0xB4, # GREEK TONOS
|
||||
0x0385: 0xA1, # GREEK DIALYTIKA TONOS
|
||||
0x0386: 0xA2, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0392: 0xC2, # GREEK CAPITAL LETTER BETA
|
||||
0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA
|
||||
0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA
|
||||
0x0397: 0xC7, # GREEK CAPITAL LETTER ETA
|
||||
0x0398: 0xC8, # GREEK CAPITAL LETTER THETA
|
||||
0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA
|
||||
0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA
|
||||
0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA
|
||||
0x039C: 0xCC, # GREEK CAPITAL LETTER MU
|
||||
0x039D: 0xCD, # GREEK CAPITAL LETTER NU
|
||||
0x039E: 0xCE, # GREEK CAPITAL LETTER XI
|
||||
0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON
|
||||
0x03A0: 0xD0, # GREEK CAPITAL LETTER PI
|
||||
0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO
|
||||
0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU
|
||||
0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI
|
||||
0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI
|
||||
0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI
|
||||
0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA
|
||||
0x03B2: 0xE2, # GREEK SMALL LETTER BETA
|
||||
0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA
|
||||
0x03B4: 0xE4, # GREEK SMALL LETTER DELTA
|
||||
0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON
|
||||
0x03B6: 0xE6, # GREEK SMALL LETTER ZETA
|
||||
0x03B7: 0xE7, # GREEK SMALL LETTER ETA
|
||||
0x03B8: 0xE8, # GREEK SMALL LETTER THETA
|
||||
0x03B9: 0xE9, # GREEK SMALL LETTER IOTA
|
||||
0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA
|
||||
0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA
|
||||
0x03BC: 0xEC, # GREEK SMALL LETTER MU
|
||||
0x03BD: 0xED, # GREEK SMALL LETTER NU
|
||||
0x03BE: 0xEE, # GREEK SMALL LETTER XI
|
||||
0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON
|
||||
0x03C0: 0xF0, # GREEK SMALL LETTER PI
|
||||
0x03C1: 0xF1, # GREEK SMALL LETTER RHO
|
||||
0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA
|
||||
0x03C4: 0xF4, # GREEK SMALL LETTER TAU
|
||||
0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON
|
||||
0x03C6: 0xF6, # GREEK SMALL LETTER PHI
|
||||
0x03C7: 0xF7, # GREEK SMALL LETTER CHI
|
||||
0x03C8: 0xF8, # GREEK SMALL LETTER PSI
|
||||
0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA
|
||||
0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2015: 0xAF, # HORIZONTAL BAR
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,256 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0x9C, # LATIN SMALL LIGATURE OE
|
||||
0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x0160: 0x8A, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0x9A, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02DC: 0x98, # SMALL TILDE
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,240 +303,5 @@ decoding_table = (
|
|||
u'\ufffe' # 0xFF -> UNDEFINED
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00D7: 0xAA, # MULTIPLICATION SIGN
|
||||
0x00F7: 0xBA, # DIVISION SIGN
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02DC: 0x98, # SMALL TILDE
|
||||
0x05B0: 0xC0, # HEBREW POINT SHEVA
|
||||
0x05B1: 0xC1, # HEBREW POINT HATAF SEGOL
|
||||
0x05B2: 0xC2, # HEBREW POINT HATAF PATAH
|
||||
0x05B3: 0xC3, # HEBREW POINT HATAF QAMATS
|
||||
0x05B4: 0xC4, # HEBREW POINT HIRIQ
|
||||
0x05B5: 0xC5, # HEBREW POINT TSERE
|
||||
0x05B6: 0xC6, # HEBREW POINT SEGOL
|
||||
0x05B7: 0xC7, # HEBREW POINT PATAH
|
||||
0x05B8: 0xC8, # HEBREW POINT QAMATS
|
||||
0x05B9: 0xC9, # HEBREW POINT HOLAM
|
||||
0x05BB: 0xCB, # HEBREW POINT QUBUTS
|
||||
0x05BC: 0xCC, # HEBREW POINT DAGESH OR MAPIQ
|
||||
0x05BD: 0xCD, # HEBREW POINT METEG
|
||||
0x05BE: 0xCE, # HEBREW PUNCTUATION MAQAF
|
||||
0x05BF: 0xCF, # HEBREW POINT RAFE
|
||||
0x05C0: 0xD0, # HEBREW PUNCTUATION PASEQ
|
||||
0x05C1: 0xD1, # HEBREW POINT SHIN DOT
|
||||
0x05C2: 0xD2, # HEBREW POINT SIN DOT
|
||||
0x05C3: 0xD3, # HEBREW PUNCTUATION SOF PASUQ
|
||||
0x05D0: 0xE0, # HEBREW LETTER ALEF
|
||||
0x05D1: 0xE1, # HEBREW LETTER BET
|
||||
0x05D2: 0xE2, # HEBREW LETTER GIMEL
|
||||
0x05D3: 0xE3, # HEBREW LETTER DALET
|
||||
0x05D4: 0xE4, # HEBREW LETTER HE
|
||||
0x05D5: 0xE5, # HEBREW LETTER VAV
|
||||
0x05D6: 0xE6, # HEBREW LETTER ZAYIN
|
||||
0x05D7: 0xE7, # HEBREW LETTER HET
|
||||
0x05D8: 0xE8, # HEBREW LETTER TET
|
||||
0x05D9: 0xE9, # HEBREW LETTER YOD
|
||||
0x05DA: 0xEA, # HEBREW LETTER FINAL KAF
|
||||
0x05DB: 0xEB, # HEBREW LETTER KAF
|
||||
0x05DC: 0xEC, # HEBREW LETTER LAMED
|
||||
0x05DD: 0xED, # HEBREW LETTER FINAL MEM
|
||||
0x05DE: 0xEE, # HEBREW LETTER MEM
|
||||
0x05DF: 0xEF, # HEBREW LETTER FINAL NUN
|
||||
0x05E0: 0xF0, # HEBREW LETTER NUN
|
||||
0x05E1: 0xF1, # HEBREW LETTER SAMEKH
|
||||
0x05E2: 0xF2, # HEBREW LETTER AYIN
|
||||
0x05E3: 0xF3, # HEBREW LETTER FINAL PE
|
||||
0x05E4: 0xF4, # HEBREW LETTER PE
|
||||
0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI
|
||||
0x05E6: 0xF6, # HEBREW LETTER TSADI
|
||||
0x05E7: 0xF7, # HEBREW LETTER QOF
|
||||
0x05E8: 0xF8, # HEBREW LETTER RESH
|
||||
0x05E9: 0xF9, # HEBREW LETTER SHIN
|
||||
0x05EA: 0xFA, # HEBREW LETTER TAV
|
||||
0x05F0: 0xD4, # HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||||
0x05F1: 0xD5, # HEBREW LIGATURE YIDDISH VAV YOD
|
||||
0x05F2: 0xD6, # HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
0x05F3: 0xD7, # HEBREW PUNCTUATION GERESH
|
||||
0x05F4: 0xD8, # HEBREW PUNCTUATION GERSHAYIM
|
||||
0x200E: 0xFD, # LEFT-TO-RIGHT MARK
|
||||
0x200F: 0xFE, # RIGHT-TO-LEFT MARK
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AA: 0xA4, # NEW SHEQEL SIGN
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0x9C, # LATIN SMALL LIGATURE OE
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x060C: 0xA1, # ARABIC COMMA
|
||||
0x061B: 0xBA, # ARABIC SEMICOLON
|
||||
0x061F: 0xBF, # ARABIC QUESTION MARK
|
||||
0x0621: 0xC1, # ARABIC LETTER HAMZA
|
||||
0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x0627: 0xC7, # ARABIC LETTER ALEF
|
||||
0x0628: 0xC8, # ARABIC LETTER BEH
|
||||
0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA
|
||||
0x062A: 0xCA, # ARABIC LETTER TEH
|
||||
0x062B: 0xCB, # ARABIC LETTER THEH
|
||||
0x062C: 0xCC, # ARABIC LETTER JEEM
|
||||
0x062D: 0xCD, # ARABIC LETTER HAH
|
||||
0x062E: 0xCE, # ARABIC LETTER KHAH
|
||||
0x062F: 0xCF, # ARABIC LETTER DAL
|
||||
0x0630: 0xD0, # ARABIC LETTER THAL
|
||||
0x0631: 0xD1, # ARABIC LETTER REH
|
||||
0x0632: 0xD2, # ARABIC LETTER ZAIN
|
||||
0x0633: 0xD3, # ARABIC LETTER SEEN
|
||||
0x0634: 0xD4, # ARABIC LETTER SHEEN
|
||||
0x0635: 0xD5, # ARABIC LETTER SAD
|
||||
0x0636: 0xD6, # ARABIC LETTER DAD
|
||||
0x0637: 0xD8, # ARABIC LETTER TAH
|
||||
0x0638: 0xD9, # ARABIC LETTER ZAH
|
||||
0x0639: 0xDA, # ARABIC LETTER AIN
|
||||
0x063A: 0xDB, # ARABIC LETTER GHAIN
|
||||
0x0640: 0xDC, # ARABIC TATWEEL
|
||||
0x0641: 0xDD, # ARABIC LETTER FEH
|
||||
0x0642: 0xDE, # ARABIC LETTER QAF
|
||||
0x0643: 0xDF, # ARABIC LETTER KAF
|
||||
0x0644: 0xE1, # ARABIC LETTER LAM
|
||||
0x0645: 0xE3, # ARABIC LETTER MEEM
|
||||
0x0646: 0xE4, # ARABIC LETTER NOON
|
||||
0x0647: 0xE5, # ARABIC LETTER HEH
|
||||
0x0648: 0xE6, # ARABIC LETTER WAW
|
||||
0x0649: 0xEC, # ARABIC LETTER ALEF MAKSURA
|
||||
0x064A: 0xED, # ARABIC LETTER YEH
|
||||
0x064B: 0xF0, # ARABIC FATHATAN
|
||||
0x064C: 0xF1, # ARABIC DAMMATAN
|
||||
0x064D: 0xF2, # ARABIC KASRATAN
|
||||
0x064E: 0xF3, # ARABIC FATHA
|
||||
0x064F: 0xF5, # ARABIC DAMMA
|
||||
0x0650: 0xF6, # ARABIC KASRA
|
||||
0x0651: 0xF8, # ARABIC SHADDA
|
||||
0x0652: 0xFA, # ARABIC SUKUN
|
||||
0x0679: 0x8A, # ARABIC LETTER TTEH
|
||||
0x067E: 0x81, # ARABIC LETTER PEH
|
||||
0x0686: 0x8D, # ARABIC LETTER TCHEH
|
||||
0x0688: 0x8F, # ARABIC LETTER DDAL
|
||||
0x0691: 0x9A, # ARABIC LETTER RREH
|
||||
0x0698: 0x8E, # ARABIC LETTER JEH
|
||||
0x06A9: 0x98, # ARABIC LETTER KEHEH
|
||||
0x06AF: 0x90, # ARABIC LETTER GAF
|
||||
0x06BA: 0x9F, # ARABIC LETTER NOON GHUNNA
|
||||
0x06BE: 0xAA, # ARABIC LETTER HEH DOACHASHMEE
|
||||
0x06C1: 0xC0, # ARABIC LETTER HEH GOAL
|
||||
0x06D2: 0xFF, # ARABIC LETTER YEH BARREE
|
||||
0x200C: 0x9D, # ZERO WIDTH NON-JOINER
|
||||
0x200D: 0x9E, # ZERO WIDTH JOINER
|
||||
0x200E: 0xFD, # LEFT-TO-RIGHT MARK
|
||||
0x200F: 0xFE, # RIGHT-TO-LEFT MARK
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,251 +303,5 @@ decoding_table = (
|
|||
u'\u02d9' # 0xFF -> DOT ABOVE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0x8D, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0x9D, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0x8F, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xAF, # LATIN CAPITAL LETTER AE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xBF, # LATIN SMALL LETTER AE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON
|
||||
0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x02C7: 0x8E, # CARON
|
||||
0x02D9: 0xFF, # DOT ABOVE
|
||||
0x02DB: 0x9E, # OGONEK
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,254 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0152: 0x8C, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0x9C, # LATIN SMALL LIGATURE OE
|
||||
0x0178: 0x9F, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0x83, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x01A0: 0xD5, # LATIN CAPITAL LETTER O WITH HORN
|
||||
0x01A1: 0xF5, # LATIN SMALL LETTER O WITH HORN
|
||||
0x01AF: 0xDD, # LATIN CAPITAL LETTER U WITH HORN
|
||||
0x01B0: 0xFD, # LATIN SMALL LETTER U WITH HORN
|
||||
0x02C6: 0x88, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02DC: 0x98, # SMALL TILDE
|
||||
0x0300: 0xCC, # COMBINING GRAVE ACCENT
|
||||
0x0301: 0xEC, # COMBINING ACUTE ACCENT
|
||||
0x0303: 0xDE, # COMBINING TILDE
|
||||
0x0309: 0xD2, # COMBINING HOOK ABOVE
|
||||
0x0323: 0xF2, # COMBINING DOT BELOW
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0x82, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0x84, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0x86, # DAGGER
|
||||
0x2021: 0x87, # DOUBLE DAGGER
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x89, # PER MILLE SIGN
|
||||
0x2039: 0x8B, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0x9B, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x20AB: 0xFE, # DONG SIGN
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
0x2122: 0x99, # TRADE MARK SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,225 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> EIGHT ONES
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x3F, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x5A, # EXCLAMATION MARK
|
||||
0x0022: 0x7F, # QUOTATION MARK
|
||||
0x0023: 0x7B, # NUMBER SIGN
|
||||
0x0024: 0x5B, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0x7C, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0xBA, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xE0, # REVERSE SOLIDUS
|
||||
0x005D: 0xBB, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0xB0, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0xC0, # LEFT CURLY BRACKET
|
||||
0x007C: 0x4F, # VERTICAL LINE
|
||||
0x007D: 0xD0, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xA1, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # DIGIT SELECT
|
||||
0x0081: 0x21, # START OF SIGNIFICANCE
|
||||
0x0082: 0x22, # FIELD SEPARATOR
|
||||
0x0083: 0x23, # WORD UNDERSCORE
|
||||
0x0084: 0x24, # BYPASS OR INHIBIT PRESENTATION
|
||||
0x0085: 0x15, # NEW LINE
|
||||
0x0086: 0x06, # REQUIRED NEW LINE
|
||||
0x0087: 0x17, # PROGRAM OPERATOR COMMUNICATION
|
||||
0x0088: 0x28, # SET ATTRIBUTE
|
||||
0x0089: 0x29, # START FIELD EXTENDED
|
||||
0x008A: 0x2A, # SET MODE OR SWITCH
|
||||
0x008B: 0x2B, # CONTROL SEQUENCE PREFIX
|
||||
0x008C: 0x2C, # MODIFY FIELD ATTRIBUTE
|
||||
0x008D: 0x09, # SUPERSCRIPT
|
||||
0x008E: 0x0A, # REPEAT
|
||||
0x008F: 0x1B, # CUSTOMER USE ONE
|
||||
0x0090: 0x30, # <reserved>
|
||||
0x0091: 0x31, # <reserved>
|
||||
0x0092: 0x1A, # UNIT BACK SPACE
|
||||
0x0093: 0x33, # INDEX RETURN
|
||||
0x0094: 0x34, # PRESENTATION POSITION
|
||||
0x0095: 0x35, # TRANSPARENT
|
||||
0x0096: 0x36, # NUMERIC BACKSPACE
|
||||
0x0097: 0x08, # GRAPHIC ESCAPE
|
||||
0x0098: 0x38, # SUBSCRIPT
|
||||
0x0099: 0x39, # INDENT TABULATION
|
||||
0x009A: 0x3A, # REVERSE FORM FEED
|
||||
0x009B: 0x3B, # CUSTOMER USE THREE
|
||||
0x009C: 0x04, # SELECT
|
||||
0x009D: 0x14, # RESTORE/ENABLE PRESENTATION
|
||||
0x009E: 0x3E, # <reserved>
|
||||
0x009F: 0xFF, # EIGHT ONES
|
||||
0x00A0: 0x74, # NO-BREAK SPACE
|
||||
0x00A2: 0x4A, # CENT SIGN
|
||||
0x00A3: 0xB1, # POUND SIGN
|
||||
0x00A4: 0x9F, # CURRENCY SIGN
|
||||
0x00A5: 0xB2, # YEN SIGN
|
||||
0x00A6: 0x6A, # BROKEN BAR
|
||||
0x00A7: 0xB5, # SECTION SIGN
|
||||
0x00A8: 0xBD, # DIAERESIS
|
||||
0x00A9: 0xB4, # COPYRIGHT SIGN
|
||||
0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0x5F, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00AE: 0xAF, # REGISTERED SIGN
|
||||
0x00AF: 0xBC, # MACRON
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0x8F, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xBE, # ACUTE ACCENT
|
||||
0x00B5: 0xA0, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB3, # MIDDLE DOT
|
||||
0x00B8: 0x9D, # CEDILLA
|
||||
0x00B9: 0xDA, # SUPERSCRIPT ONE
|
||||
0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xB8, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00D7: 0xBF, # MULTIPLICATION SIGN
|
||||
0x00F7: 0xE1, # DIVISION SIGN
|
||||
0x05D0: 0x41, # HEBREW LETTER ALEF
|
||||
0x05D1: 0x42, # HEBREW LETTER BET
|
||||
0x05D2: 0x43, # HEBREW LETTER GIMEL
|
||||
0x05D3: 0x44, # HEBREW LETTER DALET
|
||||
0x05D4: 0x45, # HEBREW LETTER HE
|
||||
0x05D5: 0x46, # HEBREW LETTER VAV
|
||||
0x05D6: 0x47, # HEBREW LETTER ZAYIN
|
||||
0x05D7: 0x48, # HEBREW LETTER HET
|
||||
0x05D8: 0x49, # HEBREW LETTER TET
|
||||
0x05D9: 0x51, # HEBREW LETTER YOD
|
||||
0x05DA: 0x52, # HEBREW LETTER FINAL KAF
|
||||
0x05DB: 0x53, # HEBREW LETTER KAF
|
||||
0x05DC: 0x54, # HEBREW LETTER LAMED
|
||||
0x05DD: 0x55, # HEBREW LETTER FINAL MEM
|
||||
0x05DE: 0x56, # HEBREW LETTER MEM
|
||||
0x05DF: 0x57, # HEBREW LETTER FINAL NUN
|
||||
0x05E0: 0x58, # HEBREW LETTER NUN
|
||||
0x05E1: 0x59, # HEBREW LETTER SAMEKH
|
||||
0x05E2: 0x62, # HEBREW LETTER AYIN
|
||||
0x05E3: 0x63, # HEBREW LETTER FINAL PE
|
||||
0x05E4: 0x64, # HEBREW LETTER PE
|
||||
0x05E5: 0x65, # HEBREW LETTER FINAL TSADI
|
||||
0x05E6: 0x66, # HEBREW LETTER TSADI
|
||||
0x05E7: 0x67, # HEBREW LETTER QOF
|
||||
0x05E8: 0x68, # HEBREW LETTER RESH
|
||||
0x05E9: 0x69, # HEBREW LETTER SHIN
|
||||
0x05EA: 0x71, # HEBREW LETTER TAV
|
||||
0x2017: 0x78, # DOUBLE LOW LINE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x3F, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x4F, # EXCLAMATION MARK
|
||||
0x0022: 0x7F, # QUOTATION MARK
|
||||
0x0023: 0x7B, # NUMBER SIGN
|
||||
0x0024: 0x5B, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0x7C, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x4A, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xE0, # REVERSE SOLIDUS
|
||||
0x005D: 0x5A, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5F, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0xC0, # LEFT CURLY BRACKET
|
||||
0x007C: 0xBB, # VERTICAL LINE
|
||||
0x007D: 0xD0, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xA1, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008A: 0x2A, # CONTROL
|
||||
0x008B: 0x2B, # CONTROL
|
||||
0x008C: 0x2C, # CONTROL
|
||||
0x008D: 0x09, # CONTROL
|
||||
0x008E: 0x0A, # CONTROL
|
||||
0x008F: 0x1B, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1A, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009A: 0x3A, # CONTROL
|
||||
0x009B: 0x3B, # CONTROL
|
||||
0x009C: 0x04, # CONTROL
|
||||
0x009D: 0x14, # CONTROL
|
||||
0x009E: 0x3E, # CONTROL
|
||||
0x009F: 0xFF, # CONTROL
|
||||
0x00A0: 0x41, # NO-BREAK SPACE
|
||||
0x00A1: 0xAA, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xB0, # CENT SIGN
|
||||
0x00A3: 0xB1, # POUND SIGN
|
||||
0x00A4: 0x9F, # CURRENCY SIGN
|
||||
0x00A5: 0xB2, # YEN SIGN
|
||||
0x00A6: 0x6A, # BROKEN BAR
|
||||
0x00A7: 0xB5, # SECTION SIGN
|
||||
0x00A8: 0xBD, # DIAERESIS
|
||||
0x00A9: 0xB4, # COPYRIGHT SIGN
|
||||
0x00AA: 0x9A, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0x8A, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xBA, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00AE: 0xAF, # REGISTERED SIGN
|
||||
0x00AF: 0xBC, # MACRON
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0x8F, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xBE, # ACUTE ACCENT
|
||||
0x00B5: 0xA0, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB3, # MIDDLE DOT
|
||||
0x00B8: 0x9D, # CEDILLA
|
||||
0x00B9: 0xDA, # SUPERSCRIPT ONE
|
||||
0x00BA: 0x9B, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0x8B, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xB7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xB8, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xB9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xAB, # INVERTED QUESTION MARK
|
||||
0x00C0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0x9E, # LATIN CAPITAL LIGATURE AE
|
||||
0x00C7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xAC, # LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
0x00D1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xED, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEB, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xEF, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xEC, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xBF, # MULTIPLICATION SIGN
|
||||
0x00D8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xFD, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xFE, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xFB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xFC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xAD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xAE, # LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
0x00DF: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00E0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x46, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0x9C, # LATIN SMALL LIGATURE AE
|
||||
0x00E7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x58, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x55, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0x8C, # LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
0x00F1: 0x49, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xCD, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xCE, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xCB, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xCF, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xCC, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xE1, # DIVISION SIGN
|
||||
0x00F8: 0x70, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xDD, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xDE, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xDB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xDC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0x8D, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0x8E, # LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
0x00FF: 0xDF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,222 +303,5 @@ decoding_table = (
|
|||
u'\xa0' # 0xFF -> NO-BREAK SPACE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xFF, # NO-BREAK SPACE
|
||||
0x00A2: 0xBD, # CENT SIGN
|
||||
0x00A3: 0x9C, # POUND SIGN
|
||||
0x00A4: 0xCF, # CURRENCY SIGN
|
||||
0x00A5: 0xBE, # YEN SIGN
|
||||
0x00A6: 0xDD, # BROKEN BAR
|
||||
0x00A7: 0xF5, # SECTION SIGN
|
||||
0x00A8: 0xF9, # DIAERESIS
|
||||
0x00A9: 0xB8, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAA, # NOT SIGN
|
||||
0x00AD: 0xF0, # SOFT HYPHEN
|
||||
0x00AE: 0xA9, # REGISTERED SIGN
|
||||
0x00AF: 0xEE, # MACRON
|
||||
0x00B0: 0xF8, # DEGREE SIGN
|
||||
0x00B1: 0xF1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xFD, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFC, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xEF, # ACUTE ACCENT
|
||||
0x00B5: 0xE6, # MICRO SIGN
|
||||
0x00B6: 0xF4, # PILCROW SIGN
|
||||
0x00B7: 0xFA, # MIDDLE DOT
|
||||
0x00B8: 0xF7, # CEDILLA
|
||||
0x00B9: 0xFB, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xAF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xAC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xAB, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xF3, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00D7: 0x9E, # MULTIPLICATION SIGN
|
||||
0x00F7: 0xF6, # DIVISION SIGN
|
||||
0x05D0: 0x80, # HEBREW LETTER ALEF
|
||||
0x05D1: 0x81, # HEBREW LETTER BET
|
||||
0x05D2: 0x82, # HEBREW LETTER GIMEL
|
||||
0x05D3: 0x83, # HEBREW LETTER DALET
|
||||
0x05D4: 0x84, # HEBREW LETTER HE
|
||||
0x05D5: 0x85, # HEBREW LETTER VAV
|
||||
0x05D6: 0x86, # HEBREW LETTER ZAYIN
|
||||
0x05D7: 0x87, # HEBREW LETTER HET
|
||||
0x05D8: 0x88, # HEBREW LETTER TET
|
||||
0x05D9: 0x89, # HEBREW LETTER YOD
|
||||
0x05DA: 0x8A, # HEBREW LETTER FINAL KAF
|
||||
0x05DB: 0x8B, # HEBREW LETTER KAF
|
||||
0x05DC: 0x8C, # HEBREW LETTER LAMED
|
||||
0x05DD: 0x8D, # HEBREW LETTER FINAL MEM
|
||||
0x05DE: 0x8E, # HEBREW LETTER MEM
|
||||
0x05DF: 0x8F, # HEBREW LETTER FINAL NUN
|
||||
0x05E0: 0x90, # HEBREW LETTER NUN
|
||||
0x05E1: 0x91, # HEBREW LETTER SAMEKH
|
||||
0x05E2: 0x92, # HEBREW LETTER AYIN
|
||||
0x05E3: 0x93, # HEBREW LETTER FINAL PE
|
||||
0x05E4: 0x94, # HEBREW LETTER PE
|
||||
0x05E5: 0x95, # HEBREW LETTER FINAL TSADI
|
||||
0x05E6: 0x96, # HEBREW LETTER TSADI
|
||||
0x05E7: 0x97, # HEBREW LETTER QOF
|
||||
0x05E8: 0x98, # HEBREW LETTER RESH
|
||||
0x05E9: 0x99, # HEBREW LETTER SHIN
|
||||
0x05EA: 0x9A, # HEBREW LETTER TAV
|
||||
0x2017: 0xF2, # DOUBLE LOW LINE
|
||||
0x2500: 0xC4, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x2502: 0xB3, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x250C: 0xDA, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x2510: 0xBF, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x2514: 0xC0, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x2518: 0xD9, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x251C: 0xC3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x2524: 0xB4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x252C: 0xC2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x2534: 0xC1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x253C: 0xC5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x2550: 0xCD, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x2551: 0xBA, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x2554: 0xC9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x2557: 0xBB, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x255A: 0xC8, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x255D: 0xBC, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x2560: 0xCC, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x2563: 0xB9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x2566: 0xCB, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x2569: 0xCA, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x256C: 0xCE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x2580: 0xDF, # UPPER HALF BLOCK
|
||||
0x2584: 0xDC, # LOWER HALF BLOCK
|
||||
0x2588: 0xDB, # FULL BLOCK
|
||||
0x2591: 0xB0, # LIGHT SHADE
|
||||
0x2592: 0xB1, # MEDIUM SHADE
|
||||
0x2593: 0xB2, # DARK SHADE
|
||||
0x25A0: 0xFE, # BLACK SQUARE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,232 +303,5 @@ decoding_table = (
|
|||
u'\ufffe' # 0xFF -> UNDEFINED
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x0E01: 0xA1, # THAI CHARACTER KO KAI
|
||||
0x0E02: 0xA2, # THAI CHARACTER KHO KHAI
|
||||
0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT
|
||||
0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI
|
||||
0x0E05: 0xA5, # THAI CHARACTER KHO KHON
|
||||
0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG
|
||||
0x0E07: 0xA7, # THAI CHARACTER NGO NGU
|
||||
0x0E08: 0xA8, # THAI CHARACTER CHO CHAN
|
||||
0x0E09: 0xA9, # THAI CHARACTER CHO CHING
|
||||
0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG
|
||||
0x0E0B: 0xAB, # THAI CHARACTER SO SO
|
||||
0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE
|
||||
0x0E0D: 0xAD, # THAI CHARACTER YO YING
|
||||
0x0E0E: 0xAE, # THAI CHARACTER DO CHADA
|
||||
0x0E0F: 0xAF, # THAI CHARACTER TO PATAK
|
||||
0x0E10: 0xB0, # THAI CHARACTER THO THAN
|
||||
0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO
|
||||
0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO
|
||||
0x0E13: 0xB3, # THAI CHARACTER NO NEN
|
||||
0x0E14: 0xB4, # THAI CHARACTER DO DEK
|
||||
0x0E15: 0xB5, # THAI CHARACTER TO TAO
|
||||
0x0E16: 0xB6, # THAI CHARACTER THO THUNG
|
||||
0x0E17: 0xB7, # THAI CHARACTER THO THAHAN
|
||||
0x0E18: 0xB8, # THAI CHARACTER THO THONG
|
||||
0x0E19: 0xB9, # THAI CHARACTER NO NU
|
||||
0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI
|
||||
0x0E1B: 0xBB, # THAI CHARACTER PO PLA
|
||||
0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG
|
||||
0x0E1D: 0xBD, # THAI CHARACTER FO FA
|
||||
0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN
|
||||
0x0E1F: 0xBF, # THAI CHARACTER FO FAN
|
||||
0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO
|
||||
0x0E21: 0xC1, # THAI CHARACTER MO MA
|
||||
0x0E22: 0xC2, # THAI CHARACTER YO YAK
|
||||
0x0E23: 0xC3, # THAI CHARACTER RO RUA
|
||||
0x0E24: 0xC4, # THAI CHARACTER RU
|
||||
0x0E25: 0xC5, # THAI CHARACTER LO LING
|
||||
0x0E26: 0xC6, # THAI CHARACTER LU
|
||||
0x0E27: 0xC7, # THAI CHARACTER WO WAEN
|
||||
0x0E28: 0xC8, # THAI CHARACTER SO SALA
|
||||
0x0E29: 0xC9, # THAI CHARACTER SO RUSI
|
||||
0x0E2A: 0xCA, # THAI CHARACTER SO SUA
|
||||
0x0E2B: 0xCB, # THAI CHARACTER HO HIP
|
||||
0x0E2C: 0xCC, # THAI CHARACTER LO CHULA
|
||||
0x0E2D: 0xCD, # THAI CHARACTER O ANG
|
||||
0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK
|
||||
0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI
|
||||
0x0E30: 0xD0, # THAI CHARACTER SARA A
|
||||
0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT
|
||||
0x0E32: 0xD2, # THAI CHARACTER SARA AA
|
||||
0x0E33: 0xD3, # THAI CHARACTER SARA AM
|
||||
0x0E34: 0xD4, # THAI CHARACTER SARA I
|
||||
0x0E35: 0xD5, # THAI CHARACTER SARA II
|
||||
0x0E36: 0xD6, # THAI CHARACTER SARA UE
|
||||
0x0E37: 0xD7, # THAI CHARACTER SARA UEE
|
||||
0x0E38: 0xD8, # THAI CHARACTER SARA U
|
||||
0x0E39: 0xD9, # THAI CHARACTER SARA UU
|
||||
0x0E3A: 0xDA, # THAI CHARACTER PHINTHU
|
||||
0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT
|
||||
0x0E40: 0xE0, # THAI CHARACTER SARA E
|
||||
0x0E41: 0xE1, # THAI CHARACTER SARA AE
|
||||
0x0E42: 0xE2, # THAI CHARACTER SARA O
|
||||
0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN
|
||||
0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI
|
||||
0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO
|
||||
0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK
|
||||
0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU
|
||||
0x0E48: 0xE8, # THAI CHARACTER MAI EK
|
||||
0x0E49: 0xE9, # THAI CHARACTER MAI THO
|
||||
0x0E4A: 0xEA, # THAI CHARACTER MAI TRI
|
||||
0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA
|
||||
0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT
|
||||
0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT
|
||||
0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN
|
||||
0x0E4F: 0xEF, # THAI CHARACTER FONGMAN
|
||||
0x0E50: 0xF0, # THAI DIGIT ZERO
|
||||
0x0E51: 0xF1, # THAI DIGIT ONE
|
||||
0x0E52: 0xF2, # THAI DIGIT TWO
|
||||
0x0E53: 0xF3, # THAI DIGIT THREE
|
||||
0x0E54: 0xF4, # THAI DIGIT FOUR
|
||||
0x0E55: 0xF5, # THAI DIGIT FIVE
|
||||
0x0E56: 0xF6, # THAI DIGIT SIX
|
||||
0x0E57: 0xF7, # THAI DIGIT SEVEN
|
||||
0x0E58: 0xF8, # THAI DIGIT EIGHT
|
||||
0x0E59: 0xF9, # THAI DIGIT NINE
|
||||
0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU
|
||||
0x0E5B: 0xFB, # THAI CHARACTER KHOMUT
|
||||
0x2013: 0x96, # EN DASH
|
||||
0x2014: 0x97, # EM DASH
|
||||
0x2018: 0x91, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0x92, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201C: 0x93, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0x94, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x2022: 0x95, # BULLET
|
||||
0x2026: 0x85, # HORIZONTAL ELLIPSIS
|
||||
0x20AC: 0x80, # EURO SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,257 +303,5 @@ decoding_table = (
|
|||
u'\x9f' # 0xFF -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2D, # ENQUIRY
|
||||
0x0006: 0x2E, # ACKNOWLEDGE
|
||||
0x0007: 0x2F, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000A: 0x25, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3C, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3D, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: None, # SUBSTITUTE
|
||||
0x001B: 0x27, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x4F, # EXCLAMATION MARK
|
||||
0x0022: 0x7F, # QUOTATION MARK
|
||||
0x0023: 0x7B, # NUMBER SIGN
|
||||
0x0024: 0x5B, # DOLLAR SIGN
|
||||
0x0025: 0x6C, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7D, # APOSTROPHE
|
||||
0x0028: 0x4D, # LEFT PARENTHESIS
|
||||
0x0029: 0x5D, # RIGHT PARENTHESIS
|
||||
0x002A: 0x5C, # ASTERISK
|
||||
0x002B: 0x4E, # PLUS SIGN
|
||||
0x002C: 0x6B, # COMMA
|
||||
0x002D: 0x60, # HYPHEN-MINUS
|
||||
0x002E: 0x4B, # FULL STOP
|
||||
0x002F: 0x61, # SOLIDUS
|
||||
0x0030: 0xF0, # DIGIT ZERO
|
||||
0x0031: 0xF1, # DIGIT ONE
|
||||
0x0032: 0xF2, # DIGIT TWO
|
||||
0x0033: 0xF3, # DIGIT THREE
|
||||
0x0034: 0xF4, # DIGIT FOUR
|
||||
0x0035: 0xF5, # DIGIT FIVE
|
||||
0x0036: 0xF6, # DIGIT SIX
|
||||
0x0037: 0xF7, # DIGIT SEVEN
|
||||
0x0038: 0xF8, # DIGIT EIGHT
|
||||
0x0039: 0xF9, # DIGIT NINE
|
||||
0x003A: 0x7A, # COLON
|
||||
0x003B: 0x5E, # SEMICOLON
|
||||
0x003C: 0x4C, # LESS-THAN SIGN
|
||||
0x003D: 0x7E, # EQUALS SIGN
|
||||
0x003E: 0x6E, # GREATER-THAN SIGN
|
||||
0x003F: 0x6F, # QUESTION MARK
|
||||
0x0040: 0x7C, # COMMERCIAL AT
|
||||
0x0041: 0xC1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xC2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xC3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xC4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xC5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xC6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xC7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xC8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xC9, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0xD1, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0xD2, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0xD3, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0xD4, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0xD5, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0xD6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xD7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xD8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xD9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xE2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xE3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xE4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xE5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xE6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xE7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xE8, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0xE9, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x4A, # LEFT SQUARE BRACKET
|
||||
0x005C: 0xE0, # REVERSE SOLIDUS
|
||||
0x005D: 0x5A, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5F, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x6D, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006A: 0x91, # LATIN SMALL LETTER J
|
||||
0x006B: 0x92, # LATIN SMALL LETTER K
|
||||
0x006C: 0x93, # LATIN SMALL LETTER L
|
||||
0x006D: 0x94, # LATIN SMALL LETTER M
|
||||
0x006E: 0x95, # LATIN SMALL LETTER N
|
||||
0x006F: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xA2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xA3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xA4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xA5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xA6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xA7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xA8, # LATIN SMALL LETTER Y
|
||||
0x007A: 0xA9, # LATIN SMALL LETTER Z
|
||||
0x007B: 0xC0, # LEFT CURLY BRACKET
|
||||
0x007C: 0x6A, # VERTICAL LINE
|
||||
0x007D: 0xD0, # RIGHT CURLY BRACKET
|
||||
0x007E: 0xA1, # TILDE
|
||||
0x007F: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008A: 0x2A, # CONTROL
|
||||
0x008B: 0x2B, # CONTROL
|
||||
0x008C: 0x2C, # CONTROL
|
||||
0x008D: 0x09, # CONTROL
|
||||
0x008E: 0x0A, # CONTROL
|
||||
0x008F: 0x1B, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1A, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009A: 0x3A, # CONTROL
|
||||
0x009B: 0x3B, # CONTROL
|
||||
0x009C: 0x04, # CONTROL
|
||||
0x009D: 0x14, # CONTROL
|
||||
0x009E: 0x3E, # CONTROL
|
||||
0x009F: 0xFF, # CONTROL
|
||||
0x00A0: 0x74, # NO-BREAK SPACE
|
||||
0x00A3: 0xB0, # POUND SIGN
|
||||
0x00A6: 0xDF, # BROKEN BAR
|
||||
0x00A7: 0xEB, # SECTION SIGN
|
||||
0x00A8: 0x70, # DIAERESIS
|
||||
0x00A9: 0xFB, # COPYRIGHT SIGN
|
||||
0x00AB: 0xEE, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xEF, # NOT SIGN
|
||||
0x00AD: 0xCA, # SOFT HYPHEN
|
||||
0x00B0: 0x90, # DEGREE SIGN
|
||||
0x00B1: 0xDA, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xEA, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xFA, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xA0, # ACUTE ACCENT
|
||||
0x00BB: 0xFE, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BD: 0xDB, # VULGAR FRACTION ONE HALF
|
||||
0x0385: 0x80, # GREEK DIALYTIKA TONOS
|
||||
0x0386: 0x71, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0387: 0xDD, # GREEK ANO TELEIA
|
||||
0x0388: 0x72, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x0389: 0x73, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x038A: 0x75, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x038C: 0x76, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x038E: 0x77, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x038F: 0x78, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0390: 0xCC, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x0391: 0x41, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0392: 0x42, # GREEK CAPITAL LETTER BETA
|
||||
0x0393: 0x43, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0394: 0x44, # GREEK CAPITAL LETTER DELTA
|
||||
0x0395: 0x45, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0396: 0x46, # GREEK CAPITAL LETTER ZETA
|
||||
0x0397: 0x47, # GREEK CAPITAL LETTER ETA
|
||||
0x0398: 0x48, # GREEK CAPITAL LETTER THETA
|
||||
0x0399: 0x49, # GREEK CAPITAL LETTER IOTA
|
||||
0x039A: 0x51, # GREEK CAPITAL LETTER KAPPA
|
||||
0x039B: 0x52, # GREEK CAPITAL LETTER LAMDA
|
||||
0x039C: 0x53, # GREEK CAPITAL LETTER MU
|
||||
0x039D: 0x54, # GREEK CAPITAL LETTER NU
|
||||
0x039E: 0x55, # GREEK CAPITAL LETTER XI
|
||||
0x039F: 0x56, # GREEK CAPITAL LETTER OMICRON
|
||||
0x03A0: 0x57, # GREEK CAPITAL LETTER PI
|
||||
0x03A1: 0x58, # GREEK CAPITAL LETTER RHO
|
||||
0x03A3: 0x59, # GREEK CAPITAL LETTER SIGMA
|
||||
0x03A4: 0x62, # GREEK CAPITAL LETTER TAU
|
||||
0x03A5: 0x63, # GREEK CAPITAL LETTER UPSILON
|
||||
0x03A6: 0x64, # GREEK CAPITAL LETTER PHI
|
||||
0x03A7: 0x65, # GREEK CAPITAL LETTER CHI
|
||||
0x03A8: 0x66, # GREEK CAPITAL LETTER PSI
|
||||
0x03A9: 0x67, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03AA: 0x68, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x03AB: 0x69, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03AC: 0xB1, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x03AD: 0xB2, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x03AE: 0xB3, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x03AF: 0xB5, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x03B0: 0xCD, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x03B1: 0x8A, # GREEK SMALL LETTER ALPHA
|
||||
0x03B2: 0x8B, # GREEK SMALL LETTER BETA
|
||||
0x03B3: 0x8C, # GREEK SMALL LETTER GAMMA
|
||||
0x03B4: 0x8D, # GREEK SMALL LETTER DELTA
|
||||
0x03B5: 0x8E, # GREEK SMALL LETTER EPSILON
|
||||
0x03B6: 0x8F, # GREEK SMALL LETTER ZETA
|
||||
0x03B7: 0x9A, # GREEK SMALL LETTER ETA
|
||||
0x03B8: 0x9B, # GREEK SMALL LETTER THETA
|
||||
0x03B9: 0x9C, # GREEK SMALL LETTER IOTA
|
||||
0x03BA: 0x9D, # GREEK SMALL LETTER KAPPA
|
||||
0x03BB: 0x9E, # GREEK SMALL LETTER LAMDA
|
||||
0x03BC: 0x9F, # GREEK SMALL LETTER MU
|
||||
0x03BD: 0xAA, # GREEK SMALL LETTER NU
|
||||
0x03BE: 0xAB, # GREEK SMALL LETTER XI
|
||||
0x03BF: 0xAC, # GREEK SMALL LETTER OMICRON
|
||||
0x03C0: 0xAD, # GREEK SMALL LETTER PI
|
||||
0x03C1: 0xAE, # GREEK SMALL LETTER RHO
|
||||
0x03C2: 0xBA, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x03C3: 0xAF, # GREEK SMALL LETTER SIGMA
|
||||
0x03C4: 0xBB, # GREEK SMALL LETTER TAU
|
||||
0x03C5: 0xBC, # GREEK SMALL LETTER UPSILON
|
||||
0x03C6: 0xBD, # GREEK SMALL LETTER PHI
|
||||
0x03C7: 0xBE, # GREEK SMALL LETTER CHI
|
||||
0x03C8: 0xBF, # GREEK SMALL LETTER PSI
|
||||
0x03C9: 0xCB, # GREEK SMALL LETTER OMEGA
|
||||
0x03CA: 0xB4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x03CB: 0xB8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03CC: 0xB6, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x03CD: 0xB7, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x03CE: 0xB9, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x2015: 0xCF, # HORIZONTAL BAR
|
||||
0x2018: 0xCE, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xDE, # RIGHT SINGLE QUOTATION MARK
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic)
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic)
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German)
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic)
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic)
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH (Icelandic)
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN (Icelandic)
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German)
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0xF0, # LATIN SMALL LETTER ETH (Icelandic)
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0xFE, # LATIN SMALL LETTER THORN (Icelandic)
|
||||
0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0110: 0xA9, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xB9, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0112: 0xA2, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0113: 0xB2, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x0122: 0xA3, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0123: 0xB3, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE
|
||||
0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE
|
||||
0x012A: 0xA4, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x0136: 0xA6, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x0137: 0xB6, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x0138: 0xFF, # LATIN SMALL LETTER KRA
|
||||
0x013B: 0xA8, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x013C: 0xB8, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x014A: 0xAF, # LATIN CAPITAL LETTER ENG
|
||||
0x014B: 0xBF, # LATIN SMALL LETTER ENG
|
||||
0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0160: 0xAA, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xBA, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0166: 0xAB, # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0x0167: 0xBB, # LATIN SMALL LETTER T WITH STROKE
|
||||
0x0168: 0xD7, # LATIN CAPITAL LETTER U WITH TILDE
|
||||
0x0169: 0xF7, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x016A: 0xAE, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x016B: 0xBE, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x017D: 0xAC, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xBC, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x2015: 0xBD, # HORIZONTAL BAR
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,255 +303,5 @@ decoding_table = (
|
|||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x0E01: 0xA1, # THAI CHARACTER KO KAI
|
||||
0x0E02: 0xA2, # THAI CHARACTER KHO KHAI
|
||||
0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT
|
||||
0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI
|
||||
0x0E05: 0xA5, # THAI CHARACTER KHO KHON
|
||||
0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG
|
||||
0x0E07: 0xA7, # THAI CHARACTER NGO NGU
|
||||
0x0E08: 0xA8, # THAI CHARACTER CHO CHAN
|
||||
0x0E09: 0xA9, # THAI CHARACTER CHO CHING
|
||||
0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG
|
||||
0x0E0B: 0xAB, # THAI CHARACTER SO SO
|
||||
0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE
|
||||
0x0E0D: 0xAD, # THAI CHARACTER YO YING
|
||||
0x0E0E: 0xAE, # THAI CHARACTER DO CHADA
|
||||
0x0E0F: 0xAF, # THAI CHARACTER TO PATAK
|
||||
0x0E10: 0xB0, # THAI CHARACTER THO THAN
|
||||
0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO
|
||||
0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO
|
||||
0x0E13: 0xB3, # THAI CHARACTER NO NEN
|
||||
0x0E14: 0xB4, # THAI CHARACTER DO DEK
|
||||
0x0E15: 0xB5, # THAI CHARACTER TO TAO
|
||||
0x0E16: 0xB6, # THAI CHARACTER THO THUNG
|
||||
0x0E17: 0xB7, # THAI CHARACTER THO THAHAN
|
||||
0x0E18: 0xB8, # THAI CHARACTER THO THONG
|
||||
0x0E19: 0xB9, # THAI CHARACTER NO NU
|
||||
0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI
|
||||
0x0E1B: 0xBB, # THAI CHARACTER PO PLA
|
||||
0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG
|
||||
0x0E1D: 0xBD, # THAI CHARACTER FO FA
|
||||
0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN
|
||||
0x0E1F: 0xBF, # THAI CHARACTER FO FAN
|
||||
0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO
|
||||
0x0E21: 0xC1, # THAI CHARACTER MO MA
|
||||
0x0E22: 0xC2, # THAI CHARACTER YO YAK
|
||||
0x0E23: 0xC3, # THAI CHARACTER RO RUA
|
||||
0x0E24: 0xC4, # THAI CHARACTER RU
|
||||
0x0E25: 0xC5, # THAI CHARACTER LO LING
|
||||
0x0E26: 0xC6, # THAI CHARACTER LU
|
||||
0x0E27: 0xC7, # THAI CHARACTER WO WAEN
|
||||
0x0E28: 0xC8, # THAI CHARACTER SO SALA
|
||||
0x0E29: 0xC9, # THAI CHARACTER SO RUSI
|
||||
0x0E2A: 0xCA, # THAI CHARACTER SO SUA
|
||||
0x0E2B: 0xCB, # THAI CHARACTER HO HIP
|
||||
0x0E2C: 0xCC, # THAI CHARACTER LO CHULA
|
||||
0x0E2D: 0xCD, # THAI CHARACTER O ANG
|
||||
0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK
|
||||
0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI
|
||||
0x0E30: 0xD0, # THAI CHARACTER SARA A
|
||||
0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT
|
||||
0x0E32: 0xD2, # THAI CHARACTER SARA AA
|
||||
0x0E33: 0xD3, # THAI CHARACTER SARA AM
|
||||
0x0E34: 0xD4, # THAI CHARACTER SARA I
|
||||
0x0E35: 0xD5, # THAI CHARACTER SARA II
|
||||
0x0E36: 0xD6, # THAI CHARACTER SARA UE
|
||||
0x0E37: 0xD7, # THAI CHARACTER SARA UEE
|
||||
0x0E38: 0xD8, # THAI CHARACTER SARA U
|
||||
0x0E39: 0xD9, # THAI CHARACTER SARA UU
|
||||
0x0E3A: 0xDA, # THAI CHARACTER PHINTHU
|
||||
0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT
|
||||
0x0E40: 0xE0, # THAI CHARACTER SARA E
|
||||
0x0E41: 0xE1, # THAI CHARACTER SARA AE
|
||||
0x0E42: 0xE2, # THAI CHARACTER SARA O
|
||||
0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN
|
||||
0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI
|
||||
0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO
|
||||
0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK
|
||||
0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU
|
||||
0x0E48: 0xE8, # THAI CHARACTER MAI EK
|
||||
0x0E49: 0xE9, # THAI CHARACTER MAI THO
|
||||
0x0E4A: 0xEA, # THAI CHARACTER MAI TRI
|
||||
0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA
|
||||
0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT
|
||||
0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT
|
||||
0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN
|
||||
0x0E4F: 0xEF, # THAI CHARACTER FONGMAN
|
||||
0x0E50: 0xF0, # THAI DIGIT ZERO
|
||||
0x0E51: 0xF1, # THAI DIGIT ONE
|
||||
0x0E52: 0xF2, # THAI DIGIT TWO
|
||||
0x0E53: 0xF3, # THAI DIGIT THREE
|
||||
0x0E54: 0xF4, # THAI DIGIT FOUR
|
||||
0x0E55: 0xF5, # THAI DIGIT FIVE
|
||||
0x0E56: 0xF6, # THAI DIGIT SIX
|
||||
0x0E57: 0xF7, # THAI DIGIT SEVEN
|
||||
0x0E58: 0xF8, # THAI DIGIT EIGHT
|
||||
0x0E59: 0xF9, # THAI DIGIT NINE
|
||||
0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU
|
||||
0x0E5B: 0xFB, # THAI CHARACTER KHOMUT
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xAF, # LATIN CAPITAL LETTER AE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xA8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S (German)
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xBF, # LATIN SMALL LETTER AE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xB8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0100: 0xC2, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0101: 0xE2, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0104: 0xC0, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xE0, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0xC3, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE3, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0112: 0xC7, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0113: 0xE7, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0116: 0xCB, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0117: 0xEB, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0118: 0xC6, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xE6, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x0122: 0xCC, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0123: 0xEC, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x012A: 0xCE, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x012B: 0xEE, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x012E: 0xC1, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x012F: 0xE1, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x0136: 0xCD, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x0137: 0xED, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x013B: 0xCF, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x013C: 0xEF, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x0141: 0xD9, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xF9, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0145: 0xD2, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x0146: 0xF2, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x014C: 0xD4, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x014D: 0xF4, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0156: 0xAA, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x0157: 0xBA, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x015A: 0xDA, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0xFA, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x0160: 0xD0, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xF0, # LATIN SMALL LETTER S WITH CARON
|
||||
0x016A: 0xDB, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x016B: 0xFB, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x0172: 0xD8, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x0173: 0xF8, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x0179: 0xCA, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0xEA, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xDD, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0xDE, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xFE, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x2019: 0xFF, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201C: 0xB4, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xA1, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x010A: 0xA4, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0x010B: 0xA5, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0x0120: 0xB2, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0x0121: 0xB3, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0x0174: 0xD0, # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||
0x0175: 0xF0, # LATIN SMALL LETTER W WITH CIRCUMFLEX
|
||||
0x0176: 0xDE, # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||
0x0177: 0xFE, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
|
||||
0x0178: 0xAF, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x1E02: 0xA1, # LATIN CAPITAL LETTER B WITH DOT ABOVE
|
||||
0x1E03: 0xA2, # LATIN SMALL LETTER B WITH DOT ABOVE
|
||||
0x1E0A: 0xA6, # LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
0x1E0B: 0xAB, # LATIN SMALL LETTER D WITH DOT ABOVE
|
||||
0x1E1E: 0xB0, # LATIN CAPITAL LETTER F WITH DOT ABOVE
|
||||
0x1E1F: 0xB1, # LATIN SMALL LETTER F WITH DOT ABOVE
|
||||
0x1E40: 0xB4, # LATIN CAPITAL LETTER M WITH DOT ABOVE
|
||||
0x1E41: 0xB5, # LATIN SMALL LETTER M WITH DOT ABOVE
|
||||
0x1E56: 0xB7, # LATIN CAPITAL LETTER P WITH DOT ABOVE
|
||||
0x1E57: 0xB9, # LATIN SMALL LETTER P WITH DOT ABOVE
|
||||
0x1E60: 0xBB, # LATIN CAPITAL LETTER S WITH DOT ABOVE
|
||||
0x1E61: 0xBF, # LATIN SMALL LETTER S WITH DOT ABOVE
|
||||
0x1E6A: 0xD7, # LATIN CAPITAL LETTER T WITH DOT ABOVE
|
||||
0x1E6B: 0xF7, # LATIN SMALL LETTER T WITH DOT ABOVE
|
||||
0x1E80: 0xA8, # LATIN CAPITAL LETTER W WITH GRAVE
|
||||
0x1E81: 0xB8, # LATIN SMALL LETTER W WITH GRAVE
|
||||
0x1E82: 0xAA, # LATIN CAPITAL LETTER W WITH ACUTE
|
||||
0x1E83: 0xBA, # LATIN SMALL LETTER W WITH ACUTE
|
||||
0x1E84: 0xBD, # LATIN CAPITAL LETTER W WITH DIAERESIS
|
||||
0x1E85: 0xBE, # LATIN SMALL LETTER W WITH DIAERESIS
|
||||
0x1EF2: 0xAC, # LATIN CAPITAL LETTER Y WITH GRAVE
|
||||
0x1EF3: 0xBC, # LATIN SMALL LETTER Y WITH GRAVE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xD0, # LATIN CAPITAL LETTER ETH
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0xF0, # LATIN SMALL LETTER ETH
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0xFE, # LATIN SMALL LETTER THORN
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xBD, # LATIN SMALL LIGATURE OE
|
||||
0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x20AC: 0xA4, # EURO SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xA2, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0xC5, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE5, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xB2, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xB9, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0118: 0xDD, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xFD, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x0152: 0xBC, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xBD, # LATIN SMALL LIGATURE OE
|
||||
0x015A: 0xD7, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0xF7, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x0160: 0xA6, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xA8, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0170: 0xD8, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x0171: 0xF8, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x0178: 0xBE, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0xAE, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0xB4, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xB8, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x0218: 0xAA, # LATIN CAPITAL LETTER S WITH COMMA BELOW
|
||||
0x0219: 0xBA, # LATIN SMALL LETTER S WITH COMMA BELOW
|
||||
0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW
|
||||
0x021B: 0xFE, # LATIN SMALL LETTER T WITH COMMA BELOW
|
||||
0x201D: 0xB5, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xA5, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x20AC: 0xA4, # EURO SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02d9' # 0xFF -> DOT ABOVE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xDD, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xFD, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x0102: 0xC3, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x0103: 0xE3, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x010E: 0xCF, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x010F: 0xEF, # LATIN SMALL LETTER D WITH CARON
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x011A: 0xCC, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x011B: 0xEC, # LATIN SMALL LETTER E WITH CARON
|
||||
0x0139: 0xC5, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x013A: 0xE5, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x013D: 0xA5, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x013E: 0xB5, # LATIN SMALL LETTER L WITH CARON
|
||||
0x0141: 0xA3, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xB3, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xD1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xF1, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0147: 0xD2, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x0148: 0xF2, # LATIN SMALL LETTER N WITH CARON
|
||||
0x0150: 0xD5, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x0151: 0xF5, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x0154: 0xC0, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x0155: 0xE0, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x0158: 0xD8, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x0159: 0xF8, # LATIN SMALL LETTER R WITH CARON
|
||||
0x015A: 0xA6, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0xB6, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0162: 0xDE, # LATIN CAPITAL LETTER T WITH CEDILLA
|
||||
0x0163: 0xFE, # LATIN SMALL LETTER T WITH CEDILLA
|
||||
0x0164: 0xAB, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x0165: 0xBB, # LATIN SMALL LETTER T WITH CARON
|
||||
0x016E: 0xD9, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x016F: 0xF9, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x0170: 0xDB, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x0171: 0xFB, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x0179: 0xAC, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0xBC, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x02C7: 0xB7, # CARON
|
||||
0x02D8: 0xA2, # BREVE
|
||||
0x02D9: 0xFF, # DOT ABOVE
|
||||
0x02DB: 0xB2, # OGONEK
|
||||
0x02DD: 0xBD, # DOUBLE ACUTE ACCENT
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,256 +303,5 @@ decoding_table = (
|
|||
u'\u02d9' # 0xFF -> DOT ABOVE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0108: 0xC6, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||
0x0109: 0xE6, # LATIN SMALL LETTER C WITH CIRCUMFLEX
|
||||
0x010A: 0xC5, # LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||
0x010B: 0xE5, # LATIN SMALL LETTER C WITH DOT ABOVE
|
||||
0x011C: 0xD8, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||
0x011D: 0xF8, # LATIN SMALL LETTER G WITH CIRCUMFLEX
|
||||
0x011E: 0xAB, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x011F: 0xBB, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x0120: 0xD5, # LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||
0x0121: 0xF5, # LATIN SMALL LETTER G WITH DOT ABOVE
|
||||
0x0124: 0xA6, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0x0125: 0xB6, # LATIN SMALL LETTER H WITH CIRCUMFLEX
|
||||
0x0126: 0xA1, # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0x0127: 0xB1, # LATIN SMALL LETTER H WITH STROKE
|
||||
0x0130: 0xA9, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x0131: 0xB9, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0134: 0xAC, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0x0135: 0xBC, # LATIN SMALL LETTER J WITH CIRCUMFLEX
|
||||
0x015C: 0xDE, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||
0x015D: 0xFE, # LATIN SMALL LETTER S WITH CIRCUMFLEX
|
||||
0x015E: 0xAA, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xBA, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x016C: 0xDD, # LATIN CAPITAL LETTER U WITH BREVE
|
||||
0x016D: 0xFD, # LATIN SMALL LETTER U WITH BREVE
|
||||
0x017B: 0xAF, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xBF, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x02D8: 0xA2, # BREVE
|
||||
0x02D9: 0xFF, # DOT ABOVE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02d9' # 0xFF -> DOT ABOVE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0100: 0xC0, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0101: 0xE0, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0104: 0xA1, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0xB1, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0112: 0xAA, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0113: 0xBA, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0116: 0xCC, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0117: 0xEC, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0118: 0xCA, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xEA, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x0122: 0xAB, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0123: 0xBB, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x0128: 0xA5, # LATIN CAPITAL LETTER I WITH TILDE
|
||||
0x0129: 0xB5, # LATIN SMALL LETTER I WITH TILDE
|
||||
0x012A: 0xCF, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x012B: 0xEF, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x012E: 0xC7, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x012F: 0xE7, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x0136: 0xD3, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x0137: 0xF3, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x0138: 0xA2, # LATIN SMALL LETTER KRA
|
||||
0x013B: 0xA6, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x013C: 0xB6, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x0145: 0xD1, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x0146: 0xF1, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x014A: 0xBD, # LATIN CAPITAL LETTER ENG
|
||||
0x014B: 0xBF, # LATIN SMALL LETTER ENG
|
||||
0x014C: 0xD2, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x014D: 0xF2, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0156: 0xA3, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x0157: 0xB3, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0166: 0xAC, # LATIN CAPITAL LETTER T WITH STROKE
|
||||
0x0167: 0xBC, # LATIN SMALL LETTER T WITH STROKE
|
||||
0x0168: 0xDD, # LATIN CAPITAL LETTER U WITH TILDE
|
||||
0x0169: 0xFD, # LATIN SMALL LETTER U WITH TILDE
|
||||
0x016A: 0xDE, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x016B: 0xFE, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x0172: 0xD9, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x0173: 0xF9, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x02C7: 0xB7, # CARON
|
||||
0x02D9: 0xFF, # DOT ABOVE
|
||||
0x02DB: 0xB2, # OGONEK
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A7: 0xFD, # SECTION SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x0401: 0xA1, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0402: 0xA2, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0403: 0xA3, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x0404: 0xA4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x0405: 0xA5, # CYRILLIC CAPITAL LETTER DZE
|
||||
0x0406: 0xA6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0407: 0xA7, # CYRILLIC CAPITAL LETTER YI
|
||||
0x0408: 0xA8, # CYRILLIC CAPITAL LETTER JE
|
||||
0x0409: 0xA9, # CYRILLIC CAPITAL LETTER LJE
|
||||
0x040A: 0xAA, # CYRILLIC CAPITAL LETTER NJE
|
||||
0x040B: 0xAB, # CYRILLIC CAPITAL LETTER TSHE
|
||||
0x040C: 0xAC, # CYRILLIC CAPITAL LETTER KJE
|
||||
0x040E: 0xAE, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x040F: 0xAF, # CYRILLIC CAPITAL LETTER DZHE
|
||||
0x0410: 0xB0, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0xB1, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0xB2, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0xB3, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0xB4, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0xB5, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0xB6, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0xB7, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0xB8, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0xB9, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041A: 0xBA, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041B: 0xBB, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041C: 0xBC, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041D: 0xBD, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041E: 0xBE, # CYRILLIC CAPITAL LETTER O
|
||||
0x041F: 0xBF, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0xC0, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0xC1, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0xC2, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0xC3, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0xC4, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0xC5, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0xC6, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0xC7, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0xC8, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0xC9, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042A: 0xCA, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042B: 0xCB, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042C: 0xCC, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042D: 0xCD, # CYRILLIC CAPITAL LETTER E
|
||||
0x042E: 0xCE, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042F: 0xCF, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xD0, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xD1, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xD2, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xD3, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xD4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xD5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xD7, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xD8, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xD9, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043A: 0xDA, # CYRILLIC SMALL LETTER KA
|
||||
0x043B: 0xDB, # CYRILLIC SMALL LETTER EL
|
||||
0x043C: 0xDC, # CYRILLIC SMALL LETTER EM
|
||||
0x043D: 0xDD, # CYRILLIC SMALL LETTER EN
|
||||
0x043E: 0xDE, # CYRILLIC SMALL LETTER O
|
||||
0x043F: 0xDF, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xE0, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xE1, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xE2, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xE3, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xE4, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xE5, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xE6, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xE7, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xE8, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xE9, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044A: 0xEA, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044B: 0xEB, # CYRILLIC SMALL LETTER YERU
|
||||
0x044C: 0xEC, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044D: 0xED, # CYRILLIC SMALL LETTER E
|
||||
0x044E: 0xEE, # CYRILLIC SMALL LETTER YU
|
||||
0x044F: 0xEF, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xF1, # CYRILLIC SMALL LETTER IO
|
||||
0x0452: 0xF2, # CYRILLIC SMALL LETTER DJE
|
||||
0x0453: 0xF3, # CYRILLIC SMALL LETTER GJE
|
||||
0x0454: 0xF4, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x0455: 0xF5, # CYRILLIC SMALL LETTER DZE
|
||||
0x0456: 0xF6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0457: 0xF7, # CYRILLIC SMALL LETTER YI
|
||||
0x0458: 0xF8, # CYRILLIC SMALL LETTER JE
|
||||
0x0459: 0xF9, # CYRILLIC SMALL LETTER LJE
|
||||
0x045A: 0xFA, # CYRILLIC SMALL LETTER NJE
|
||||
0x045B: 0xFB, # CYRILLIC SMALL LETTER TSHE
|
||||
0x045C: 0xFC, # CYRILLIC SMALL LETTER KJE
|
||||
0x045E: 0xFE, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x045F: 0xFF, # CYRILLIC SMALL LETTER DZHE
|
||||
0x2116: 0xF0, # NUMERO SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,218 +303,5 @@ decoding_table = (
|
|||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x060C: 0xAC, # ARABIC COMMA
|
||||
0x061B: 0xBB, # ARABIC SEMICOLON
|
||||
0x061F: 0xBF, # ARABIC QUESTION MARK
|
||||
0x0621: 0xC1, # ARABIC LETTER HAMZA
|
||||
0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x0627: 0xC7, # ARABIC LETTER ALEF
|
||||
0x0628: 0xC8, # ARABIC LETTER BEH
|
||||
0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA
|
||||
0x062A: 0xCA, # ARABIC LETTER TEH
|
||||
0x062B: 0xCB, # ARABIC LETTER THEH
|
||||
0x062C: 0xCC, # ARABIC LETTER JEEM
|
||||
0x062D: 0xCD, # ARABIC LETTER HAH
|
||||
0x062E: 0xCE, # ARABIC LETTER KHAH
|
||||
0x062F: 0xCF, # ARABIC LETTER DAL
|
||||
0x0630: 0xD0, # ARABIC LETTER THAL
|
||||
0x0631: 0xD1, # ARABIC LETTER REH
|
||||
0x0632: 0xD2, # ARABIC LETTER ZAIN
|
||||
0x0633: 0xD3, # ARABIC LETTER SEEN
|
||||
0x0634: 0xD4, # ARABIC LETTER SHEEN
|
||||
0x0635: 0xD5, # ARABIC LETTER SAD
|
||||
0x0636: 0xD6, # ARABIC LETTER DAD
|
||||
0x0637: 0xD7, # ARABIC LETTER TAH
|
||||
0x0638: 0xD8, # ARABIC LETTER ZAH
|
||||
0x0639: 0xD9, # ARABIC LETTER AIN
|
||||
0x063A: 0xDA, # ARABIC LETTER GHAIN
|
||||
0x0640: 0xE0, # ARABIC TATWEEL
|
||||
0x0641: 0xE1, # ARABIC LETTER FEH
|
||||
0x0642: 0xE2, # ARABIC LETTER QAF
|
||||
0x0643: 0xE3, # ARABIC LETTER KAF
|
||||
0x0644: 0xE4, # ARABIC LETTER LAM
|
||||
0x0645: 0xE5, # ARABIC LETTER MEEM
|
||||
0x0646: 0xE6, # ARABIC LETTER NOON
|
||||
0x0647: 0xE7, # ARABIC LETTER HEH
|
||||
0x0648: 0xE8, # ARABIC LETTER WAW
|
||||
0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA
|
||||
0x064A: 0xEA, # ARABIC LETTER YEH
|
||||
0x064B: 0xEB, # ARABIC FATHATAN
|
||||
0x064C: 0xEC, # ARABIC DAMMATAN
|
||||
0x064D: 0xED, # ARABIC KASRATAN
|
||||
0x064E: 0xEE, # ARABIC FATHA
|
||||
0x064F: 0xEF, # ARABIC DAMMA
|
||||
0x0650: 0xF0, # ARABIC KASRA
|
||||
0x0651: 0xF1, # ARABIC SHADDA
|
||||
0x0652: 0xF2, # ARABIC SUKUN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,260 +303,5 @@ decoding_table = (
|
|||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x037A: 0xAA, # GREEK YPOGEGRAMMENI
|
||||
0x0384: 0xB4, # GREEK TONOS
|
||||
0x0385: 0xB5, # GREEK DIALYTIKA TONOS
|
||||
0x0386: 0xB6, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0388: 0xB8, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x0389: 0xB9, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x038A: 0xBA, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x038C: 0xBC, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x038E: 0xBE, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x038F: 0xBF, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0390: 0xC0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x0391: 0xC1, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0392: 0xC2, # GREEK CAPITAL LETTER BETA
|
||||
0x0393: 0xC3, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0394: 0xC4, # GREEK CAPITAL LETTER DELTA
|
||||
0x0395: 0xC5, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0396: 0xC6, # GREEK CAPITAL LETTER ZETA
|
||||
0x0397: 0xC7, # GREEK CAPITAL LETTER ETA
|
||||
0x0398: 0xC8, # GREEK CAPITAL LETTER THETA
|
||||
0x0399: 0xC9, # GREEK CAPITAL LETTER IOTA
|
||||
0x039A: 0xCA, # GREEK CAPITAL LETTER KAPPA
|
||||
0x039B: 0xCB, # GREEK CAPITAL LETTER LAMDA
|
||||
0x039C: 0xCC, # GREEK CAPITAL LETTER MU
|
||||
0x039D: 0xCD, # GREEK CAPITAL LETTER NU
|
||||
0x039E: 0xCE, # GREEK CAPITAL LETTER XI
|
||||
0x039F: 0xCF, # GREEK CAPITAL LETTER OMICRON
|
||||
0x03A0: 0xD0, # GREEK CAPITAL LETTER PI
|
||||
0x03A1: 0xD1, # GREEK CAPITAL LETTER RHO
|
||||
0x03A3: 0xD3, # GREEK CAPITAL LETTER SIGMA
|
||||
0x03A4: 0xD4, # GREEK CAPITAL LETTER TAU
|
||||
0x03A5: 0xD5, # GREEK CAPITAL LETTER UPSILON
|
||||
0x03A6: 0xD6, # GREEK CAPITAL LETTER PHI
|
||||
0x03A7: 0xD7, # GREEK CAPITAL LETTER CHI
|
||||
0x03A8: 0xD8, # GREEK CAPITAL LETTER PSI
|
||||
0x03A9: 0xD9, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03AA: 0xDA, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x03AB: 0xDB, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03AC: 0xDC, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x03AD: 0xDD, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x03AE: 0xDE, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x03AF: 0xDF, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x03B0: 0xE0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA
|
||||
0x03B2: 0xE2, # GREEK SMALL LETTER BETA
|
||||
0x03B3: 0xE3, # GREEK SMALL LETTER GAMMA
|
||||
0x03B4: 0xE4, # GREEK SMALL LETTER DELTA
|
||||
0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON
|
||||
0x03B6: 0xE6, # GREEK SMALL LETTER ZETA
|
||||
0x03B7: 0xE7, # GREEK SMALL LETTER ETA
|
||||
0x03B8: 0xE8, # GREEK SMALL LETTER THETA
|
||||
0x03B9: 0xE9, # GREEK SMALL LETTER IOTA
|
||||
0x03BA: 0xEA, # GREEK SMALL LETTER KAPPA
|
||||
0x03BB: 0xEB, # GREEK SMALL LETTER LAMDA
|
||||
0x03BC: 0xEC, # GREEK SMALL LETTER MU
|
||||
0x03BD: 0xED, # GREEK SMALL LETTER NU
|
||||
0x03BE: 0xEE, # GREEK SMALL LETTER XI
|
||||
0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON
|
||||
0x03C0: 0xF0, # GREEK SMALL LETTER PI
|
||||
0x03C1: 0xF1, # GREEK SMALL LETTER RHO
|
||||
0x03C2: 0xF2, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA
|
||||
0x03C4: 0xF4, # GREEK SMALL LETTER TAU
|
||||
0x03C5: 0xF5, # GREEK SMALL LETTER UPSILON
|
||||
0x03C6: 0xF6, # GREEK SMALL LETTER PHI
|
||||
0x03C7: 0xF7, # GREEK SMALL LETTER CHI
|
||||
0x03C8: 0xF8, # GREEK SMALL LETTER PSI
|
||||
0x03C9: 0xF9, # GREEK SMALL LETTER OMEGA
|
||||
0x03CA: 0xFA, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x03CB: 0xFB, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03CC: 0xFC, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x03CD: 0xFD, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x03CE: 0xFE, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x2015: 0xAF, # HORIZONTAL BAR
|
||||
0x2018: 0xA1, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xA2, # RIGHT SINGLE QUOTATION MARK
|
||||
0x20AC: 0xA4, # EURO SIGN
|
||||
0x20AF: 0xA5, # DRACHMA SIGN
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,227 +303,5 @@ decoding_table = (
|
|||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00D7: 0xAA, # MULTIPLICATION SIGN
|
||||
0x00F7: 0xBA, # DIVISION SIGN
|
||||
0x05D0: 0xE0, # HEBREW LETTER ALEF
|
||||
0x05D1: 0xE1, # HEBREW LETTER BET
|
||||
0x05D2: 0xE2, # HEBREW LETTER GIMEL
|
||||
0x05D3: 0xE3, # HEBREW LETTER DALET
|
||||
0x05D4: 0xE4, # HEBREW LETTER HE
|
||||
0x05D5: 0xE5, # HEBREW LETTER VAV
|
||||
0x05D6: 0xE6, # HEBREW LETTER ZAYIN
|
||||
0x05D7: 0xE7, # HEBREW LETTER HET
|
||||
0x05D8: 0xE8, # HEBREW LETTER TET
|
||||
0x05D9: 0xE9, # HEBREW LETTER YOD
|
||||
0x05DA: 0xEA, # HEBREW LETTER FINAL KAF
|
||||
0x05DB: 0xEB, # HEBREW LETTER KAF
|
||||
0x05DC: 0xEC, # HEBREW LETTER LAMED
|
||||
0x05DD: 0xED, # HEBREW LETTER FINAL MEM
|
||||
0x05DE: 0xEE, # HEBREW LETTER MEM
|
||||
0x05DF: 0xEF, # HEBREW LETTER FINAL NUN
|
||||
0x05E0: 0xF0, # HEBREW LETTER NUN
|
||||
0x05E1: 0xF1, # HEBREW LETTER SAMEKH
|
||||
0x05E2: 0xF2, # HEBREW LETTER AYIN
|
||||
0x05E3: 0xF3, # HEBREW LETTER FINAL PE
|
||||
0x05E4: 0xF4, # HEBREW LETTER PE
|
||||
0x05E5: 0xF5, # HEBREW LETTER FINAL TSADI
|
||||
0x05E6: 0xF6, # HEBREW LETTER TSADI
|
||||
0x05E7: 0xF7, # HEBREW LETTER QOF
|
||||
0x05E8: 0xF8, # HEBREW LETTER RESH
|
||||
0x05E9: 0xF9, # HEBREW LETTER SHIN
|
||||
0x05EA: 0xFA, # HEBREW LETTER TAV
|
||||
0x200E: 0xFD, # LEFT-TO-RIGHT MARK
|
||||
0x200F: 0xFE, # RIGHT-TO-LEFT MARK
|
||||
0x2017: 0xDF, # DOUBLE LOW LINE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x00A0: 0xA0, # NO-BREAK SPACE
|
||||
0x00A1: 0xA1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A4: 0xA4, # CURRENCY SIGN
|
||||
0x00A5: 0xA5, # YEN SIGN
|
||||
0x00A6: 0xA6, # BROKEN BAR
|
||||
0x00A7: 0xA7, # SECTION SIGN
|
||||
0x00A8: 0xA8, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xAA, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xAB, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xAC, # NOT SIGN
|
||||
0x00AD: 0xAD, # SOFT HYPHEN
|
||||
0x00AE: 0xAE, # REGISTERED SIGN
|
||||
0x00AF: 0xAF, # MACRON
|
||||
0x00B0: 0xB0, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0xB2, # SUPERSCRIPT TWO
|
||||
0x00B3: 0xB3, # SUPERSCRIPT THREE
|
||||
0x00B4: 0xB4, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xB6, # PILCROW SIGN
|
||||
0x00B7: 0xB7, # MIDDLE DOT
|
||||
0x00B8: 0xB8, # CEDILLA
|
||||
0x00B9: 0xB9, # SUPERSCRIPT ONE
|
||||
0x00BA: 0xBA, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xBB, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BC: 0xBC, # VULGAR FRACTION ONE QUARTER
|
||||
0x00BD: 0xBD, # VULGAR FRACTION ONE HALF
|
||||
0x00BE: 0xBE, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00BF: 0xBF, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xC0, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xC1, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xC2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xC3, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0xC4, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0xC5, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xC6, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0xC7, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xC8, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0xC9, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xCA, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xCB, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xCC, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xCD, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xCE, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xCF, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0xD1, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xD2, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xD3, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xD4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xD5, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0xD6, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D7: 0xD7, # MULTIPLICATION SIGN
|
||||
0x00D8: 0xD8, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xD9, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xDA, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xDB, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0xDC, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xDF, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0xE0, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0xE1, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0xE2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0xE3, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0xE4, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0xE5, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xE6, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0xE7, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0xE8, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0xE9, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0xEA, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0xEB, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0xEC, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0xED, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0xEE, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0xEF, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0xF1, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0xF2, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0xF3, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0xF4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0xF5, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0xF6, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xF7, # DIVISION SIGN
|
||||
0x00F8: 0xF8, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0xF9, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0xFA, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0xFB, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0xFC, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xFF, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x011E: 0xD0, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x011F: 0xF0, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x0130: 0xDD, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x0131: 0xFD, # LATIN SMALL LETTER DOTLESS I
|
||||
0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xFE, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0x9A, # NO-BREAK SPACE
|
||||
0x00A9: 0xBF, # COPYRIGHT SIGN
|
||||
0x00B0: 0x9C, # DEGREE SIGN
|
||||
0x00B2: 0x9D, # SUPERSCRIPT TWO
|
||||
0x00B7: 0x9E, # MIDDLE DOT
|
||||
0x00F7: 0x9F, # DIVISION SIGN
|
||||
0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O
|
||||
0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E
|
||||
0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xC1, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xC2, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xD7, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xC4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xC5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xC9, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043A: 0xCB, # CYRILLIC SMALL LETTER KA
|
||||
0x043B: 0xCC, # CYRILLIC SMALL LETTER EL
|
||||
0x043C: 0xCD, # CYRILLIC SMALL LETTER EM
|
||||
0x043D: 0xCE, # CYRILLIC SMALL LETTER EN
|
||||
0x043E: 0xCF, # CYRILLIC SMALL LETTER O
|
||||
0x043F: 0xD0, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xD2, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xD3, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xD4, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xD5, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xC6, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xC8, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU
|
||||
0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044D: 0xDC, # CYRILLIC SMALL LETTER E
|
||||
0x044E: 0xC0, # CYRILLIC SMALL LETTER YU
|
||||
0x044F: 0xD1, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xA3, # CYRILLIC SMALL LETTER IO
|
||||
0x2219: 0x95, # BULLET OPERATOR
|
||||
0x221A: 0x96, # SQUARE ROOT
|
||||
0x2248: 0x97, # ALMOST EQUAL TO
|
||||
0x2264: 0x98, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0x99, # GREATER-THAN OR EQUAL TO
|
||||
0x2320: 0x93, # TOP HALF INTEGRAL
|
||||
0x2321: 0x9B, # BOTTOM HALF INTEGRAL
|
||||
0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x2553: 0xA4, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
|
||||
0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x2555: 0xA6, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
|
||||
0x2556: 0xA7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
|
||||
0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x255C: 0xAD, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
|
||||
0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x2562: 0xB4, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
|
||||
0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x2564: 0xB6, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
|
||||
0x2565: 0xB7, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
|
||||
0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x256B: 0xBD, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
|
||||
0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x2580: 0x8B, # UPPER HALF BLOCK
|
||||
0x2584: 0x8C, # LOWER HALF BLOCK
|
||||
0x2588: 0x8D, # FULL BLOCK
|
||||
0x258C: 0x8E, # LEFT HALF BLOCK
|
||||
0x2590: 0x8F, # RIGHT HALF BLOCK
|
||||
0x2591: 0x90, # LIGHT SHADE
|
||||
0x2592: 0x91, # MEDIUM SHADE
|
||||
0x2593: 0x92, # DARK SHADE
|
||||
0x25A0: 0x94, # BLACK SQUARE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x00A0: 0x9A, # NO-BREAK SPACE
|
||||
0x00A9: 0xBF, # COPYRIGHT SIGN
|
||||
0x00B0: 0x9C, # DEGREE SIGN
|
||||
0x00B2: 0x9D, # SUPERSCRIPT TWO
|
||||
0x00B7: 0x9E, # MIDDLE DOT
|
||||
0x00F7: 0x9F, # DIVISION SIGN
|
||||
0x0401: 0xB3, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0404: 0xB4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x0406: 0xB6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0407: 0xB7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
|
||||
0x0410: 0xE1, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0xE2, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0xF7, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0xE7, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0xE4, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0xE5, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0xF6, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0xFA, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0xE9, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0xEA, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041A: 0xEB, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041B: 0xEC, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041C: 0xED, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041D: 0xEE, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041E: 0xEF, # CYRILLIC CAPITAL LETTER O
|
||||
0x041F: 0xF0, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0xF2, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0xF3, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0xF4, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0xF5, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0xE6, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0xE8, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0xE3, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0xFE, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0xFB, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0xFD, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042A: 0xFF, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042B: 0xF9, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042C: 0xF8, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042D: 0xFC, # CYRILLIC CAPITAL LETTER E
|
||||
0x042E: 0xE0, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042F: 0xF1, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xC1, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xC2, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xD7, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xC7, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xC4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xC5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xD6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xDA, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xC9, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xCA, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043A: 0xCB, # CYRILLIC SMALL LETTER KA
|
||||
0x043B: 0xCC, # CYRILLIC SMALL LETTER EL
|
||||
0x043C: 0xCD, # CYRILLIC SMALL LETTER EM
|
||||
0x043D: 0xCE, # CYRILLIC SMALL LETTER EN
|
||||
0x043E: 0xCF, # CYRILLIC SMALL LETTER O
|
||||
0x043F: 0xD0, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xD2, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xD3, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xD4, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xD5, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xC6, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xC8, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xC3, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xDE, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xDB, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xDD, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044A: 0xDF, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044B: 0xD9, # CYRILLIC SMALL LETTER YERU
|
||||
0x044C: 0xD8, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044D: 0xDC, # CYRILLIC SMALL LETTER E
|
||||
0x044E: 0xC0, # CYRILLIC SMALL LETTER YU
|
||||
0x044F: 0xD1, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xA3, # CYRILLIC SMALL LETTER IO
|
||||
0x0454: 0xA4, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x0456: 0xA6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0457: 0xA7, # CYRILLIC SMALL LETTER YI (UKRAINIAN)
|
||||
0x0490: 0xBD, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
0x0491: 0xAD, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
0x2219: 0x95, # BULLET OPERATOR
|
||||
0x221A: 0x96, # SQUARE ROOT
|
||||
0x2248: 0x97, # ALMOST EQUAL TO
|
||||
0x2264: 0x98, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0x99, # GREATER-THAN OR EQUAL TO
|
||||
0x2320: 0x93, # TOP HALF INTEGRAL
|
||||
0x2321: 0x9B, # BOTTOM HALF INTEGRAL
|
||||
0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x250C: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x251C: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x252C: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x253C: 0x8A, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x2550: 0xA0, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x2551: 0xA1, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x2552: 0xA2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x2554: 0xA5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x2557: 0xA8, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x2558: 0xA9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x2559: 0xAA, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x255A: 0xAB, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x255B: 0xAC, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x255D: 0xAE, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x255E: 0xAF, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x255F: 0xB0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x2560: 0xB1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x2561: 0xB2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x2563: 0xB5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x2566: 0xB8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x2567: 0xB9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x2568: 0xBA, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x2569: 0xBB, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x256A: 0xBC, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x256C: 0xBE, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x2580: 0x8B, # UPPER HALF BLOCK
|
||||
0x2584: 0x8C, # LOWER HALF BLOCK
|
||||
0x2588: 0x8D, # FULL BLOCK
|
||||
0x258C: 0x8E, # LEFT HALF BLOCK
|
||||
0x2590: 0x8F, # RIGHT HALF BLOCK
|
||||
0x2591: 0x90, # LIGHT SHADE
|
||||
0x2592: 0x91, # MEDIUM SHADE
|
||||
0x2593: 0x92, # DARK SHADE
|
||||
0x25A0: 0x94, # BLACK SQUARE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xF8, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xF9, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x0100: 0x81, # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0x0101: 0x82, # LATIN SMALL LETTER A WITH MACRON
|
||||
0x0104: 0x84, # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
0x0105: 0x88, # LATIN SMALL LETTER A WITH OGONEK
|
||||
0x0106: 0x8C, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0x8D, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0x89, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0x8B, # LATIN SMALL LETTER C WITH CARON
|
||||
0x010E: 0x91, # LATIN CAPITAL LETTER D WITH CARON
|
||||
0x010F: 0x93, # LATIN SMALL LETTER D WITH CARON
|
||||
0x0112: 0x94, # LATIN CAPITAL LETTER E WITH MACRON
|
||||
0x0113: 0x95, # LATIN SMALL LETTER E WITH MACRON
|
||||
0x0116: 0x96, # LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||
0x0117: 0x98, # LATIN SMALL LETTER E WITH DOT ABOVE
|
||||
0x0118: 0xA2, # LATIN CAPITAL LETTER E WITH OGONEK
|
||||
0x0119: 0xAB, # LATIN SMALL LETTER E WITH OGONEK
|
||||
0x011A: 0x9D, # LATIN CAPITAL LETTER E WITH CARON
|
||||
0x011B: 0x9E, # LATIN SMALL LETTER E WITH CARON
|
||||
0x0122: 0xFE, # LATIN CAPITAL LETTER G WITH CEDILLA
|
||||
0x0123: 0xAE, # LATIN SMALL LETTER G WITH CEDILLA
|
||||
0x012A: 0xB1, # LATIN CAPITAL LETTER I WITH MACRON
|
||||
0x012B: 0xB4, # LATIN SMALL LETTER I WITH MACRON
|
||||
0x012E: 0xAF, # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
0x012F: 0xB0, # LATIN SMALL LETTER I WITH OGONEK
|
||||
0x0136: 0xB5, # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
0x0137: 0xFA, # LATIN SMALL LETTER K WITH CEDILLA
|
||||
0x0139: 0xBD, # LATIN CAPITAL LETTER L WITH ACUTE
|
||||
0x013A: 0xBE, # LATIN SMALL LETTER L WITH ACUTE
|
||||
0x013B: 0xB9, # LATIN CAPITAL LETTER L WITH CEDILLA
|
||||
0x013C: 0xBA, # LATIN SMALL LETTER L WITH CEDILLA
|
||||
0x013D: 0xBB, # LATIN CAPITAL LETTER L WITH CARON
|
||||
0x013E: 0xBC, # LATIN SMALL LETTER L WITH CARON
|
||||
0x0141: 0xFC, # LATIN CAPITAL LETTER L WITH STROKE
|
||||
0x0142: 0xB8, # LATIN SMALL LETTER L WITH STROKE
|
||||
0x0143: 0xC1, # LATIN CAPITAL LETTER N WITH ACUTE
|
||||
0x0144: 0xC4, # LATIN SMALL LETTER N WITH ACUTE
|
||||
0x0145: 0xBF, # LATIN CAPITAL LETTER N WITH CEDILLA
|
||||
0x0146: 0xC0, # LATIN SMALL LETTER N WITH CEDILLA
|
||||
0x0147: 0xC5, # LATIN CAPITAL LETTER N WITH CARON
|
||||
0x0148: 0xCB, # LATIN SMALL LETTER N WITH CARON
|
||||
0x014C: 0xCF, # LATIN CAPITAL LETTER O WITH MACRON
|
||||
0x014D: 0xD8, # LATIN SMALL LETTER O WITH MACRON
|
||||
0x0150: 0xCC, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||
0x0151: 0xCE, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
|
||||
0x0154: 0xD9, # LATIN CAPITAL LETTER R WITH ACUTE
|
||||
0x0155: 0xDA, # LATIN SMALL LETTER R WITH ACUTE
|
||||
0x0156: 0xDF, # LATIN CAPITAL LETTER R WITH CEDILLA
|
||||
0x0157: 0xE0, # LATIN SMALL LETTER R WITH CEDILLA
|
||||
0x0158: 0xDB, # LATIN CAPITAL LETTER R WITH CARON
|
||||
0x0159: 0xDE, # LATIN SMALL LETTER R WITH CARON
|
||||
0x015A: 0xE5, # LATIN CAPITAL LETTER S WITH ACUTE
|
||||
0x015B: 0xE6, # LATIN SMALL LETTER S WITH ACUTE
|
||||
0x0160: 0xE1, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xE4, # LATIN SMALL LETTER S WITH CARON
|
||||
0x0164: 0xE8, # LATIN CAPITAL LETTER T WITH CARON
|
||||
0x0165: 0xE9, # LATIN SMALL LETTER T WITH CARON
|
||||
0x016A: 0xED, # LATIN CAPITAL LETTER U WITH MACRON
|
||||
0x016B: 0xF0, # LATIN SMALL LETTER U WITH MACRON
|
||||
0x016E: 0xF1, # LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||
0x016F: 0xF3, # LATIN SMALL LETTER U WITH RING ABOVE
|
||||
0x0170: 0xF4, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||
0x0171: 0xF5, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
|
||||
0x0172: 0xF6, # LATIN CAPITAL LETTER U WITH OGONEK
|
||||
0x0173: 0xF7, # LATIN SMALL LETTER U WITH OGONEK
|
||||
0x0179: 0x8F, # LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x017A: 0x90, # LATIN SMALL LETTER Z WITH ACUTE
|
||||
0x017B: 0xFB, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||
0x017C: 0xFD, # LATIN SMALL LETTER Z WITH DOT ABOVE
|
||||
0x017D: 0xEB, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xEC, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A1: 0xC1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xD9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00AF: 0xF8, # MACRON
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xAB, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00B7: 0xE1, # MIDDLE DOT
|
||||
0x00B8: 0xFC, # CEDILLA
|
||||
0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xDF, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xC0, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xDE, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xFD, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xFA, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xFE, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0106: 0xC6, # LATIN CAPITAL LETTER C WITH ACUTE
|
||||
0x0107: 0xE6, # LATIN SMALL LETTER C WITH ACUTE
|
||||
0x010C: 0xC8, # LATIN CAPITAL LETTER C WITH CARON
|
||||
0x010D: 0xE8, # LATIN SMALL LETTER C WITH CARON
|
||||
0x0110: 0xD0, # LATIN CAPITAL LETTER D WITH STROKE
|
||||
0x0111: 0xF0, # LATIN SMALL LETTER D WITH STROKE
|
||||
0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x0160: 0xA9, # LATIN CAPITAL LETTER S WITH CARON
|
||||
0x0161: 0xB9, # LATIN SMALL LETTER S WITH CARON
|
||||
0x017D: 0xAE, # LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x017E: 0xBE, # LATIN SMALL LETTER Z WITH CARON
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x02DA: 0xFB, # RING ABOVE
|
||||
0x02DC: 0xF7, # SMALL TILDE
|
||||
0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03C0: 0xF9, # GREEK SMALL LETTER PI
|
||||
0x2013: 0xE0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0xE4, # PER MILLE SIGN
|
||||
0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x2044: 0xDA, # FRACTION SLASH
|
||||
0x20AC: 0xDB, # EURO SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xB4, # INCREMENT
|
||||
0x220F: 0xB8, # N-ARY PRODUCT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x222B: 0xBA, # INTEGRAL
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
0xF8FF: 0xD8, # Apple logo
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u20ac' # 0xFF -> EURO SIGN
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0401: 0xDD, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0402: 0xAB, # CYRILLIC CAPITAL LETTER DJE
|
||||
0x0403: 0xAE, # CYRILLIC CAPITAL LETTER GJE
|
||||
0x0404: 0xB8, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x0405: 0xC1, # CYRILLIC CAPITAL LETTER DZE
|
||||
0x0406: 0xA7, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0407: 0xBA, # CYRILLIC CAPITAL LETTER YI
|
||||
0x0408: 0xB7, # CYRILLIC CAPITAL LETTER JE
|
||||
0x0409: 0xBC, # CYRILLIC CAPITAL LETTER LJE
|
||||
0x040A: 0xBE, # CYRILLIC CAPITAL LETTER NJE
|
||||
0x040B: 0xCB, # CYRILLIC CAPITAL LETTER TSHE
|
||||
0x040C: 0xCD, # CYRILLIC CAPITAL LETTER KJE
|
||||
0x040E: 0xD8, # CYRILLIC CAPITAL LETTER SHORT U
|
||||
0x040F: 0xDA, # CYRILLIC CAPITAL LETTER DZHE
|
||||
0x0410: 0x80, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0x81, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0x82, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0x83, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0x84, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0x85, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0x86, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0x87, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0x88, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0x89, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041A: 0x8A, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041B: 0x8B, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041C: 0x8C, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041D: 0x8D, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041E: 0x8E, # CYRILLIC CAPITAL LETTER O
|
||||
0x041F: 0x8F, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0x90, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0x91, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0x92, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0x93, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0x94, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0x95, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0x96, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0x97, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0x98, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0x99, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042A: 0x9A, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042B: 0x9B, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042C: 0x9C, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042D: 0x9D, # CYRILLIC CAPITAL LETTER E
|
||||
0x042E: 0x9E, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042F: 0x9F, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xE0, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xE1, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xE2, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xE3, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xE4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xE5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xE6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xE7, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xE8, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xE9, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043A: 0xEA, # CYRILLIC SMALL LETTER KA
|
||||
0x043B: 0xEB, # CYRILLIC SMALL LETTER EL
|
||||
0x043C: 0xEC, # CYRILLIC SMALL LETTER EM
|
||||
0x043D: 0xED, # CYRILLIC SMALL LETTER EN
|
||||
0x043E: 0xEE, # CYRILLIC SMALL LETTER O
|
||||
0x043F: 0xEF, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xF0, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xF1, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xF2, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xF3, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xF4, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xF5, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xF6, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xF7, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xF8, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xF9, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044A: 0xFA, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044B: 0xFB, # CYRILLIC SMALL LETTER YERU
|
||||
0x044C: 0xFC, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044D: 0xFD, # CYRILLIC SMALL LETTER E
|
||||
0x044E: 0xFE, # CYRILLIC SMALL LETTER YU
|
||||
0x044F: 0xDF, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xDE, # CYRILLIC SMALL LETTER IO
|
||||
0x0452: 0xAC, # CYRILLIC SMALL LETTER DJE
|
||||
0x0453: 0xAF, # CYRILLIC SMALL LETTER GJE
|
||||
0x0454: 0xB9, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x0455: 0xCF, # CYRILLIC SMALL LETTER DZE
|
||||
0x0456: 0xB4, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0457: 0xBB, # CYRILLIC SMALL LETTER YI
|
||||
0x0458: 0xC0, # CYRILLIC SMALL LETTER JE
|
||||
0x0459: 0xBD, # CYRILLIC SMALL LETTER LJE
|
||||
0x045A: 0xBF, # CYRILLIC SMALL LETTER NJE
|
||||
0x045B: 0xCC, # CYRILLIC SMALL LETTER TSHE
|
||||
0x045C: 0xCE, # CYRILLIC SMALL LETTER KJE
|
||||
0x045E: 0xD9, # CYRILLIC SMALL LETTER SHORT U
|
||||
0x045F: 0xDB, # CYRILLIC SMALL LETTER DZHE
|
||||
0x0490: 0xA2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
0x0491: 0xB6, # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xD7, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x20AC: 0xFF, # EURO SIGN
|
||||
0x2116: 0xDC, # NUMERO SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE, left-right
|
||||
0x0020: 0xA0, # SPACE, right-left
|
||||
0x0021: 0x21, # EXCLAMATION MARK, left-right
|
||||
0x0021: 0xA1, # EXCLAMATION MARK, right-left
|
||||
0x0022: 0x22, # QUOTATION MARK, left-right
|
||||
0x0022: 0xA2, # QUOTATION MARK, right-left
|
||||
0x0023: 0x23, # NUMBER SIGN, left-right
|
||||
0x0023: 0xA3, # NUMBER SIGN, right-left
|
||||
0x0024: 0x24, # DOLLAR SIGN, left-right
|
||||
0x0024: 0xA4, # DOLLAR SIGN, right-left
|
||||
0x0025: 0x25, # PERCENT SIGN, left-right
|
||||
0x0026: 0x26, # AMPERSAND, left-right
|
||||
0x0026: 0xA6, # AMPERSAND, right-left
|
||||
0x0027: 0x27, # APOSTROPHE, left-right
|
||||
0x0027: 0xA7, # APOSTROPHE, right-left
|
||||
0x0028: 0x28, # LEFT PARENTHESIS, left-right
|
||||
0x0028: 0xA8, # LEFT PARENTHESIS, right-left
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS, left-right
|
||||
0x0029: 0xA9, # RIGHT PARENTHESIS, right-left
|
||||
0x002A: 0x2A, # ASTERISK, left-right
|
||||
0x002A: 0xAA, # ASTERISK, right-left
|
||||
0x002B: 0x2B, # PLUS SIGN, left-right
|
||||
0x002B: 0xAB, # PLUS SIGN, right-left
|
||||
0x002C: 0x2C, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
|
||||
0x002D: 0x2D, # HYPHEN-MINUS, left-right
|
||||
0x002D: 0xAD, # HYPHEN-MINUS, right-left
|
||||
0x002E: 0x2E, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
|
||||
0x002E: 0xAE, # FULL STOP, right-left
|
||||
0x002F: 0x2F, # SOLIDUS, left-right
|
||||
0x002F: 0xAF, # SOLIDUS, right-left
|
||||
0x0030: 0x30, # DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
0x003A: 0x3A, # COLON, left-right
|
||||
0x003A: 0xBA, # COLON, right-left
|
||||
0x003B: 0x3B, # SEMICOLON, left-right
|
||||
0x003C: 0x3C, # LESS-THAN SIGN, left-right
|
||||
0x003C: 0xBC, # LESS-THAN SIGN, right-left
|
||||
0x003D: 0x3D, # EQUALS SIGN, left-right
|
||||
0x003D: 0xBD, # EQUALS SIGN, right-left
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN, left-right
|
||||
0x003E: 0xBE, # GREATER-THAN SIGN, right-left
|
||||
0x003F: 0x3F, # QUESTION MARK, left-right
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET, left-right
|
||||
0x005B: 0xDB, # LEFT SQUARE BRACKET, right-left
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS, left-right
|
||||
0x005C: 0xDC, # REVERSE SOLIDUS, right-left
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET, left-right
|
||||
0x005D: 0xDD, # RIGHT SQUARE BRACKET, right-left
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT, left-right
|
||||
0x005E: 0xDE, # CIRCUMFLEX ACCENT, right-left
|
||||
0x005F: 0x5F, # LOW LINE, left-right
|
||||
0x005F: 0xDF, # LOW LINE, right-left
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET, left-right
|
||||
0x007B: 0xFB, # LEFT CURLY BRACKET, right-left
|
||||
0x007C: 0x7C, # VERTICAL LINE, left-right
|
||||
0x007C: 0xFC, # VERTICAL LINE, right-left
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET, left-right
|
||||
0x007D: 0xFD, # RIGHT CURLY BRACKET, right-left
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0x81, # NO-BREAK SPACE, right-left
|
||||
0x00AB: 0x8C, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x00BB: 0x98, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0x9B, # DIVISION SIGN, right-left
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x060C: 0xAC, # ARABIC COMMA
|
||||
0x061B: 0xBB, # ARABIC SEMICOLON
|
||||
0x061F: 0xBF, # ARABIC QUESTION MARK
|
||||
0x0621: 0xC1, # ARABIC LETTER HAMZA
|
||||
0x0622: 0xC2, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x0623: 0xC3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x0624: 0xC4, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x0625: 0xC5, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x0626: 0xC6, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x0627: 0xC7, # ARABIC LETTER ALEF
|
||||
0x0628: 0xC8, # ARABIC LETTER BEH
|
||||
0x0629: 0xC9, # ARABIC LETTER TEH MARBUTA
|
||||
0x062A: 0xCA, # ARABIC LETTER TEH
|
||||
0x062B: 0xCB, # ARABIC LETTER THEH
|
||||
0x062C: 0xCC, # ARABIC LETTER JEEM
|
||||
0x062D: 0xCD, # ARABIC LETTER HAH
|
||||
0x062E: 0xCE, # ARABIC LETTER KHAH
|
||||
0x062F: 0xCF, # ARABIC LETTER DAL
|
||||
0x0630: 0xD0, # ARABIC LETTER THAL
|
||||
0x0631: 0xD1, # ARABIC LETTER REH
|
||||
0x0632: 0xD2, # ARABIC LETTER ZAIN
|
||||
0x0633: 0xD3, # ARABIC LETTER SEEN
|
||||
0x0634: 0xD4, # ARABIC LETTER SHEEN
|
||||
0x0635: 0xD5, # ARABIC LETTER SAD
|
||||
0x0636: 0xD6, # ARABIC LETTER DAD
|
||||
0x0637: 0xD7, # ARABIC LETTER TAH
|
||||
0x0638: 0xD8, # ARABIC LETTER ZAH
|
||||
0x0639: 0xD9, # ARABIC LETTER AIN
|
||||
0x063A: 0xDA, # ARABIC LETTER GHAIN
|
||||
0x0640: 0xE0, # ARABIC TATWEEL
|
||||
0x0641: 0xE1, # ARABIC LETTER FEH
|
||||
0x0642: 0xE2, # ARABIC LETTER QAF
|
||||
0x0643: 0xE3, # ARABIC LETTER KAF
|
||||
0x0644: 0xE4, # ARABIC LETTER LAM
|
||||
0x0645: 0xE5, # ARABIC LETTER MEEM
|
||||
0x0646: 0xE6, # ARABIC LETTER NOON
|
||||
0x0647: 0xE7, # ARABIC LETTER HEH
|
||||
0x0648: 0xE8, # ARABIC LETTER WAW
|
||||
0x0649: 0xE9, # ARABIC LETTER ALEF MAKSURA
|
||||
0x064A: 0xEA, # ARABIC LETTER YEH
|
||||
0x064B: 0xEB, # ARABIC FATHATAN
|
||||
0x064C: 0xEC, # ARABIC DAMMATAN
|
||||
0x064D: 0xED, # ARABIC KASRATAN
|
||||
0x064E: 0xEE, # ARABIC FATHA
|
||||
0x064F: 0xEF, # ARABIC DAMMA
|
||||
0x0650: 0xF0, # ARABIC KASRA
|
||||
0x0651: 0xF1, # ARABIC SHADDA
|
||||
0x0652: 0xF2, # ARABIC SUKUN
|
||||
0x066A: 0xA5, # ARABIC PERCENT SIGN
|
||||
0x0679: 0xF4, # ARABIC LETTER TTEH
|
||||
0x067E: 0xF3, # ARABIC LETTER PEH
|
||||
0x0686: 0xF5, # ARABIC LETTER TCHEH
|
||||
0x0688: 0xF9, # ARABIC LETTER DDAL
|
||||
0x0691: 0xFA, # ARABIC LETTER RREH
|
||||
0x0698: 0xFE, # ARABIC LETTER JEH
|
||||
0x06A4: 0xF7, # ARABIC LETTER VEH
|
||||
0x06AF: 0xF8, # ARABIC LETTER GAF
|
||||
0x06BA: 0x8B, # ARABIC LETTER NOON GHUNNA
|
||||
0x06D2: 0xFF, # ARABIC LETTER YEH BARREE
|
||||
0x06D5: 0xF6, # ARABIC LETTER AE
|
||||
0x06F0: 0xB0, # EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override)
|
||||
0x06F1: 0xB1, # EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override)
|
||||
0x06F2: 0xB2, # EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override)
|
||||
0x06F3: 0xB3, # EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override)
|
||||
0x06F4: 0xB4, # EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override)
|
||||
0x06F5: 0xB5, # EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override)
|
||||
0x06F6: 0xB6, # EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override)
|
||||
0x06F7: 0xB7, # EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override)
|
||||
0x06F8: 0xB8, # EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override)
|
||||
0x06F9: 0xB9, # EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override)
|
||||
0x2026: 0x93, # HORIZONTAL ELLIPSIS, right-left
|
||||
0x274A: 0xC0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A3: 0x92, # POUND SIGN
|
||||
0x00A5: 0xB4, # YEN SIGN
|
||||
0x00A6: 0x9B, # BROKEN BAR
|
||||
0x00A7: 0xAC, # SECTION SIGN
|
||||
0x00A8: 0x8C, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AD: 0xFF, # SOFT HYPHEN # before Mac OS 9.2.2, was undefined
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00B0: 0xAE, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B2: 0x82, # SUPERSCRIPT TWO
|
||||
0x00B3: 0x84, # SUPERSCRIPT THREE
|
||||
0x00B7: 0xAF, # MIDDLE DOT
|
||||
0x00B9: 0x81, # SUPERSCRIPT ONE
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BD: 0x97, # VULGAR FRACTION ONE HALF
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x0384: 0x8B, # GREEK TONOS
|
||||
0x0385: 0x87, # GREEK DIALYTIKA TONOS
|
||||
0x0386: 0xCD, # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0x0388: 0xCE, # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0x0389: 0xD7, # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
0x038A: 0xD8, # GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
0x038C: 0xD9, # GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
0x038E: 0xDA, # GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||
0x038F: 0xDF, # GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||
0x0390: 0xFD, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
0x0391: 0xB0, # GREEK CAPITAL LETTER ALPHA
|
||||
0x0392: 0xB5, # GREEK CAPITAL LETTER BETA
|
||||
0x0393: 0xA1, # GREEK CAPITAL LETTER GAMMA
|
||||
0x0394: 0xA2, # GREEK CAPITAL LETTER DELTA
|
||||
0x0395: 0xB6, # GREEK CAPITAL LETTER EPSILON
|
||||
0x0396: 0xB7, # GREEK CAPITAL LETTER ZETA
|
||||
0x0397: 0xB8, # GREEK CAPITAL LETTER ETA
|
||||
0x0398: 0xA3, # GREEK CAPITAL LETTER THETA
|
||||
0x0399: 0xB9, # GREEK CAPITAL LETTER IOTA
|
||||
0x039A: 0xBA, # GREEK CAPITAL LETTER KAPPA
|
||||
0x039B: 0xA4, # GREEK CAPITAL LETTER LAMDA
|
||||
0x039C: 0xBB, # GREEK CAPITAL LETTER MU
|
||||
0x039D: 0xC1, # GREEK CAPITAL LETTER NU
|
||||
0x039E: 0xA5, # GREEK CAPITAL LETTER XI
|
||||
0x039F: 0xC3, # GREEK CAPITAL LETTER OMICRON
|
||||
0x03A0: 0xA6, # GREEK CAPITAL LETTER PI
|
||||
0x03A1: 0xC4, # GREEK CAPITAL LETTER RHO
|
||||
0x03A3: 0xAA, # GREEK CAPITAL LETTER SIGMA
|
||||
0x03A4: 0xC6, # GREEK CAPITAL LETTER TAU
|
||||
0x03A5: 0xCB, # GREEK CAPITAL LETTER UPSILON
|
||||
0x03A6: 0xBC, # GREEK CAPITAL LETTER PHI
|
||||
0x03A7: 0xCC, # GREEK CAPITAL LETTER CHI
|
||||
0x03A8: 0xBE, # GREEK CAPITAL LETTER PSI
|
||||
0x03A9: 0xBF, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03AA: 0xAB, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||
0x03AB: 0xBD, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03AC: 0xC0, # GREEK SMALL LETTER ALPHA WITH TONOS
|
||||
0x03AD: 0xDB, # GREEK SMALL LETTER EPSILON WITH TONOS
|
||||
0x03AE: 0xDC, # GREEK SMALL LETTER ETA WITH TONOS
|
||||
0x03AF: 0xDD, # GREEK SMALL LETTER IOTA WITH TONOS
|
||||
0x03B0: 0xFE, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
0x03B1: 0xE1, # GREEK SMALL LETTER ALPHA
|
||||
0x03B2: 0xE2, # GREEK SMALL LETTER BETA
|
||||
0x03B3: 0xE7, # GREEK SMALL LETTER GAMMA
|
||||
0x03B4: 0xE4, # GREEK SMALL LETTER DELTA
|
||||
0x03B5: 0xE5, # GREEK SMALL LETTER EPSILON
|
||||
0x03B6: 0xFA, # GREEK SMALL LETTER ZETA
|
||||
0x03B7: 0xE8, # GREEK SMALL LETTER ETA
|
||||
0x03B8: 0xF5, # GREEK SMALL LETTER THETA
|
||||
0x03B9: 0xE9, # GREEK SMALL LETTER IOTA
|
||||
0x03BA: 0xEB, # GREEK SMALL LETTER KAPPA
|
||||
0x03BB: 0xEC, # GREEK SMALL LETTER LAMDA
|
||||
0x03BC: 0xED, # GREEK SMALL LETTER MU
|
||||
0x03BD: 0xEE, # GREEK SMALL LETTER NU
|
||||
0x03BE: 0xEA, # GREEK SMALL LETTER XI
|
||||
0x03BF: 0xEF, # GREEK SMALL LETTER OMICRON
|
||||
0x03C0: 0xF0, # GREEK SMALL LETTER PI
|
||||
0x03C1: 0xF2, # GREEK SMALL LETTER RHO
|
||||
0x03C2: 0xF7, # GREEK SMALL LETTER FINAL SIGMA
|
||||
0x03C3: 0xF3, # GREEK SMALL LETTER SIGMA
|
||||
0x03C4: 0xF4, # GREEK SMALL LETTER TAU
|
||||
0x03C5: 0xF9, # GREEK SMALL LETTER UPSILON
|
||||
0x03C6: 0xE6, # GREEK SMALL LETTER PHI
|
||||
0x03C7: 0xF8, # GREEK SMALL LETTER CHI
|
||||
0x03C8: 0xE3, # GREEK SMALL LETTER PSI
|
||||
0x03C9: 0xF6, # GREEK SMALL LETTER OMEGA
|
||||
0x03CA: 0xFB, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
|
||||
0x03CB: 0xFC, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
|
||||
0x03CC: 0xDE, # GREEK SMALL LETTER OMICRON WITH TONOS
|
||||
0x03CD: 0xE0, # GREEK SMALL LETTER UPSILON WITH TONOS
|
||||
0x03CE: 0xF1, # GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2015: 0xD1, # HORIZONTAL BAR
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2022: 0x96, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0x98, # PER MILLE SIGN
|
||||
0x20AC: 0x9C, # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN
|
||||
0x2122: 0x93, # TRADE MARK SIGN
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A1: 0xC1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xB4, # YEN SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00AF: 0xF8, # MACRON
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xAB, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00B7: 0xE1, # MIDDLE DOT
|
||||
0x00B8: 0xFC, # CEDILLA
|
||||
0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xC0, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xAE, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D0: 0xDC, # LATIN CAPITAL LETTER ETH
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DD: 0xA0, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00DE: 0xDE, # LATIN CAPITAL LETTER THORN
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xBE, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F0: 0xDD, # LATIN SMALL LETTER ETH
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FD: 0xE0, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00FE: 0xDF, # LATIN SMALL LETTER THORN
|
||||
0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x02D8: 0xF9, # BREVE
|
||||
0x02D9: 0xFA, # DOT ABOVE
|
||||
0x02DA: 0xFB, # RING ABOVE
|
||||
0x02DB: 0xFE, # OGONEK
|
||||
0x02DC: 0xF7, # SMALL TILDE
|
||||
0x02DD: 0xFD, # DOUBLE ACUTE ACCENT
|
||||
0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03C0: 0xB9, # GREEK SMALL LETTER PI
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0xE4, # PER MILLE SIGN
|
||||
0x2044: 0xDA, # FRACTION SLASH
|
||||
0x20AC: 0xDB, # EURO SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x220F: 0xB8, # N-ARY PRODUCT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x222B: 0xBA, # INTEGRAL
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
0xF8FF: 0xF0, # Apple logo
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A1: 0xC1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xB4, # YEN SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00AF: 0xF8, # MACRON
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xAB, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00B7: 0xE1, # MIDDLE DOT
|
||||
0x00B8: 0xFC, # CEDILLA
|
||||
0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xC0, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xAE, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xBE, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x02D8: 0xF9, # BREVE
|
||||
0x02D9: 0xFA, # DOT ABOVE
|
||||
0x02DA: 0xFB, # RING ABOVE
|
||||
0x02DB: 0xFE, # OGONEK
|
||||
0x02DC: 0xF7, # SMALL TILDE
|
||||
0x02DD: 0xFD, # DOUBLE ACUTE ACCENT
|
||||
0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03C0: 0xB9, # GREEK SMALL LETTER PI
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2021: 0xE0, # DOUBLE DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0xE4, # PER MILLE SIGN
|
||||
0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x2044: 0xDA, # FRACTION SLASH
|
||||
0x20AC: 0xDB, # EURO SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x220F: 0xB8, # N-ARY PRODUCT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x222B: 0xBA, # INTEGRAL
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
0xF8FF: 0xF0, # Apple logo
|
||||
0xFB01: 0xDE, # LATIN SMALL LIGATURE FI
|
||||
0xFB02: 0xDF, # LATIN SMALL LIGATURE FL
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A1: 0xC1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xB4, # YEN SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00AF: 0xF8, # MACRON
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xAB, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00B7: 0xE1, # MIDDLE DOT
|
||||
0x00B8: 0xFC, # CEDILLA
|
||||
0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xC0, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x0102: 0xAE, # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0x0103: 0xBE, # LATIN SMALL LETTER A WITH BREVE
|
||||
0x0131: 0xF5, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x0218: 0xAF, # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0x0219: 0xBF, # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0x021A: 0xDE, # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0x021B: 0xDF, # LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
|
||||
0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x02D8: 0xF9, # BREVE
|
||||
0x02D9: 0xFA, # DOT ABOVE
|
||||
0x02DA: 0xFB, # RING ABOVE
|
||||
0x02DB: 0xFE, # OGONEK
|
||||
0x02DC: 0xF7, # SMALL TILDE
|
||||
0x02DD: 0xFD, # DOUBLE ACUTE ACCENT
|
||||
0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03C0: 0xB9, # GREEK SMALL LETTER PI
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2021: 0xE0, # DOUBLE DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0xE4, # PER MILLE SIGN
|
||||
0x2039: 0xDC, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x203A: 0xDD, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x2044: 0xDA, # FRACTION SLASH
|
||||
0x20AC: 0xDB, # EURO SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x220F: 0xB8, # N-ARY PRODUCT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x222B: 0xBA, # INTEGRAL
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
0xF8FF: 0xF0, # Apple logo
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,263 +303,5 @@ decoding_table = (
|
|||
u'\u02c7' # 0xFF -> CARON
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # CONTROL CHARACTER
|
||||
0x0001: 0x01, # CONTROL CHARACTER
|
||||
0x0002: 0x02, # CONTROL CHARACTER
|
||||
0x0003: 0x03, # CONTROL CHARACTER
|
||||
0x0004: 0x04, # CONTROL CHARACTER
|
||||
0x0005: 0x05, # CONTROL CHARACTER
|
||||
0x0006: 0x06, # CONTROL CHARACTER
|
||||
0x0007: 0x07, # CONTROL CHARACTER
|
||||
0x0008: 0x08, # CONTROL CHARACTER
|
||||
0x0009: 0x09, # CONTROL CHARACTER
|
||||
0x000A: 0x0A, # CONTROL CHARACTER
|
||||
0x000B: 0x0B, # CONTROL CHARACTER
|
||||
0x000C: 0x0C, # CONTROL CHARACTER
|
||||
0x000D: 0x0D, # CONTROL CHARACTER
|
||||
0x000E: 0x0E, # CONTROL CHARACTER
|
||||
0x000F: 0x0F, # CONTROL CHARACTER
|
||||
0x0010: 0x10, # CONTROL CHARACTER
|
||||
0x0011: 0x11, # CONTROL CHARACTER
|
||||
0x0012: 0x12, # CONTROL CHARACTER
|
||||
0x0013: 0x13, # CONTROL CHARACTER
|
||||
0x0014: 0x14, # CONTROL CHARACTER
|
||||
0x0015: 0x15, # CONTROL CHARACTER
|
||||
0x0016: 0x16, # CONTROL CHARACTER
|
||||
0x0017: 0x17, # CONTROL CHARACTER
|
||||
0x0018: 0x18, # CONTROL CHARACTER
|
||||
0x0019: 0x19, # CONTROL CHARACTER
|
||||
0x001A: 0x1A, # CONTROL CHARACTER
|
||||
0x001B: 0x1B, # CONTROL CHARACTER
|
||||
0x001C: 0x1C, # CONTROL CHARACTER
|
||||
0x001D: 0x1D, # CONTROL CHARACTER
|
||||
0x001E: 0x1E, # CONTROL CHARACTER
|
||||
0x001F: 0x1F, # CONTROL CHARACTER
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # CONTROL CHARACTER
|
||||
0x00A0: 0xCA, # NO-BREAK SPACE
|
||||
0x00A1: 0xC1, # INVERTED EXCLAMATION MARK
|
||||
0x00A2: 0xA2, # CENT SIGN
|
||||
0x00A3: 0xA3, # POUND SIGN
|
||||
0x00A5: 0xB4, # YEN SIGN
|
||||
0x00A7: 0xA4, # SECTION SIGN
|
||||
0x00A8: 0xAC, # DIAERESIS
|
||||
0x00A9: 0xA9, # COPYRIGHT SIGN
|
||||
0x00AA: 0xBB, # FEMININE ORDINAL INDICATOR
|
||||
0x00AB: 0xC7, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00AC: 0xC2, # NOT SIGN
|
||||
0x00AE: 0xA8, # REGISTERED SIGN
|
||||
0x00AF: 0xF8, # MACRON
|
||||
0x00B0: 0xA1, # DEGREE SIGN
|
||||
0x00B1: 0xB1, # PLUS-MINUS SIGN
|
||||
0x00B4: 0xAB, # ACUTE ACCENT
|
||||
0x00B5: 0xB5, # MICRO SIGN
|
||||
0x00B6: 0xA6, # PILCROW SIGN
|
||||
0x00B7: 0xE1, # MIDDLE DOT
|
||||
0x00B8: 0xFC, # CEDILLA
|
||||
0x00BA: 0xBC, # MASCULINE ORDINAL INDICATOR
|
||||
0x00BB: 0xC8, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00BF: 0xC0, # INVERTED QUESTION MARK
|
||||
0x00C0: 0xCB, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00C1: 0xE7, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00C2: 0xE5, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00C3: 0xCC, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00C4: 0x80, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00C5: 0x81, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00C6: 0xAE, # LATIN CAPITAL LETTER AE
|
||||
0x00C7: 0x82, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00C8: 0xE9, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00C9: 0x83, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00CA: 0xE6, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00CB: 0xE8, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00CC: 0xED, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00CD: 0xEA, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00CE: 0xEB, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00CF: 0xEC, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00D1: 0x84, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00D2: 0xF1, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00D3: 0xEE, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00D4: 0xEF, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00D5: 0xCD, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00D6: 0x85, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00D8: 0xAF, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00D9: 0xF4, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00DA: 0xF2, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00DB: 0xF3, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00DC: 0x86, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00DF: 0xA7, # LATIN SMALL LETTER SHARP S
|
||||
0x00E0: 0x88, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00E1: 0x87, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00E2: 0x89, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00E3: 0x8B, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00E4: 0x8A, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00E5: 0x8C, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00E6: 0xBE, # LATIN SMALL LETTER AE
|
||||
0x00E7: 0x8D, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00E8: 0x8F, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00E9: 0x8E, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00EA: 0x90, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00EB: 0x91, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00EC: 0x93, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ED: 0x92, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00EE: 0x94, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00EF: 0x95, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00F1: 0x96, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00F2: 0x98, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00F3: 0x97, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00F4: 0x99, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00F5: 0x9B, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00F6: 0x9A, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00F7: 0xD6, # DIVISION SIGN
|
||||
0x00F8: 0xBF, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00F9: 0x9D, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00FA: 0x9C, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00FB: 0x9E, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00FC: 0x9F, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00FF: 0xD8, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x011E: 0xDA, # LATIN CAPITAL LETTER G WITH BREVE
|
||||
0x011F: 0xDB, # LATIN SMALL LETTER G WITH BREVE
|
||||
0x0130: 0xDC, # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0x0131: 0xDD, # LATIN SMALL LETTER DOTLESS I
|
||||
0x0152: 0xCE, # LATIN CAPITAL LIGATURE OE
|
||||
0x0153: 0xCF, # LATIN SMALL LIGATURE OE
|
||||
0x015E: 0xDE, # LATIN CAPITAL LETTER S WITH CEDILLA
|
||||
0x015F: 0xDF, # LATIN SMALL LETTER S WITH CEDILLA
|
||||
0x0178: 0xD9, # LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
0x0192: 0xC4, # LATIN SMALL LETTER F WITH HOOK
|
||||
0x02C6: 0xF6, # MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02C7: 0xFF, # CARON
|
||||
0x02D8: 0xF9, # BREVE
|
||||
0x02D9: 0xFA, # DOT ABOVE
|
||||
0x02DA: 0xFB, # RING ABOVE
|
||||
0x02DB: 0xFE, # OGONEK
|
||||
0x02DC: 0xF7, # SMALL TILDE
|
||||
0x02DD: 0xFD, # DOUBLE ACUTE ACCENT
|
||||
0x03A9: 0xBD, # GREEK CAPITAL LETTER OMEGA
|
||||
0x03C0: 0xB9, # GREEK SMALL LETTER PI
|
||||
0x2013: 0xD0, # EN DASH
|
||||
0x2014: 0xD1, # EM DASH
|
||||
0x2018: 0xD4, # LEFT SINGLE QUOTATION MARK
|
||||
0x2019: 0xD5, # RIGHT SINGLE QUOTATION MARK
|
||||
0x201A: 0xE2, # SINGLE LOW-9 QUOTATION MARK
|
||||
0x201C: 0xD2, # LEFT DOUBLE QUOTATION MARK
|
||||
0x201D: 0xD3, # RIGHT DOUBLE QUOTATION MARK
|
||||
0x201E: 0xE3, # DOUBLE LOW-9 QUOTATION MARK
|
||||
0x2020: 0xA0, # DAGGER
|
||||
0x2021: 0xE0, # DOUBLE DAGGER
|
||||
0x2022: 0xA5, # BULLET
|
||||
0x2026: 0xC9, # HORIZONTAL ELLIPSIS
|
||||
0x2030: 0xE4, # PER MILLE SIGN
|
||||
0x2122: 0xAA, # TRADE MARK SIGN
|
||||
0x2202: 0xB6, # PARTIAL DIFFERENTIAL
|
||||
0x2206: 0xC6, # INCREMENT
|
||||
0x220F: 0xB8, # N-ARY PRODUCT
|
||||
0x2211: 0xB7, # N-ARY SUMMATION
|
||||
0x221A: 0xC3, # SQUARE ROOT
|
||||
0x221E: 0xB0, # INFINITY
|
||||
0x222B: 0xBA, # INTEGRAL
|
||||
0x2248: 0xC5, # ALMOST EQUAL TO
|
||||
0x2260: 0xAD, # NOT EQUAL TO
|
||||
0x2264: 0xB2, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0xB3, # GREATER-THAN OR EQUAL TO
|
||||
0x25CA: 0xD7, # LOZENGE
|
||||
0xF8A0: 0xF5, # undefined1
|
||||
0xF8FF: 0xF0, # Apple logo
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -9,14 +9,14 @@ import codecs
|
|||
class Codec(codecs.Codec):
|
||||
|
||||
def encode(self,input,errors='strict'):
|
||||
return codecs.charmap_encode(input,errors,encoding_map)
|
||||
return codecs.charmap_encode(input,errors,encoding_table)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class IncrementalEncoder(codecs.IncrementalEncoder):
|
||||
def encode(self, input, final=False):
|
||||
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
|
||||
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
|
||||
|
||||
class IncrementalDecoder(codecs.IncrementalDecoder):
|
||||
def decode(self, input, final=False):
|
||||
|
@ -303,254 +303,5 @@ decoding_table = (
|
|||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000A: 0x0A, # LINE FEED
|
||||
0x000B: 0x0B, # VERTICAL TABULATION
|
||||
0x000C: 0x0C, # FORM FEED
|
||||
0x000D: 0x0D, # CARRIAGE RETURN
|
||||
0x000E: 0x0E, # SHIFT OUT
|
||||
0x000F: 0x0F, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001A: 0x1A, # SUBSTITUTE
|
||||
0x001B: 0x1B, # ESCAPE
|
||||
0x001C: 0x1C, # FILE SEPARATOR
|
||||
0x001D: 0x1D, # GROUP SEPARATOR
|
||||
0x001E: 0x1E, # RECORD SEPARATOR
|
||||
0x001F: 0x1F, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002A: 0x2A, # ASTERISK
|
||||
0x002B: 0x2B, # PLUS SIGN
|
||||
0x002C: 0x2C, # COMMA
|
||||
0x002D: 0x2D, # HYPHEN-MINUS
|
||||
0x002E: 0x2E, # FULL STOP
|
||||
0x002F: 0x2F, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003A: 0x3A, # COLON
|
||||
0x003B: 0x3B, # SEMICOLON
|
||||
0x003C: 0x3C, # LESS-THAN SIGN
|
||||
0x003D: 0x3D, # EQUALS SIGN
|
||||
0x003E: 0x3E, # GREATER-THAN SIGN
|
||||
0x003F: 0x3F, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004A: 0x4A, # LATIN CAPITAL LETTER J
|
||||
0x004B: 0x4B, # LATIN CAPITAL LETTER K
|
||||
0x004C: 0x4C, # LATIN CAPITAL LETTER L
|
||||
0x004D: 0x4D, # LATIN CAPITAL LETTER M
|
||||
0x004E: 0x4E, # LATIN CAPITAL LETTER N
|
||||
0x004F: 0x4F, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005A: 0x5A, # LATIN CAPITAL LETTER Z
|
||||
0x005B: 0x5B, # LEFT SQUARE BRACKET
|
||||
0x005C: 0x5C, # REVERSE SOLIDUS
|
||||
0x005D: 0x5D, # RIGHT SQUARE BRACKET
|
||||
0x005E: 0x5E, # CIRCUMFLEX ACCENT
|
||||
0x005F: 0x5F, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006A: 0x6A, # LATIN SMALL LETTER J
|
||||
0x006B: 0x6B, # LATIN SMALL LETTER K
|
||||
0x006C: 0x6C, # LATIN SMALL LETTER L
|
||||
0x006D: 0x6D, # LATIN SMALL LETTER M
|
||||
0x006E: 0x6E, # LATIN SMALL LETTER N
|
||||
0x006F: 0x6F, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007A: 0x7A, # LATIN SMALL LETTER Z
|
||||
0x007B: 0x7B, # LEFT CURLY BRACKET
|
||||
0x007C: 0x7C, # VERTICAL LINE
|
||||
0x007D: 0x7D, # RIGHT CURLY BRACKET
|
||||
0x007E: 0x7E, # TILDE
|
||||
0x007F: 0x7F, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008A: 0x8A, # <control>
|
||||
0x008B: 0x8B, # <control>
|
||||
0x008C: 0x8C, # <control>
|
||||
0x008D: 0x8D, # <control>
|
||||
0x008E: 0x8E, # <control>
|
||||
0x008F: 0x8F, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009A: 0x9A, # <control>
|
||||
0x009B: 0x9B, # <control>
|
||||
0x009C: 0x9C, # <control>
|
||||
0x009D: 0x9D, # <control>
|
||||
0x009E: 0x9E, # <control>
|
||||
0x009F: 0x9F, # <control>
|
||||
0x0E01: 0xA1, # THAI CHARACTER KO KAI
|
||||
0x0E02: 0xA2, # THAI CHARACTER KHO KHAI
|
||||
0x0E03: 0xA3, # THAI CHARACTER KHO KHUAT
|
||||
0x0E04: 0xA4, # THAI CHARACTER KHO KHWAI
|
||||
0x0E05: 0xA5, # THAI CHARACTER KHO KHON
|
||||
0x0E06: 0xA6, # THAI CHARACTER KHO RAKHANG
|
||||
0x0E07: 0xA7, # THAI CHARACTER NGO NGU
|
||||
0x0E08: 0xA8, # THAI CHARACTER CHO CHAN
|
||||
0x0E09: 0xA9, # THAI CHARACTER CHO CHING
|
||||
0x0E0A: 0xAA, # THAI CHARACTER CHO CHANG
|
||||
0x0E0B: 0xAB, # THAI CHARACTER SO SO
|
||||
0x0E0C: 0xAC, # THAI CHARACTER CHO CHOE
|
||||
0x0E0D: 0xAD, # THAI CHARACTER YO YING
|
||||
0x0E0E: 0xAE, # THAI CHARACTER DO CHADA
|
||||
0x0E0F: 0xAF, # THAI CHARACTER TO PATAK
|
||||
0x0E10: 0xB0, # THAI CHARACTER THO THAN
|
||||
0x0E11: 0xB1, # THAI CHARACTER THO NANGMONTHO
|
||||
0x0E12: 0xB2, # THAI CHARACTER THO PHUTHAO
|
||||
0x0E13: 0xB3, # THAI CHARACTER NO NEN
|
||||
0x0E14: 0xB4, # THAI CHARACTER DO DEK
|
||||
0x0E15: 0xB5, # THAI CHARACTER TO TAO
|
||||
0x0E16: 0xB6, # THAI CHARACTER THO THUNG
|
||||
0x0E17: 0xB7, # THAI CHARACTER THO THAHAN
|
||||
0x0E18: 0xB8, # THAI CHARACTER THO THONG
|
||||
0x0E19: 0xB9, # THAI CHARACTER NO NU
|
||||
0x0E1A: 0xBA, # THAI CHARACTER BO BAIMAI
|
||||
0x0E1B: 0xBB, # THAI CHARACTER PO PLA
|
||||
0x0E1C: 0xBC, # THAI CHARACTER PHO PHUNG
|
||||
0x0E1D: 0xBD, # THAI CHARACTER FO FA
|
||||
0x0E1E: 0xBE, # THAI CHARACTER PHO PHAN
|
||||
0x0E1F: 0xBF, # THAI CHARACTER FO FAN
|
||||
0x0E20: 0xC0, # THAI CHARACTER PHO SAMPHAO
|
||||
0x0E21: 0xC1, # THAI CHARACTER MO MA
|
||||
0x0E22: 0xC2, # THAI CHARACTER YO YAK
|
||||
0x0E23: 0xC3, # THAI CHARACTER RO RUA
|
||||
0x0E24: 0xC4, # THAI CHARACTER RU
|
||||
0x0E25: 0xC5, # THAI CHARACTER LO LING
|
||||
0x0E26: 0xC6, # THAI CHARACTER LU
|
||||
0x0E27: 0xC7, # THAI CHARACTER WO WAEN
|
||||
0x0E28: 0xC8, # THAI CHARACTER SO SALA
|
||||
0x0E29: 0xC9, # THAI CHARACTER SO RUSI
|
||||
0x0E2A: 0xCA, # THAI CHARACTER SO SUA
|
||||
0x0E2B: 0xCB, # THAI CHARACTER HO HIP
|
||||
0x0E2C: 0xCC, # THAI CHARACTER LO CHULA
|
||||
0x0E2D: 0xCD, # THAI CHARACTER O ANG
|
||||
0x0E2E: 0xCE, # THAI CHARACTER HO NOKHUK
|
||||
0x0E2F: 0xCF, # THAI CHARACTER PAIYANNOI
|
||||
0x0E30: 0xD0, # THAI CHARACTER SARA A
|
||||
0x0E31: 0xD1, # THAI CHARACTER MAI HAN-AKAT
|
||||
0x0E32: 0xD2, # THAI CHARACTER SARA AA
|
||||
0x0E33: 0xD3, # THAI CHARACTER SARA AM
|
||||
0x0E34: 0xD4, # THAI CHARACTER SARA I
|
||||
0x0E35: 0xD5, # THAI CHARACTER SARA II
|
||||
0x0E36: 0xD6, # THAI CHARACTER SARA UE
|
||||
0x0E37: 0xD7, # THAI CHARACTER SARA UEE
|
||||
0x0E38: 0xD8, # THAI CHARACTER SARA U
|
||||
0x0E39: 0xD9, # THAI CHARACTER SARA UU
|
||||
0x0E3A: 0xDA, # THAI CHARACTER PHINTHU
|
||||
0x0E3F: 0xDF, # THAI CURRENCY SYMBOL BAHT
|
||||
0x0E40: 0xE0, # THAI CHARACTER SARA E
|
||||
0x0E41: 0xE1, # THAI CHARACTER SARA AE
|
||||
0x0E42: 0xE2, # THAI CHARACTER SARA O
|
||||
0x0E43: 0xE3, # THAI CHARACTER SARA AI MAIMUAN
|
||||
0x0E44: 0xE4, # THAI CHARACTER SARA AI MAIMALAI
|
||||
0x0E45: 0xE5, # THAI CHARACTER LAKKHANGYAO
|
||||
0x0E46: 0xE6, # THAI CHARACTER MAIYAMOK
|
||||
0x0E47: 0xE7, # THAI CHARACTER MAITAIKHU
|
||||
0x0E48: 0xE8, # THAI CHARACTER MAI EK
|
||||
0x0E49: 0xE9, # THAI CHARACTER MAI THO
|
||||
0x0E4A: 0xEA, # THAI CHARACTER MAI TRI
|
||||
0x0E4B: 0xEB, # THAI CHARACTER MAI CHATTAWA
|
||||
0x0E4C: 0xEC, # THAI CHARACTER THANTHAKHAT
|
||||
0x0E4D: 0xED, # THAI CHARACTER NIKHAHIT
|
||||
0x0E4E: 0xEE, # THAI CHARACTER YAMAKKAN
|
||||
0x0E4F: 0xEF, # THAI CHARACTER FONGMAN
|
||||
0x0E50: 0xF0, # THAI DIGIT ZERO
|
||||
0x0E51: 0xF1, # THAI DIGIT ONE
|
||||
0x0E52: 0xF2, # THAI DIGIT TWO
|
||||
0x0E53: 0xF3, # THAI DIGIT THREE
|
||||
0x0E54: 0xF4, # THAI DIGIT FOUR
|
||||
0x0E55: 0xF5, # THAI DIGIT FIVE
|
||||
0x0E56: 0xF6, # THAI DIGIT SIX
|
||||
0x0E57: 0xF7, # THAI DIGIT SEVEN
|
||||
0x0E58: 0xF8, # THAI DIGIT EIGHT
|
||||
0x0E59: 0xF9, # THAI DIGIT NINE
|
||||
0x0E5A: 0xFA, # THAI CHARACTER ANGKHANKHU
|
||||
0x0E5B: 0xFB, # THAI CHARACTER KHOMUT
|
||||
}
|
||||
### Encoding table
|
||||
encoding_table=codecs.charmap_build(decoding_table)
|
||||
|
|
|
@ -1,26 +1,51 @@
|
|||
"""functools.py - Tools for working with functions
|
||||
"""functools.py - Tools for working with functions and callable objects
|
||||
"""
|
||||
# Python module wrapper for _functools C module
|
||||
# to allow utilities written in Python to be added
|
||||
# to the functools module.
|
||||
# Written by Nick Coghlan <ncoghlan at gmail.com>
|
||||
# Copyright (c) 2006 Python Software Foundation.
|
||||
# Copyright (C) 2006 Python Software Foundation.
|
||||
# See C source code for _functools credits/copyright
|
||||
|
||||
from _functools import partial
|
||||
__all__ = [
|
||||
"partial",
|
||||
]
|
||||
|
||||
# Still to come here (need to write tests and docs):
|
||||
# update_wrapper - utility function to transfer basic function
|
||||
# metadata to wrapper functions
|
||||
# WRAPPER_ASSIGNMENTS & WRAPPER_UPDATES - default args to above
|
||||
# (update_wrapper has been approved by BDFL)
|
||||
# wraps - decorator factory equivalent to:
|
||||
# def wraps(f):
|
||||
# return partial(update_wrapper, wrapped=f)
|
||||
#
|
||||
# The wraps function makes it easy to avoid the bug that afflicts the
|
||||
# decorator example in the python-dev email proposing the
|
||||
# update_wrapper function:
|
||||
# http://mail.python.org/pipermail/python-dev/2006-May/064775.html
|
||||
# update_wrapper() and wraps() are tools to help write
|
||||
# wrapper functions that can handle naive introspection
|
||||
|
||||
# Default for update_wrapper()'s `assigned` argument: attribute names that
# are copied with setattr() from the wrapped function onto the wrapper.
WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__doc__')
|
||||
# Default for update_wrapper()'s `updated` argument: attribute names whose
# value on the wrapper is merged in place (via .update()) with the wrapped
# function's value instead of being replaced.
WRAPPER_UPDATES = ('__dict__',)
|
||||
def update_wrapper(wrapper,
                   wrapped,
                   assigned = WRAPPER_ASSIGNMENTS,
                   updated = WRAPPER_UPDATES):
    """Update a wrapper function to look like the wrapped function

       wrapper is the function to be updated
       wrapped is the original function
       assigned is a tuple naming the attributes assigned directly
       from the wrapped function to the wrapper function (defaults to
       functools.WRAPPER_ASSIGNMENTS)
       updated is a tuple naming the attributes of the wrapper that
       are updated with the corresponding attribute from the wrapped
       function (defaults to functools.WRAPPER_UPDATES)

       Attributes named in assigned or updated that the wrapped object
       does not define are skipped rather than raising AttributeError,
       so arbitrary callables (for example instances with __call__,
       which have no __name__) can be wrapped.

       Returns wrapper.
    """
    for attr in assigned:
        try:
            value = getattr(wrapped, attr)
        except AttributeError:
            # The wrapped callable does not provide this attribute;
            # leave the wrapper's own value in place.
            continue
        setattr(wrapper, attr, value)
    for attr in updated:
        # A missing attribute on the wrapped object contributes nothing.
        getattr(wrapper, attr).update(getattr(wrapped, attr, {}))
    # Return the wrapper so this can be used as a decorator via partial()
    return wrapper
|
||||
|
||||
def wraps(wrapped,
          assigned = WRAPPER_ASSIGNMENTS,
          updated = WRAPPER_UPDATES):
    """Build a decorator that makes its target look like wrapped

       The returned object is partial(update_wrapper, ...) with wrapped,
       assigned and updated already bound as keyword arguments; calling
       it with a function passes that function as the wrapper argument
       of update_wrapper().  The defaults for assigned and updated are
       the same as for update_wrapper().
    """
    # Everything except the wrapper itself is fixed up front; the
    # decorated function is supplied later as the sole positional argument.
    bound = {'wrapped': wrapped, 'assigned': assigned, 'updated': updated}
    return partial(update_wrapper, **bound)
|
||||
|
|
|
@ -468,7 +468,7 @@ class DisplayStyle:
|
|||
"""DisplayStyle - handle configuration options shared by
|
||||
(multiple) Display Items"""
|
||||
|
||||
def __init__(self, itemtype, cnf={}, **kw ):
|
||||
def __init__(self, itemtype, cnf={}, **kw):
|
||||
master = _default_root # global from Tkinter
|
||||
if not master and cnf.has_key('refwindow'): master=cnf['refwindow']
|
||||
elif not master and kw.has_key('refwindow'): master= kw['refwindow']
|
||||
|
@ -480,7 +480,7 @@ class DisplayStyle:
|
|||
def __str__(self):
|
||||
return self.stylename
|
||||
|
||||
def _options(self, cnf, kw ):
|
||||
def _options(self, cnf, kw):
|
||||
if kw and cnf:
|
||||
cnf = _cnfmerge((cnf, kw))
|
||||
elif kw:
|
||||
|
|
|
@ -140,7 +140,7 @@ class ParserBase:
|
|||
|
||||
# Internal -- parse a marked section
|
||||
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
|
||||
def parse_marked_section( self, i, report=1 ):
|
||||
def parse_marked_section(self, i, report=1):
|
||||
rawdata= self.rawdata
|
||||
assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
|
||||
sectName, j = self._scan_name( i+3, i )
|
||||
|
|
|
@ -1,207 +0,0 @@
|
|||
# Generated from 'WASTE.h'
|
||||
|
||||
kPascalStackBased = None # workaround for header parsing
|
||||
def FOUR_CHAR_CODE(x): return x
|
||||
weCantUndoErr = -10015
|
||||
weEmptySelectionErr = -10013
|
||||
weUnknownObjectTypeErr = -9478
|
||||
weObjectNotFoundErr = -9477
|
||||
weReadOnlyErr = -9476
|
||||
weTextNotFoundErr = -9474
|
||||
weInvalidTextEncodingErr = -9473
|
||||
weDuplicateAttributeErr = -9472
|
||||
weInvalidAttributeSizeErr = -9471
|
||||
weReadOnlyAttributeErr = -9470
|
||||
weOddByteCountErr = -9469
|
||||
weHandlerNotFoundErr = -1717
|
||||
weNotHandledErr = -1708
|
||||
weNewerVersionErr = -1706
|
||||
weCorruptDataErr = -1702
|
||||
weProtocolErr = -603
|
||||
weUndefinedSelectorErr = -50
|
||||
weFlushLeft = -2
|
||||
weFlushRight = -1
|
||||
weFlushDefault = 0
|
||||
weCenter = 1
|
||||
weJustify = 2
|
||||
weDirDefault = 1
|
||||
weDirRightToLeft = -1
|
||||
weDirLeftToRight = 0
|
||||
weDoFont = 0x0001
|
||||
weDoFace = 0x0002
|
||||
weDoSize = 0x0004
|
||||
weDoColor = 0x0008
|
||||
weDoAll = weDoFont | weDoFace | weDoSize | weDoColor
|
||||
weDoAddSize = 0x0010
|
||||
weDoToggleFace = 0x0020
|
||||
weDoReplaceFace = 0x0040
|
||||
weDoPreserveScript = 0x0080
|
||||
weDoExtractSubscript = 0x0100
|
||||
weDoFaceMask = 0x0200
|
||||
weDoDirection = 0x00000001
|
||||
weDoAlignment = 0x00000002
|
||||
weDoLeftIndent = 0x00000004
|
||||
weDoRightIndent = 0x00000008
|
||||
weDoFirstLineIndent = 0x00000010
|
||||
weDoLineSpacing = 0x00000020
|
||||
weDoSpaceBefore = 0x00000040
|
||||
weDoSpaceAfter = 0x00000080
|
||||
weDoBottomBorderStyle = 0x00000400
|
||||
kLeadingEdge = -1
|
||||
kTrailingEdge = 0
|
||||
kObjectEdge = 2
|
||||
weFAutoScroll = 0
|
||||
weFOutlineHilite = 2
|
||||
weFReadOnly = 5
|
||||
weFUndo = 6
|
||||
weFIntCutAndPaste = 7
|
||||
weFDragAndDrop = 8
|
||||
weFInhibitRecal = 9
|
||||
weFUseTempMem = 10
|
||||
weFDrawOffscreen = 11
|
||||
weFInhibitRedraw = 12
|
||||
weFMonoStyled = 13
|
||||
weFMultipleUndo = 14
|
||||
weFNoKeyboardSync = 29
|
||||
weFInhibitICSupport = 30
|
||||
weFInhibitColor = 31
|
||||
# weDoAutoScroll = 1UL << weFAutoScroll
|
||||
# weDoOutlineHilite = 1UL << weFOutlineHilite
|
||||
# weDoReadOnly = 1UL << weFReadOnly
|
||||
# weDoUndo = 1UL << weFUndo
|
||||
# weDoIntCutAndPaste = 1UL << weFIntCutAndPaste
|
||||
# weDoDragAndDrop = 1UL << weFDragAndDrop
|
||||
# weDoInhibitRecal = 1UL << weFInhibitRecal
|
||||
# weDoUseTempMem = 1UL << weFUseTempMem
|
||||
# weDoDrawOffscreen = 1UL << weFDrawOffscreen
|
||||
# weDoInhibitRedraw = 1UL << weFInhibitRedraw
|
||||
# weDoMonoStyled = 1UL << weFMonoStyled
|
||||
# weDoMultipleUndo = 1UL << weFMultipleUndo
|
||||
# weDoNoKeyboardSync = 1UL << weFNoKeyboardSync
|
||||
# weDoInhibitICSupport = 1UL << weFInhibitICSupport
|
||||
# weDoInhibitColor = 1UL << weFInhibitColor
|
||||
weBitToggle = -2
|
||||
weBitTest = -1
|
||||
weBitClear = 0
|
||||
weBitSet = 1
|
||||
weLowerCase = 0
|
||||
weUpperCase = 1
|
||||
weFindWholeWords = 0x00000001
|
||||
weFindCaseInsensitive = 0x00000002
|
||||
weFindDiacriticalInsensitive = 0x00000004
|
||||
wePutIntCutAndPaste = 0x00000001
|
||||
wePutAddToTypingSequence = 0x00000002
|
||||
wePutDetectUnicodeBOM = 0x00000200
|
||||
weStreamDestinationKindMask = 0x000000FF
|
||||
weStreamIncludeObjects = 0x00000100
|
||||
weGetAddUnicodeBOM = 0x00000200
|
||||
weGetLittleEndian = 0x00000400
|
||||
weTagFontFamily = FOUR_CHAR_CODE('font')
|
||||
weTagFontSize = FOUR_CHAR_CODE('ptsz')
|
||||
weTagPlain = FOUR_CHAR_CODE('plan')
|
||||
weTagBold = FOUR_CHAR_CODE('bold')
|
||||
weTagItalic = FOUR_CHAR_CODE('ital')
|
||||
weTagUnderline = FOUR_CHAR_CODE('undl')
|
||||
weTagOutline = FOUR_CHAR_CODE('outl')
|
||||
weTagShadow = FOUR_CHAR_CODE('shad')
|
||||
weTagCondensed = FOUR_CHAR_CODE('cond')
|
||||
weTagExtended = FOUR_CHAR_CODE('pexp')
|
||||
weTagStrikethrough = FOUR_CHAR_CODE('strk')
|
||||
weTagTextColor = FOUR_CHAR_CODE('colr')
|
||||
weTagBackgroundColor = FOUR_CHAR_CODE('pbcl')
|
||||
weTagTransferMode = FOUR_CHAR_CODE('pptm')
|
||||
weTagVerticalShift = FOUR_CHAR_CODE('xshf')
|
||||
weTagAlignment = FOUR_CHAR_CODE('pjst')
|
||||
weTagDirection = FOUR_CHAR_CODE('LDIR')
|
||||
weTagLineSpacing = FOUR_CHAR_CODE('ledg')
|
||||
weTagLeftIndent = FOUR_CHAR_CODE('lein')
|
||||
weTagRightIndent = FOUR_CHAR_CODE('riin')
|
||||
weTagFirstLineIndent = FOUR_CHAR_CODE('fidt')
|
||||
weTagSpaceBefore = FOUR_CHAR_CODE('spbe')
|
||||
weTagSpaceAfter = FOUR_CHAR_CODE('spaf')
|
||||
weTagBottomBorderStyle = FOUR_CHAR_CODE('BBRD')
|
||||
weTagForceFontFamily = FOUR_CHAR_CODE('ffnt')
|
||||
weTagAddFontSize = FOUR_CHAR_CODE('+siz')
|
||||
weTagAddVerticalShift = FOUR_CHAR_CODE('+shf')
|
||||
weTagTextEncoding = FOUR_CHAR_CODE('ptxe')
|
||||
weTagQDStyles = FOUR_CHAR_CODE('qdst')
|
||||
weTagTETextStyle = FOUR_CHAR_CODE('tets')
|
||||
weTagAlignmentDefault = FOUR_CHAR_CODE('deft')
|
||||
weTagAlignmentLeft = FOUR_CHAR_CODE('left')
|
||||
weTagAlignmentCenter = FOUR_CHAR_CODE('cent')
|
||||
weTagAlignmentRight = FOUR_CHAR_CODE('rght')
|
||||
weTagAlignmentFull = FOUR_CHAR_CODE('full')
|
||||
weTagDirectionDefault = FOUR_CHAR_CODE('deft')
|
||||
weTagDirectionLeftToRight = FOUR_CHAR_CODE('L->R')
|
||||
weTagDirectionRightToLeft = FOUR_CHAR_CODE('R->L')
|
||||
weTagBorderStyleNone = FOUR_CHAR_CODE('NONE')
|
||||
weTagBorderStyleThin = FOUR_CHAR_CODE('SLDL')
|
||||
weTagBorderStyleDotted = FOUR_CHAR_CODE('DTDL')
|
||||
weTagBorderStyleThick = FOUR_CHAR_CODE('THKL')
|
||||
weLineSpacingSingle = 0x00000000
|
||||
weLineSpacingOneAndHalf = 0x00008000
|
||||
weLineSpacingDouble = 0x00010000
|
||||
weCharByteHook = FOUR_CHAR_CODE('cbyt')
|
||||
weCharToPixelHook = FOUR_CHAR_CODE('c2p ')
|
||||
weCharTypeHook = FOUR_CHAR_CODE('ctyp')
|
||||
weClickLoop = FOUR_CHAR_CODE('clik')
|
||||
weCurrentDrag = FOUR_CHAR_CODE('drag')
|
||||
weDrawTextHook = FOUR_CHAR_CODE('draw')
|
||||
weDrawTSMHiliteHook = FOUR_CHAR_CODE('dtsm')
|
||||
weEraseHook = FOUR_CHAR_CODE('eras')
|
||||
weFontFamilyToNameHook = FOUR_CHAR_CODE('ff2n')
|
||||
weFontNameToFamilyHook = FOUR_CHAR_CODE('fn2f')
|
||||
weFluxProc = FOUR_CHAR_CODE('flux')
|
||||
weHiliteDropAreaHook = FOUR_CHAR_CODE('hidr')
|
||||
weLineBreakHook = FOUR_CHAR_CODE('lbrk')
|
||||
wePixelToCharHook = FOUR_CHAR_CODE('p2c ')
|
||||
wePort = FOUR_CHAR_CODE('port')
|
||||
wePreTrackDragHook = FOUR_CHAR_CODE('ptrk')
|
||||
weRefCon = FOUR_CHAR_CODE('refc')
|
||||
weScrollProc = FOUR_CHAR_CODE('scrl')
|
||||
weText = FOUR_CHAR_CODE('text')
|
||||
weTranslateDragHook = FOUR_CHAR_CODE('xdrg')
|
||||
weTranslucencyThreshold = FOUR_CHAR_CODE('tluc')
|
||||
weTSMDocumentID = FOUR_CHAR_CODE('tsmd')
|
||||
weTSMPreUpdate = FOUR_CHAR_CODE('pre ')
|
||||
weTSMPostUpdate = FOUR_CHAR_CODE('post')
|
||||
weURLHint = FOUR_CHAR_CODE('urlh')
|
||||
weWordBreakHook = FOUR_CHAR_CODE('wbrk')
|
||||
weNewHandler = FOUR_CHAR_CODE('new ')
|
||||
weDisposeHandler = FOUR_CHAR_CODE('free')
|
||||
weDrawHandler = FOUR_CHAR_CODE('draw')
|
||||
weClickHandler = FOUR_CHAR_CODE('clik')
|
||||
weStreamHandler = FOUR_CHAR_CODE('strm')
|
||||
weHoverHandler = FOUR_CHAR_CODE('hovr')
|
||||
kTypeText = FOUR_CHAR_CODE('TEXT')
|
||||
kTypeStyles = FOUR_CHAR_CODE('styl')
|
||||
kTypeSoup = FOUR_CHAR_CODE('SOUP')
|
||||
kTypeFontTable = FOUR_CHAR_CODE('FISH')
|
||||
kTypeParaFormat = FOUR_CHAR_CODE('WEpf')
|
||||
kTypeRulerScrap = FOUR_CHAR_CODE('WEru')
|
||||
kTypeCharFormat = FOUR_CHAR_CODE('WEcf')
|
||||
kTypeStyleScrap = FOUR_CHAR_CODE('WEst')
|
||||
kTypeUnicodeText = FOUR_CHAR_CODE('utxt')
|
||||
kTypeUTF8Text = FOUR_CHAR_CODE('UTF8')
|
||||
kTypeStyledText = FOUR_CHAR_CODE('STXT')
|
||||
weAKNone = 0
|
||||
weAKUnspecified = 1
|
||||
weAKTyping = 2
|
||||
weAKCut = 3
|
||||
weAKPaste = 4
|
||||
weAKClear = 5
|
||||
weAKDrag = 6
|
||||
weAKSetStyle = 7
|
||||
weAKSetRuler = 8
|
||||
weAKBackspace = 9
|
||||
weAKFwdDelete = 10
|
||||
weAKCaseChange = 11
|
||||
weAKObjectChange = 12
|
||||
weToScrap = 0
|
||||
weToDrag = 1
|
||||
weToSoup = 2
|
||||
weMouseEnter = 0
|
||||
weMouseWithin = 1
|
||||
weMouseLeave = 2
|
||||
kCurrentSelection = -1
|
||||
kNullStyle = -2
|
|
@ -262,7 +262,7 @@ class ProgressBar:
|
|||
self.w.ShowWindow()
|
||||
self.d.DrawDialog()
|
||||
|
||||
def __del__( self ):
|
||||
def __del__(self):
|
||||
if self.w:
|
||||
self.w.BringToFront()
|
||||
self.w.HideWindow()
|
||||
|
@ -274,7 +274,7 @@ class ProgressBar:
|
|||
self.w.BringToFront()
|
||||
self.w.SetWTitle(newstr)
|
||||
|
||||
def label( self, *newstr ):
|
||||
def label(self, *newstr):
|
||||
"""label(text) - Set text in progress box"""
|
||||
self.w.BringToFront()
|
||||
if newstr:
|
||||
|
|
|
@ -1,207 +0,0 @@
|
|||
# Generated from 'WASTE.h'
|
||||
|
||||
kPascalStackBased = None # workaround for header parsing
|
||||
def FOUR_CHAR_CODE(x): return x
|
||||
weCantUndoErr = -10015
|
||||
weEmptySelectionErr = -10013
|
||||
weUnknownObjectTypeErr = -9478
|
||||
weObjectNotFoundErr = -9477
|
||||
weReadOnlyErr = -9476
|
||||
weTextNotFoundErr = -9474
|
||||
weInvalidTextEncodingErr = -9473
|
||||
weDuplicateAttributeErr = -9472
|
||||
weInvalidAttributeSizeErr = -9471
|
||||
weReadOnlyAttributeErr = -9470
|
||||
weOddByteCountErr = -9469
|
||||
weHandlerNotFoundErr = -1717
|
||||
weNotHandledErr = -1708
|
||||
weNewerVersionErr = -1706
|
||||
weCorruptDataErr = -1702
|
||||
weProtocolErr = -603
|
||||
weUndefinedSelectorErr = -50
|
||||
weFlushLeft = -2
|
||||
weFlushRight = -1
|
||||
weFlushDefault = 0
|
||||
weCenter = 1
|
||||
weJustify = 2
|
||||
weDirDefault = 1
|
||||
weDirRightToLeft = -1
|
||||
weDirLeftToRight = 0
|
||||
weDoFont = 0x0001
|
||||
weDoFace = 0x0002
|
||||
weDoSize = 0x0004
|
||||
weDoColor = 0x0008
|
||||
weDoAll = weDoFont | weDoFace | weDoSize | weDoColor
|
||||
weDoAddSize = 0x0010
|
||||
weDoToggleFace = 0x0020
|
||||
weDoReplaceFace = 0x0040
|
||||
weDoPreserveScript = 0x0080
|
||||
weDoExtractSubscript = 0x0100
|
||||
weDoFaceMask = 0x0200
|
||||
weDoDirection = 0x00000001
|
||||
weDoAlignment = 0x00000002
|
||||
weDoLeftIndent = 0x00000004
|
||||
weDoRightIndent = 0x00000008
|
||||
weDoFirstLineIndent = 0x00000010
|
||||
weDoLineSpacing = 0x00000020
|
||||
weDoSpaceBefore = 0x00000040
|
||||
weDoSpaceAfter = 0x00000080
|
||||
weDoBottomBorderStyle = 0x00000400
|
||||
kLeadingEdge = -1
|
||||
kTrailingEdge = 0
|
||||
kObjectEdge = 2
|
||||
weFAutoScroll = 0
|
||||
weFOutlineHilite = 2
|
||||
weFReadOnly = 5
|
||||
weFUndo = 6
|
||||
weFIntCutAndPaste = 7
|
||||
weFDragAndDrop = 8
|
||||
weFInhibitRecal = 9
|
||||
weFUseTempMem = 10
|
||||
weFDrawOffscreen = 11
|
||||
weFInhibitRedraw = 12
|
||||
weFMonoStyled = 13
|
||||
weFMultipleUndo = 14
|
||||
weFNoKeyboardSync = 29
|
||||
weFInhibitICSupport = 30
|
||||
weFInhibitColor = 31
|
||||
weDoAutoScroll = 1 << weFAutoScroll
|
||||
weDoOutlineHilite = 1 << weFOutlineHilite
|
||||
weDoReadOnly = 1 << weFReadOnly
|
||||
weDoUndo = 1 << weFUndo
|
||||
weDoIntCutAndPaste = 1 << weFIntCutAndPaste
|
||||
weDoDragAndDrop = 1 << weFDragAndDrop
|
||||
weDoInhibitRecal = 1 << weFInhibitRecal
|
||||
weDoUseTempMem = 1 << weFUseTempMem
|
||||
weDoDrawOffscreen = 1 << weFDrawOffscreen
|
||||
weDoInhibitRedraw = 1 << weFInhibitRedraw
|
||||
weDoMonoStyled = 1 << weFMonoStyled
|
||||
weDoMultipleUndo = 1 << weFMultipleUndo
|
||||
weDoNoKeyboardSync = 1 << weFNoKeyboardSync
|
||||
weDoInhibitICSupport = 1 << weFInhibitICSupport
|
||||
# weDoInhibitColor = 1 << weFInhibitColor
|
||||
weBitToggle = -2
|
||||
weBitTest = -1
|
||||
weBitClear = 0
|
||||
weBitSet = 1
|
||||
weLowerCase = 0
|
||||
weUpperCase = 1
|
||||
weFindWholeWords = 0x00000001
|
||||
weFindCaseInsensitive = 0x00000002
|
||||
weFindDiacriticalInsensitive = 0x00000004
|
||||
wePutIntCutAndPaste = 0x00000001
|
||||
wePutAddToTypingSequence = 0x00000002
|
||||
wePutDetectUnicodeBOM = 0x00000200
|
||||
weStreamDestinationKindMask = 0x000000FF
|
||||
weStreamIncludeObjects = 0x00000100
|
||||
weGetAddUnicodeBOM = 0x00000200
|
||||
weGetLittleEndian = 0x00000400
|
||||
weTagFontFamily = FOUR_CHAR_CODE('font')
|
||||
weTagFontSize = FOUR_CHAR_CODE('ptsz')
|
||||
weTagPlain = FOUR_CHAR_CODE('plan')
|
||||
weTagBold = FOUR_CHAR_CODE('bold')
|
||||
weTagItalic = FOUR_CHAR_CODE('ital')
|
||||
weTagUnderline = FOUR_CHAR_CODE('undl')
|
||||
weTagOutline = FOUR_CHAR_CODE('outl')
|
||||
weTagShadow = FOUR_CHAR_CODE('shad')
|
||||
weTagCondensed = FOUR_CHAR_CODE('cond')
|
||||
weTagExtended = FOUR_CHAR_CODE('pexp')
|
||||
weTagStrikethrough = FOUR_CHAR_CODE('strk')
|
||||
weTagTextColor = FOUR_CHAR_CODE('colr')
|
||||
weTagBackgroundColor = FOUR_CHAR_CODE('pbcl')
|
||||
weTagTransferMode = FOUR_CHAR_CODE('pptm')
|
||||
weTagVerticalShift = FOUR_CHAR_CODE('xshf')
|
||||
weTagAlignment = FOUR_CHAR_CODE('pjst')
|
||||
weTagDirection = FOUR_CHAR_CODE('LDIR')
|
||||
weTagLineSpacing = FOUR_CHAR_CODE('ledg')
|
||||
weTagLeftIndent = FOUR_CHAR_CODE('lein')
|
||||
weTagRightIndent = FOUR_CHAR_CODE('riin')
|
||||
weTagFirstLineIndent = FOUR_CHAR_CODE('fidt')
|
||||
weTagSpaceBefore = FOUR_CHAR_CODE('spbe')
|
||||
weTagSpaceAfter = FOUR_CHAR_CODE('spaf')
|
||||
weTagBottomBorderStyle = FOUR_CHAR_CODE('BBRD')
|
||||
weTagForceFontFamily = FOUR_CHAR_CODE('ffnt')
|
||||
weTagAddFontSize = FOUR_CHAR_CODE('+siz')
|
||||
weTagAddVerticalShift = FOUR_CHAR_CODE('+shf')
|
||||
weTagTextEncoding = FOUR_CHAR_CODE('ptxe')
|
||||
weTagQDStyles = FOUR_CHAR_CODE('qdst')
|
||||
weTagTETextStyle = FOUR_CHAR_CODE('tets')
|
||||
weTagAlignmentDefault = FOUR_CHAR_CODE('deft')
|
||||
weTagAlignmentLeft = FOUR_CHAR_CODE('left')
|
||||
weTagAlignmentCenter = FOUR_CHAR_CODE('cent')
|
||||
weTagAlignmentRight = FOUR_CHAR_CODE('rght')
|
||||
weTagAlignmentFull = FOUR_CHAR_CODE('full')
|
||||
weTagDirectionDefault = FOUR_CHAR_CODE('deft')
|
||||
weTagDirectionLeftToRight = FOUR_CHAR_CODE('L->R')
|
||||
weTagDirectionRightToLeft = FOUR_CHAR_CODE('R->L')
|
||||
weTagBorderStyleNone = FOUR_CHAR_CODE('NONE')
|
||||
weTagBorderStyleThin = FOUR_CHAR_CODE('SLDL')
|
||||
weTagBorderStyleDotted = FOUR_CHAR_CODE('DTDL')
|
||||
weTagBorderStyleThick = FOUR_CHAR_CODE('THKL')
|
||||
weLineSpacingSingle = 0x00000000
|
||||
weLineSpacingOneAndHalf = 0x00008000
|
||||
weLineSpacingDouble = 0x00010000
|
||||
weCharByteHook = FOUR_CHAR_CODE('cbyt')
|
||||
weCharToPixelHook = FOUR_CHAR_CODE('c2p ')
|
||||
weCharTypeHook = FOUR_CHAR_CODE('ctyp')
|
||||
weClickLoop = FOUR_CHAR_CODE('clik')
|
||||
weCurrentDrag = FOUR_CHAR_CODE('drag')
|
||||
weDrawTextHook = FOUR_CHAR_CODE('draw')
|
||||
weDrawTSMHiliteHook = FOUR_CHAR_CODE('dtsm')
|
||||
weEraseHook = FOUR_CHAR_CODE('eras')
|
||||
weFontFamilyToNameHook = FOUR_CHAR_CODE('ff2n')
|
||||
weFontNameToFamilyHook = FOUR_CHAR_CODE('fn2f')
|
||||
weFluxProc = FOUR_CHAR_CODE('flux')
|
||||
weHiliteDropAreaHook = FOUR_CHAR_CODE('hidr')
|
||||
weLineBreakHook = FOUR_CHAR_CODE('lbrk')
|
||||
wePixelToCharHook = FOUR_CHAR_CODE('p2c ')
|
||||
wePort = FOUR_CHAR_CODE('port')
|
||||
wePreTrackDragHook = FOUR_CHAR_CODE('ptrk')
|
||||
weRefCon = FOUR_CHAR_CODE('refc')
|
||||
weScrollProc = FOUR_CHAR_CODE('scrl')
|
||||
weText = FOUR_CHAR_CODE('text')
|
||||
weTranslateDragHook = FOUR_CHAR_CODE('xdrg')
|
||||
weTranslucencyThreshold = FOUR_CHAR_CODE('tluc')
|
||||
weTSMDocumentID = FOUR_CHAR_CODE('tsmd')
|
||||
weTSMPreUpdate = FOUR_CHAR_CODE('pre ')
|
||||
weTSMPostUpdate = FOUR_CHAR_CODE('post')
|
||||
weURLHint = FOUR_CHAR_CODE('urlh')
|
||||
weWordBreakHook = FOUR_CHAR_CODE('wbrk')
|
||||
weNewHandler = FOUR_CHAR_CODE('new ')
|
||||
weDisposeHandler = FOUR_CHAR_CODE('free')
|
||||
weDrawHandler = FOUR_CHAR_CODE('draw')
|
||||
weClickHandler = FOUR_CHAR_CODE('clik')
|
||||
weStreamHandler = FOUR_CHAR_CODE('strm')
|
||||
weHoverHandler = FOUR_CHAR_CODE('hovr')
|
||||
kTypeText = FOUR_CHAR_CODE('TEXT')
|
||||
kTypeStyles = FOUR_CHAR_CODE('styl')
|
||||
kTypeSoup = FOUR_CHAR_CODE('SOUP')
|
||||
kTypeFontTable = FOUR_CHAR_CODE('FISH')
|
||||
kTypeParaFormat = FOUR_CHAR_CODE('WEpf')
|
||||
kTypeRulerScrap = FOUR_CHAR_CODE('WEru')
|
||||
kTypeCharFormat = FOUR_CHAR_CODE('WEcf')
|
||||
kTypeStyleScrap = FOUR_CHAR_CODE('WEst')
|
||||
kTypeUnicodeText = FOUR_CHAR_CODE('utxt')
|
||||
kTypeUTF8Text = FOUR_CHAR_CODE('UTF8')
|
||||
kTypeStyledText = FOUR_CHAR_CODE('STXT')
|
||||
weAKNone = 0
|
||||
weAKUnspecified = 1
|
||||
weAKTyping = 2
|
||||
weAKCut = 3
|
||||
weAKPaste = 4
|
||||
weAKClear = 5
|
||||
weAKDrag = 6
|
||||
weAKSetStyle = 7
|
||||
weAKSetRuler = 8
|
||||
weAKBackspace = 9
|
||||
weAKFwdDelete = 10
|
||||
weAKCaseChange = 11
|
||||
weAKObjectChange = 12
|
||||
weToScrap = 0
|
||||
weToDrag = 1
|
||||
weToSoup = 2
|
||||
weMouseEnter = 0
|
||||
weMouseWithin = 1
|
||||
weMouseLeave = 2
|
||||
kCurrentSelection = -1
|
||||
kNullStyle = -2
|
|
@ -7,6 +7,7 @@ import traceback
|
|||
from Carbon import AE
|
||||
from Carbon.AppleEvents import *
|
||||
from Carbon import Evt
|
||||
from Carbon import File
|
||||
from Carbon.Events import *
|
||||
import aetools
|
||||
|
||||
|
@ -16,36 +17,36 @@ class ArgvCollector:
|
|||
|
||||
def __init__(self):
|
||||
self.quitting = 0
|
||||
self.ae_handlers = {}
|
||||
# Remove the funny -psn_xxx_xxx argument
|
||||
if len(sys.argv) > 1 and sys.argv[1][:4] == '-psn':
|
||||
del sys.argv[1]
|
||||
self.installaehandler('aevt', 'oapp', self.open_app)
|
||||
self.installaehandler('aevt', 'odoc', self.open_file)
|
||||
|
||||
def installaehandler(self, classe, type, callback):
|
||||
AE.AEInstallEventHandler(classe, type, self.callback_wrapper)
|
||||
self.ae_handlers[(classe, type)] = callback
|
||||
AE.AEInstallEventHandler(kCoreEventClass, kAEOpenApplication, self.__runapp)
|
||||
AE.AEInstallEventHandler(kCoreEventClass, kAEOpenDocuments, self.__openfiles)
|
||||
|
||||
def close(self):
|
||||
for classe, type in self.ae_handlers.keys():
|
||||
AE.AERemoveEventHandler(classe, type)
|
||||
AE.AERemoveEventHandler(kCoreEventClass, kAEOpenApplication)
|
||||
AE.AERemoveEventHandler(kCoreEventClass, kAEOpenDocuments)
|
||||
|
||||
def mainloop(self, mask = highLevelEventMask, timeout = 1*60):
|
||||
# Note: this is not the right way to run an event loop in OSX or even
|
||||
# "recent" versions of MacOS9. This is however code that has proven
|
||||
# itself.
|
||||
stoptime = Evt.TickCount() + timeout
|
||||
while not self.quitting and Evt.TickCount() < stoptime:
|
||||
self.dooneevent(mask, timeout)
|
||||
self._dooneevent(mask, timeout)
|
||||
|
||||
if not self.quitting:
|
||||
print "argvemulator: timeout waiting for arguments"
|
||||
|
||||
self.close()
|
||||
|
||||
def _quit(self):
|
||||
self.quitting = 1
|
||||
|
||||
def dooneevent(self, mask = highLevelEventMask, timeout = 1*60):
|
||||
def _dooneevent(self, mask = highLevelEventMask, timeout = 1*60):
|
||||
got, event = Evt.WaitNextEvent(mask, timeout)
|
||||
if got:
|
||||
self.lowlevelhandler(event)
|
||||
self._lowlevelhandler(event)
|
||||
|
||||
def lowlevelhandler(self, event):
|
||||
def _lowlevelhandler(self, event):
|
||||
what, message, when, where, modifiers = event
|
||||
h, v = where
|
||||
if what == kHighLevelEvent:
|
||||
|
@ -60,53 +61,28 @@ class ArgvCollector:
|
|||
else:
|
||||
print "Unhandled event:", event
|
||||
|
||||
def callback_wrapper(self, _request, _reply):
|
||||
_parameters, _attributes = aetools.unpackevent(_request)
|
||||
_class = _attributes['evcl'].type
|
||||
_type = _attributes['evid'].type
|
||||
|
||||
if self.ae_handlers.has_key((_class, _type)):
|
||||
_function = self.ae_handlers[(_class, _type)]
|
||||
elif self.ae_handlers.has_key((_class, '****')):
|
||||
_function = self.ae_handlers[(_class, '****')]
|
||||
elif self.ae_handlers.has_key(('****', '****')):
|
||||
_function = self.ae_handlers[('****', '****')]
|
||||
else:
|
||||
raise 'Cannot happen: AE callback without handler', (_class, _type)
|
||||
def _quit(self):
|
||||
self.quitting = 1
|
||||
|
||||
# XXXX Do key-to-name mapping here
|
||||
|
||||
_parameters['_attributes'] = _attributes
|
||||
_parameters['_class'] = _class
|
||||
_parameters['_type'] = _type
|
||||
if _parameters.has_key('----'):
|
||||
_object = _parameters['----']
|
||||
del _parameters['----']
|
||||
# The try/except that used to be here can mask programmer errors.
|
||||
# Let the program crash, the programmer can always add a **args
|
||||
# to the formal parameter list.
|
||||
rv = _function(_object, **_parameters)
|
||||
else:
|
||||
#Same try/except comment as above
|
||||
rv = _function(**_parameters)
|
||||
|
||||
if rv == None:
|
||||
aetools.packevent(_reply, {})
|
||||
else:
|
||||
aetools.packevent(_reply, {'----':rv})
|
||||
|
||||
def open_app(self, **args):
|
||||
def __runapp(self, requestevent, replyevent):
|
||||
self._quit()
|
||||
|
||||
def open_file(self, _object=None, **args):
|
||||
for alias in _object:
|
||||
fsr = alias.FSResolveAlias(None)[0]
|
||||
pathname = fsr.as_pathname()
|
||||
sys.argv.append(pathname)
|
||||
self._quit()
|
||||
def __openfiles(self, requestevent, replyevent):
|
||||
try:
|
||||
listdesc = requestevent.AEGetParamDesc(keyDirectObject, typeAEList)
|
||||
for i in range(listdesc.AECountItems()):
|
||||
aliasdesc = listdesc.AEGetNthDesc(i+1, typeAlias)[1]
|
||||
alias = File.Alias(rawdata=aliasdesc.data)
|
||||
fsref = alias.FSResolveAlias(None)[0]
|
||||
pathname = fsref.as_pathname()
|
||||
sys.argv.append(pathname)
|
||||
except Exception, e:
|
||||
print "argvemulator.py warning: can't unpack an open document event"
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
def other(self, _object=None, _class=None, _type=None, **args):
|
||||
print 'Ignore AppleEvent', (_class, _type), 'for', _object, 'Other args:', args
|
||||
self._quit()
|
||||
|
||||
if __name__ == '__main__':
|
||||
ArgvCollector().mainloop()
|
||||
|
|
|
@ -150,7 +150,7 @@ class SSLFakeFile:
|
|||
|
||||
It only supports what is needed in smtplib.
|
||||
"""
|
||||
def __init__( self, sslobj):
|
||||
def __init__(self, sslobj):
|
||||
self.sslobj = sslobj
|
||||
|
||||
def readline(self):
|
||||
|
|
|
@ -141,7 +141,7 @@ class _socketobject(object):
|
|||
__doc__ = _realsocket.__doc__
|
||||
|
||||
__slots__ = ["_sock",
|
||||
"recv", "recv_buf", "recvfrom_buf",
|
||||
"recv", "recv_into", "recvfrom_into",
|
||||
"send", "sendto", "recvfrom",
|
||||
"__weakref__"]
|
||||
|
||||
|
@ -151,10 +151,10 @@ class _socketobject(object):
|
|||
self._sock = _sock
|
||||
self.send = self._sock.send
|
||||
self.recv = self._sock.recv
|
||||
self.recv_buf = self._sock.recv_buf
|
||||
self.recv_into = self._sock.recv_into
|
||||
self.sendto = self._sock.sendto
|
||||
self.recvfrom = self._sock.recvfrom
|
||||
self.recvfrom_buf = self._sock.recvfrom_buf
|
||||
self.recvfrom_into = self._sock.recvfrom_into
|
||||
|
||||
def close(self):
|
||||
self._sock = _closedsocket()
|
||||
|
|
|
@ -62,7 +62,7 @@ def pack(fmt, *args):
|
|||
o = _compile(fmt)
|
||||
return o.pack(*args)
|
||||
|
||||
def pack_to(fmt, buf, offset, *args):
|
||||
def pack_into(fmt, buf, offset, *args):
|
||||
"""
|
||||
Pack the values v2, v2, ... according to fmt, write
|
||||
the packed bytes into the writable buffer buf starting at offset.
|
||||
|
@ -72,7 +72,7 @@ def pack_to(fmt, buf, offset, *args):
|
|||
o = _cache[fmt]
|
||||
except KeyError:
|
||||
o = _compile(fmt)
|
||||
return o.pack_to(buf, offset, *args)
|
||||
return o.pack_into(buf, offset, *args)
|
||||
|
||||
def unpack(fmt, s):
|
||||
"""
|
||||
|
|
|
@ -388,6 +388,7 @@ if mswindows:
|
|||
hStdInput = None
|
||||
hStdOutput = None
|
||||
hStdError = None
|
||||
wShowWindow = 0
|
||||
class pywintypes:
|
||||
error = IOError
|
||||
else:
|
||||
|
@ -744,18 +745,17 @@ class Popen(object):
|
|||
args = list2cmdline(args)
|
||||
|
||||
# Process startup details
|
||||
default_startupinfo = STARTUPINFO()
|
||||
if startupinfo is None:
|
||||
startupinfo = default_startupinfo
|
||||
if not None in (p2cread, c2pwrite, errwrite):
|
||||
startupinfo = STARTUPINFO()
|
||||
if None not in (p2cread, c2pwrite, errwrite):
|
||||
startupinfo.dwFlags |= STARTF_USESTDHANDLES
|
||||
startupinfo.hStdInput = p2cread
|
||||
startupinfo.hStdOutput = c2pwrite
|
||||
startupinfo.hStdError = errwrite
|
||||
|
||||
if shell:
|
||||
default_startupinfo.dwFlags |= STARTF_USESHOWWINDOW
|
||||
default_startupinfo.wShowWindow = SW_HIDE
|
||||
startupinfo.dwFlags |= STARTF_USESHOWWINDOW
|
||||
startupinfo.wShowWindow = SW_HIDE
|
||||
comspec = os.environ.get("COMSPEC", "cmd.exe")
|
||||
args = comspec + " /c " + args
|
||||
if (GetVersion() >= 0x80000000L or
|
||||
|
|
|
@ -503,6 +503,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False):
|
|||
quiet -- if true, don't print 'skipped' messages (probably redundant)
|
||||
testdir -- test directory
|
||||
"""
|
||||
|
||||
test_support.unload(test)
|
||||
if not testdir:
|
||||
testdir = findtestdir()
|
||||
|
@ -512,11 +513,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False):
|
|||
cfp = None
|
||||
else:
|
||||
cfp = cStringIO.StringIO()
|
||||
if huntrleaks:
|
||||
if not hasattr(sys, 'gettotalrefcount'):
|
||||
raise Exception("Tracking reference leaks requires a debug build "
|
||||
"of Python")
|
||||
refrep = open(huntrleaks[2], "a")
|
||||
|
||||
try:
|
||||
save_stdout = sys.stdout
|
||||
try:
|
||||
|
@ -538,60 +535,7 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False):
|
|||
if indirect_test is not None:
|
||||
indirect_test()
|
||||
if huntrleaks:
|
||||
# This code *is* hackish and inelegant, yes.
|
||||
# But it seems to do the job.
|
||||
import copy_reg
|
||||
fs = warnings.filters[:]
|
||||
ps = copy_reg.dispatch_table.copy()
|
||||
pic = sys.path_importer_cache.copy()
|
||||
import gc
|
||||
def cleanup():
|
||||
import _strptime, linecache, warnings, dircache
|
||||
import urlparse, urllib, urllib2, mimetypes, doctest
|
||||
import struct
|
||||
from distutils.dir_util import _path_created
|
||||
_path_created.clear()
|
||||
warnings.filters[:] = fs
|
||||
gc.collect()
|
||||
re.purge()
|
||||
_strptime._regex_cache.clear()
|
||||
urlparse.clear_cache()
|
||||
urllib.urlcleanup()
|
||||
urllib2.install_opener(None)
|
||||
copy_reg.dispatch_table.clear()
|
||||
copy_reg.dispatch_table.update(ps)
|
||||
sys.path_importer_cache.clear()
|
||||
sys.path_importer_cache.update(pic)
|
||||
dircache.reset()
|
||||
linecache.clearcache()
|
||||
mimetypes._default_mime_types()
|
||||
struct._cache.clear()
|
||||
doctest.master = None
|
||||
if indirect_test:
|
||||
def run_the_test():
|
||||
indirect_test()
|
||||
else:
|
||||
def run_the_test():
|
||||
reload(the_module)
|
||||
deltas = []
|
||||
repcount = huntrleaks[0] + huntrleaks[1]
|
||||
print >> sys.stderr, "beginning", repcount, "repetitions"
|
||||
print >> sys.stderr, \
|
||||
("1234567890"*(repcount//10 + 1))[:repcount]
|
||||
cleanup()
|
||||
for i in range(repcount):
|
||||
rc = sys.gettotalrefcount()
|
||||
run_the_test()
|
||||
sys.stderr.write('.')
|
||||
cleanup()
|
||||
deltas.append(sys.gettotalrefcount() - rc - 2)
|
||||
print >>sys.stderr
|
||||
if max(map(abs, deltas[-huntrleaks[1]:])) > 0:
|
||||
print >>sys.stderr, test, 'leaked', \
|
||||
deltas[-huntrleaks[1]:], 'references'
|
||||
print >>refrep, test, 'leaked', \
|
||||
deltas[-huntrleaks[1]:], 'references'
|
||||
# The end of the huntrleaks hackishness.
|
||||
dash_R(the_module, test, indirect_test, huntrleaks)
|
||||
finally:
|
||||
sys.stdout = save_stdout
|
||||
except test_support.ResourceDenied, msg:
|
||||
|
@ -651,6 +595,77 @@ def runtest(test, generate, verbose, quiet, testdir=None, huntrleaks=False):
|
|||
sys.stdout.flush()
|
||||
return 0
|
||||
|
||||
def dash_R(the_module, test, indirect_test, huntrleaks):
|
||||
# This code is hackish and inelegant, but it seems to do the job.
|
||||
import copy_reg
|
||||
|
||||
if not hasattr(sys, 'gettotalrefcount'):
|
||||
raise Exception("Tracking reference leaks requires a debug build "
|
||||
"of Python")
|
||||
|
||||
# Save current values for dash_R_cleanup() to restore.
|
||||
fs = warnings.filters[:]
|
||||
ps = copy_reg.dispatch_table.copy()
|
||||
pic = sys.path_importer_cache.copy()
|
||||
|
||||
if indirect_test:
|
||||
def run_the_test():
|
||||
indirect_test()
|
||||
else:
|
||||
def run_the_test():
|
||||
reload(the_module)
|
||||
|
||||
deltas = []
|
||||
nwarmup, ntracked, fname = huntrleaks
|
||||
repcount = nwarmup + ntracked
|
||||
print >> sys.stderr, "beginning", repcount, "repetitions"
|
||||
print >> sys.stderr, ("1234567890"*(repcount//10 + 1))[:repcount]
|
||||
dash_R_cleanup(fs, ps, pic)
|
||||
for i in range(repcount):
|
||||
rc = sys.gettotalrefcount()
|
||||
run_the_test()
|
||||
sys.stderr.write('.')
|
||||
dash_R_cleanup(fs, ps, pic)
|
||||
if i >= nwarmup:
|
||||
deltas.append(sys.gettotalrefcount() - rc - 2)
|
||||
print >> sys.stderr
|
||||
if any(deltas):
|
||||
print >> sys.stderr, test, 'leaked', deltas, 'references'
|
||||
refrep = open(fname, "a")
|
||||
print >> refrep, test, 'leaked', deltas, 'references'
|
||||
refrep.close()
|
||||
|
||||
def dash_R_cleanup(fs, ps, pic):
|
||||
import gc, copy_reg
|
||||
import _strptime, linecache, warnings, dircache
|
||||
import urlparse, urllib, urllib2, mimetypes, doctest
|
||||
import struct, filecmp
|
||||
from distutils.dir_util import _path_created
|
||||
|
||||
# Restore some original values.
|
||||
warnings.filters[:] = fs
|
||||
copy_reg.dispatch_table.clear()
|
||||
copy_reg.dispatch_table.update(ps)
|
||||
sys.path_importer_cache.clear()
|
||||
sys.path_importer_cache.update(pic)
|
||||
|
||||
# Clear assorted module caches.
|
||||
_path_created.clear()
|
||||
re.purge()
|
||||
_strptime._regex_cache.clear()
|
||||
urlparse.clear_cache()
|
||||
urllib.urlcleanup()
|
||||
urllib2.install_opener(None)
|
||||
dircache.reset()
|
||||
linecache.clearcache()
|
||||
mimetypes._default_mime_types()
|
||||
struct._cache.clear()
|
||||
filecmp._cache.clear()
|
||||
doctest.master = None
|
||||
|
||||
# Collect cyclic trash.
|
||||
gc.collect()
|
||||
|
||||
def reportdiff(expected, output):
|
||||
import difflib
|
||||
print "*" * 70
|
||||
|
|
|
@ -44,6 +44,8 @@ def suite():
|
|||
'test_queue',
|
||||
'test_recno',
|
||||
'test_thread',
|
||||
'test_sequence',
|
||||
'test_cursor_pget_bug',
|
||||
]
|
||||
|
||||
alltests = unittest.TestSuite()
|
||||
|
|
|
@ -336,7 +336,7 @@ class BuiltinTest(unittest.TestCase):
|
|||
_cells = {}
|
||||
def __setitem__(self, key, formula):
|
||||
self._cells[key] = formula
|
||||
def __getitem__(self, key ):
|
||||
def __getitem__(self, key):
|
||||
return eval(self._cells[key], globals(), self)
|
||||
|
||||
ss = SpreadSheet()
|
||||
|
|
|
@ -363,3 +363,37 @@ except AttributeError, x:
|
|||
pass
|
||||
else:
|
||||
print "attribute error for I.__init__ got masked"
|
||||
|
||||
|
||||
# Test comparison and hash of methods
|
||||
class A:
|
||||
def __init__(self, x):
|
||||
self.x = x
|
||||
def f(self):
|
||||
pass
|
||||
def g(self):
|
||||
pass
|
||||
def __eq__(self, other):
|
||||
return self.x == other.x
|
||||
def __hash__(self):
|
||||
return self.x
|
||||
class B(A):
|
||||
pass
|
||||
|
||||
a1 = A(1)
|
||||
a2 = A(2)
|
||||
assert a1.f == a1.f
|
||||
assert a1.f != a2.f
|
||||
assert a1.f != a1.g
|
||||
assert a1.f == A(1).f
|
||||
assert hash(a1.f) == hash(a1.f)
|
||||
assert hash(a1.f) == hash(A(1).f)
|
||||
|
||||
assert A.f != a1.f
|
||||
assert A.f != A.g
|
||||
assert B.f == A.f
|
||||
assert hash(B.f) == hash(A.f)
|
||||
|
||||
# the following triggers a SystemError in 2.4
|
||||
a = A(hash(A.f.im_func)^(-1))
|
||||
hash(a.f)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue