AppPkg/Applications/Python/Python-2.7.10: Initial Checkin part 4/5.

The Lib directory from the cPython 2.7.10 distribution.
These files are unchanged and set the baseline for subsequent commits.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Daryl McDaniel <edk2-lists@mc2research.org>


git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18740 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
Daryl McDaniel
2015-11-07 19:33:58 +00:00
committed by darylm503
parent 53b2ba5790
commit 3257aa9932
239 changed files with 74975 additions and 0 deletions

View File

@ -0,0 +1,603 @@
"""HTTP server base class.
Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts). It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.
Contents:
- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function
XXX To do:
- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
# See also:
#
# HTTP Working Group T. Berners-Lee
# INTERNET-DRAFT R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
# Expires September 8, 1995 March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group R. Fielding
# Request for Comments: 2616 et al
# Obsoletes: 2068 June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html
# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# | host: Either the DNS name or the IP number of the remote client
# | rfc931: Any information returned by identd for this person,
# | - otherwise.
# | authuser: If user sent a userid for authentication, the user name,
# | - otherwise.
# | DD: Day
# | Mon: Month (calendar name)
# | YYYY: Year
# | hh: hour (24-hour format, the machine's timezone)
# | mm: minutes
# | ss: seconds
# | request: The first line of the HTTP request as sent by the client.
# | ddd: the status code returned by the server, - if not available.
# | bbbb: the total number of bytes sent,
# | *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
__version__ = "0.3"
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
import sys
import time
import socket # For gethostbyaddr()
from warnings import filterwarnings, catch_warnings
with catch_warnings():
if sys.py3kwarning:
filterwarnings("ignore", ".*mimetools has been removed",
DeprecationWarning)
import mimetools
import SocketServer
# Default error message template
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
def _quote_html(html):
return html.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
class HTTPServer(SocketServer.TCPServer):
allow_reuse_address = 1 # Seems to make sense in testing environment
def server_bind(self):
"""Override server_bind to store the server name."""
SocketServer.TCPServer.server_bind(self)
host, port = self.socket.getsockname()[:2]
self.server_name = socket.getfqdn(host)
self.server_port = port
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):
"""HTTP request handler base class.
The following explanation of HTTP serves to guide you through the
code as well as to expose any misunderstandings I may have about
HTTP (so you don't need to read the code to figure out I'm wrong
:-).
HTTP (HyperText Transfer Protocol) is an extensible protocol on
top of a reliable stream transport (e.g. TCP/IP). The protocol
recognizes three parts to a request:
1. One line identifying the request type and path
2. An optional set of RFC-822-style headers
3. An optional data part
The headers and data are separated by a blank line.
The first line of the request has the form
<command> <path> <version>
where <command> is a (case-sensitive) keyword such as GET or POST,
<path> is a string containing path information for the request,
and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
<path> is encoded using the URL encoding scheme (using %xx to signify
the ASCII character with hex code xx).
The specification specifies that lines are separated by CRLF but
for compatibility with the widest range of clients recommends
servers also handle LF. Similarly, whitespace in the request line
is treated sensibly (allowing multiple spaces between components
and allowing trailing whitespace).
Similarly, for output, lines ought to be separated by CRLF pairs
but most clients grok LF characters just fine.
If the first line of the request has the form
<command> <path>
(i.e. <version> is left out) then this is assumed to be an HTTP
0.9 request; this form has no optional headers and data part and
the reply consists of just the data.
The reply form of the HTTP 1.x protocol again has three parts:
1. One line giving the response code
2. An optional set of RFC-822-style headers
3. The data
Again, the headers and data are separated by a blank line.
The response code line has the form
<version> <responsecode> <responsestring>
where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
<responsecode> is a 3-digit response code indicating success or
failure of the request, and <responsestring> is an optional
human-readable string explaining what the response code means.
This server parses the request and the headers, and then calls a
function specific to the request type (<command>). Specifically,
a request SPAM will be handled by a method do_SPAM(). If no
such method exists the server sends an error response to the
client. If it exists, it is called with no arguments:
do_SPAM()
Note that the request name is case sensitive (i.e. SPAM and spam
are different requests).
The various request details are stored in instance variables:
- client_address is the client IP address in the form (host,
port);
- command, path and version are the broken-down request line;
- headers is an instance of mimetools.Message (or a derived
class) containing the header information;
- rfile is a file object open for reading positioned at the
start of the optional input data part;
- wfile is a file object open for writing.
IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
The first thing to be written must be the response line. Then
follow 0 or more header lines, then a blank line, and then the
actual data (if any). The meaning of the header lines depends on
the command executed by the server; in most cases, when data is
returned, there should be at least one header line of the form
Content-type: <type>/<subtype>
where <type> and <subtype> should be registered MIME types,
e.g. "text/html" or "text/plain".
"""
# The Python system version, truncated to its first component.
sys_version = "Python/" + sys.version.split()[0]
# The server software version. You may want to override this.
# The format is multiple whitespace-separated strings,
# where each string is of the form name[/version].
server_version = "BaseHTTP/" + __version__
# The default request version. This only affects responses up until
# the point where the request line is parsed, so it mainly decides what
# the client gets back when sending a malformed request line.
# Most web servers default to HTTP 0.9, i.e. don't send a status line.
default_request_version = "HTTP/0.9"
def parse_request(self):
"""Parse a request (internal).
The request should be stored in self.raw_requestline; the results
are in self.command, self.path, self.request_version and
self.headers.
Return True for success, False for failure; on failure, an
error is sent back.
"""
self.command = None # set in case of error on the first line
self.request_version = version = self.default_request_version
self.close_connection = 1
requestline = self.raw_requestline
requestline = requestline.rstrip('\r\n')
self.requestline = requestline
words = requestline.split()
if len(words) == 3:
command, path, version = words
if version[:5] != 'HTTP/':
self.send_error(400, "Bad request version (%r)" % version)
return False
try:
base_version_number = version.split('/', 1)[1]
version_number = base_version_number.split(".")
# RFC 2145 section 3.1 says there can be only one "." and
# - major and minor numbers MUST be treated as
# separate integers;
# - HTTP/2.4 is a lower version than HTTP/2.13, which in
# turn is lower than HTTP/12.3;
# - Leading zeros MUST be ignored by recipients.
if len(version_number) != 2:
raise ValueError
version_number = int(version_number[0]), int(version_number[1])
except (ValueError, IndexError):
self.send_error(400, "Bad request version (%r)" % version)
return False
if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
self.close_connection = 0
if version_number >= (2, 0):
self.send_error(505,
"Invalid HTTP Version (%s)" % base_version_number)
return False
elif len(words) == 2:
command, path = words
self.close_connection = 1
if command != 'GET':
self.send_error(400,
"Bad HTTP/0.9 request type (%r)" % command)
return False
elif not words:
return False
else:
self.send_error(400, "Bad request syntax (%r)" % requestline)
return False
self.command, self.path, self.request_version = command, path, version
# Examine the headers and look for a Connection directive
self.headers = self.MessageClass(self.rfile, 0)
conntype = self.headers.get('Connection', "")
if conntype.lower() == 'close':
self.close_connection = 1
elif (conntype.lower() == 'keep-alive' and
self.protocol_version >= "HTTP/1.1"):
self.close_connection = 0
return True
def handle_one_request(self):
"""Handle a single HTTP request.
You normally don't need to override this method; see the class
__doc__ string for information on how to handle specific HTTP
commands such as GET and POST.
"""
try:
self.raw_requestline = self.rfile.readline(65537)
if len(self.raw_requestline) > 65536:
self.requestline = ''
self.request_version = ''
self.command = ''
self.send_error(414)
return
if not self.raw_requestline:
self.close_connection = 1
return
if not self.parse_request():
# An error code has been sent, just exit
return
mname = 'do_' + self.command
if not hasattr(self, mname):
self.send_error(501, "Unsupported method (%r)" % self.command)
return
method = getattr(self, mname)
method()
self.wfile.flush() #actually send the response if not already done.
except socket.timeout, e:
#a read or a write timed out. Discard this connection
self.log_error("Request timed out: %r", e)
self.close_connection = 1
return
def handle(self):
"""Handle multiple requests if necessary."""
self.close_connection = 1
self.handle_one_request()
while not self.close_connection:
self.handle_one_request()
def send_error(self, code, message=None):
"""Send and log an error reply.
Arguments are the error code, and a detailed message.
The detailed message defaults to the short entry matching the
response code.
This sends an error response (so it must be called before any
output has been generated), logs the error, and finally sends
a piece of HTML explaining the error to the user.
"""
try:
short, long = self.responses[code]
except KeyError:
short, long = '???', '???'
if message is None:
message = short
explain = long
self.log_error("code %d, message %s", code, message)
# using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201)
content = (self.error_message_format %
{'code': code, 'message': _quote_html(message), 'explain': explain})
self.send_response(code, message)
self.send_header("Content-Type", self.error_content_type)
self.send_header('Connection', 'close')
self.end_headers()
if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
self.wfile.write(content)
error_message_format = DEFAULT_ERROR_MESSAGE
error_content_type = DEFAULT_ERROR_CONTENT_TYPE
def send_response(self, code, message=None):
"""Send the response header and log the response code.
Also send two standard headers with the server software
version and the current date.
"""
self.log_request(code)
if message is None:
if code in self.responses:
message = self.responses[code][0]
else:
message = ''
if self.request_version != 'HTTP/0.9':
self.wfile.write("%s %d %s\r\n" %
(self.protocol_version, code, message))
# print (self.protocol_version, code, message)
self.send_header('Server', self.version_string())
self.send_header('Date', self.date_time_string())
def send_header(self, keyword, value):
"""Send a MIME header."""
if self.request_version != 'HTTP/0.9':
self.wfile.write("%s: %s\r\n" % (keyword, value))
if keyword.lower() == 'connection':
if value.lower() == 'close':
self.close_connection = 1
elif value.lower() == 'keep-alive':
self.close_connection = 0
def end_headers(self):
"""Send the blank line ending the MIME headers."""
if self.request_version != 'HTTP/0.9':
self.wfile.write("\r\n")
def log_request(self, code='-', size='-'):
"""Log an accepted request.
This is called by send_response().
"""
self.log_message('"%s" %s %s',
self.requestline, str(code), str(size))
def log_error(self, format, *args):
"""Log an error.
This is called when a request cannot be fulfilled. By
default it passes the message on to log_message().
Arguments are the same as for log_message().
XXX This should go to the separate error log.
"""
self.log_message(format, *args)
def log_message(self, format, *args):
"""Log an arbitrary message.
This is used by all other logging functions. Override
it if you have specific logging wishes.
The first argument, FORMAT, is a format string for the
message to be logged. If the format string contains
any % escapes requiring parameters, they should be
specified as subsequent arguments (it's just like
printf!).
The client ip address and current date/time are prefixed to every
message.
"""
sys.stderr.write("%s - - [%s] %s\n" %
(self.client_address[0],
self.log_date_time_string(),
format%args))
def version_string(self):
"""Return the server software version string."""
return self.server_version + ' ' + self.sys_version
def date_time_string(self, timestamp=None):
"""Return the current date and time formatted for a message header."""
if timestamp is None:
timestamp = time.time()
year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
self.weekdayname[wd],
day, self.monthname[month], year,
hh, mm, ss)
return s
def log_date_time_string(self):
"""Return the current time formatted for logging."""
now = time.time()
year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
s = "%02d/%3s/%04d %02d:%02d:%02d" % (
day, self.monthname[month], year, hh, mm, ss)
return s
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
monthname = [None,
'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def address_string(self):
"""Return the client address formatted for logging.
This version looks up the full hostname using gethostbyaddr(),
and tries to find a name that contains at least one dot.
"""
host, port = self.client_address[:2]
return socket.getfqdn(host)
# Essentially static class variables
# The version of the HTTP protocol we support.
# Set this to HTTP/1.1 to enable automatic keepalive
protocol_version = "HTTP/1.0"
# The Message-like class used to parse headers
MessageClass = mimetools.Message
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
# See RFC 2616.
responses = {
100: ('Continue', 'Request received, please continue'),
101: ('Switching Protocols',
'Switching to new protocol; obey Upgrade header'),
200: ('OK', 'Request fulfilled, document follows'),
201: ('Created', 'Document created, URL follows'),
202: ('Accepted',
'Request accepted, processing continues off-line'),
203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
204: ('No Content', 'Request fulfilled, nothing follows'),
205: ('Reset Content', 'Clear input form for further input.'),
206: ('Partial Content', 'Partial content follows.'),
300: ('Multiple Choices',
'Object has several resources -- see URI list'),
301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
302: ('Found', 'Object moved temporarily -- see URI list'),
303: ('See Other', 'Object moved -- see Method and URL list'),
304: ('Not Modified',
'Document has not changed since given time'),
305: ('Use Proxy',
'You must use proxy specified in Location to access this '
'resource.'),
307: ('Temporary Redirect',
'Object moved temporarily -- see URI list'),
400: ('Bad Request',
'Bad request syntax or unsupported method'),
401: ('Unauthorized',
'No permission -- see authorization schemes'),
402: ('Payment Required',
'No payment -- see charging schemes'),
403: ('Forbidden',
'Request forbidden -- authorization will not help'),
404: ('Not Found', 'Nothing matches the given URI'),
405: ('Method Not Allowed',
'Specified method is invalid for this resource.'),
406: ('Not Acceptable', 'URI not available in preferred format.'),
407: ('Proxy Authentication Required', 'You must authenticate with '
'this proxy before proceeding.'),
408: ('Request Timeout', 'Request timed out; try again later.'),
409: ('Conflict', 'Request conflict.'),
410: ('Gone',
'URI no longer exists and has been permanently removed.'),
411: ('Length Required', 'Client must specify Content-Length.'),
412: ('Precondition Failed', 'Precondition in headers is false.'),
413: ('Request Entity Too Large', 'Entity is too large.'),
414: ('Request-URI Too Long', 'URI is too long.'),
415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
416: ('Requested Range Not Satisfiable',
'Cannot satisfy request range.'),
417: ('Expectation Failed',
'Expect condition could not be satisfied.'),
500: ('Internal Server Error', 'Server got itself in trouble'),
501: ('Not Implemented',
'Server does not support this operation'),
502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
503: ('Service Unavailable',
'The server cannot process the request due to a high load'),
504: ('Gateway Timeout',
'The gateway server did not receive a timely response'),
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
}
def test(HandlerClass = BaseHTTPRequestHandler,
ServerClass = HTTPServer, protocol="HTTP/1.0"):
"""Test the HTTP request handler class.
This runs an HTTP server on port 8000 (or the first command line
argument).
"""
if sys.argv[1:]:
port = int(sys.argv[1])
else:
port = 8000
server_address = ('', port)
HandlerClass.protocol_version = protocol
httpd = ServerClass(server_address, HandlerClass)
sa = httpd.socket.getsockname()
print "Serving HTTP on", sa[0], "port", sa[1], "..."
httpd.serve_forever()
if __name__ == '__main__':
test()

View File

@ -0,0 +1,475 @@
"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import markupbase
import re
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
# note: if you change tagfind/attrfind remember to update locatestarttagend too
tagfind = re.compile('([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
# this regex is currently unused, but left for backward compatibility
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*')
attrfind = re.compile(
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend = re.compile(r"""
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParseError(Exception):
"""Exception raised for all parse errors."""
def __init__(self, msg, position=(None, None)):
assert msg
self.msg = msg
self.lineno = position[0]
self.offset = position[1]
def __str__(self):
result = self.msg
if self.lineno is not None:
result = result + ", at line %d" % self.lineno
if self.offset is not None:
result = result + ", column %d" % (self.offset + 1)
return result
class HTMLParser(markupbase.ParserBase):
"""Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). Entity references are
passed by calling self.handle_entityref() with the entity
reference as the argument. Numeric character references are
passed to self.handle_charref() with the string containing the
reference as the argument.
"""
CDATA_CONTENT_ELEMENTS = ("script", "style")
def __init__(self):
"""Initialize and reset this instance."""
self.reset()
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.rawdata = ''
self.lasttag = '???'
self.interesting = interesting_normal
self.cdata_elem = None
markupbase.ParserBase.reset(self)
def feed(self, data):
r"""Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
"""
self.rawdata = self.rawdata + data
self.goahead(0)
def close(self):
"""Handle any buffered data."""
self.goahead(1)
def error(self, message):
raise HTMLParseError(message, self.getpos())
__starttag_text = None
def get_starttag_text(self):
"""Return full source of start tag: '<...>'."""
return self.__starttag_text
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
def clear_cdata_mode(self):
self.interesting = interesting_normal
self.cdata_elem = None
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
def goahead(self, end):
rawdata = self.rawdata
i = 0
n = len(rawdata)
while i < n:
match = self.interesting.search(rawdata, i) # < or &
if match:
j = match.start()
else:
if self.cdata_elem:
break
j = n
if i < j: self.handle_data(rawdata[i:j])
i = self.updatepos(i, j)
if i == n: break
startswith = rawdata.startswith
if startswith('<', i):
if starttagopen.match(rawdata, i): # < + letter
k = self.parse_starttag(i)
elif startswith("</", i):
k = self.parse_endtag(i)
elif startswith("<!--", i):
k = self.parse_comment(i)
elif startswith("<?", i):
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
elif (i + 1) < n:
self.handle_data("<")
k = i + 1
else:
break
if k < 0:
if not end:
break
k = rawdata.find('>', i + 1)
if k < 0:
k = rawdata.find('<', i + 1)
if k < 0:
k = i + 1
else:
k += 1
self.handle_data(rawdata[i:k])
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
if match:
name = match.group()[2:-1]
self.handle_charref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
else:
if ";" in rawdata[i:]: # bail by consuming '&#'
self.handle_data(rawdata[i:i+2])
i = self.updatepos(i, i+2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
match = incomplete.match(rawdata, i)
if match:
# match.group() will contain at least 2 chars
if end and match.group() == rawdata[i:]:
self.error("EOF in middle of entity or char ref")
# incomplete
break
elif (i + 1) < n:
# not the end of the buffer, and can't be confused
# with some other construct
self.handle_data("&")
i = self.updatepos(i, i + 1)
else:
break
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n and not self.cdata_elem:
self.handle_data(rawdata[i:n])
i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
# Internal -- parse html declarations, return length or -1 if not terminated
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
# See also parse_declaration in _markupbase
def parse_html_declaration(self, i):
rawdata = self.rawdata
if rawdata[i:i+2] != '<!':
self.error('unexpected call to parse_html_declaration()')
if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead()
return self.parse_comment(i)
elif rawdata[i:i+3] == '<![':
return self.parse_marked_section(i)
elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing >
gtpos = rawdata.find('>', i+9)
if gtpos == -1:
return -1
self.handle_decl(rawdata[i+2:gtpos])
return gtpos+1
else:
return self.parse_bogus_comment(i)
# Internal -- parse bogus comment, return length or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
rawdata = self.rawdata
if rawdata[i:i+2] not in ('<!', '</'):
self.error('unexpected call to parse_comment()')
pos = rawdata.find('>', i+2)
if pos == -1:
return -1
if report:
self.handle_comment(rawdata[i+2:pos])
return pos + 1
# Internal -- parse processing instr, return end or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
match = piclose.search(rawdata, i+2) # >
if not match:
return -1
j = match.start()
self.handle_pi(rawdata[i+2: j])
j = match.end()
return j
# Internal -- handle starttag, return end or -1 if not terminated
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = match.group(1).lower()
while k < endpos:
m = attrfind.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = self.unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
# Internal -- check to see if we have a complete starttag; return end
# or -1 if incomplete.
def check_for_whole_start_tag(self, i):
rawdata = self.rawdata
m = locatestarttagend.match(rawdata, i)
if m:
j = m.end()
next = rawdata[j:j+1]
if next == ">":
return j + 1
if next == "/":
if rawdata.startswith("/>", j):
return j + 2
if rawdata.startswith("/", j):
# buffer boundary
return -1
# else bogus input
self.updatepos(i, j + 1)
self.error("malformed empty start tag")
if next == "":
# end of input
return -1
if next in ("abcdefghijklmnopqrstuvwxyz=/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
# end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1
if j > i:
return j
else:
return i + 1
raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete
def parse_endtag(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
match = endendtag.search(rawdata, i+1) # >
if not match:
return -1
gtpos = match.end()
match = endtagfind.match(rawdata, i) # </ + tag + >
if not match:
if self.cdata_elem is not None:
self.handle_data(rawdata[i:gtpos])
return gtpos
# find the name: w3.org/TR/html5/tokenization.html#tag-name-state
namematch = tagfind.match(rawdata, i+2)
if not namematch:
# w3.org/TR/html5/tokenization.html#end-tag-open-state
if rawdata[i:i+3] == '</>':
return i+3
else:
return self.parse_bogus_comment(i)
tagname = namematch.group(1).lower()
# consume and ignore other stuff between the name and the >
# Note: this is not 100% correct, since we might have things like
# </tag attr=">">, but looking for > after tha name should cover
# most of the cases and is much simpler
gtpos = rawdata.find('>', namematch.end())
self.handle_endtag(tagname)
return gtpos+1
elem = match.group(1).lower() # script or style
if self.cdata_elem is not None:
if elem != self.cdata_elem:
self.handle_data(rawdata[i:gtpos])
return gtpos
self.handle_endtag(elem)
self.clear_cdata_mode()
return gtpos
# Overridable -- finish processing of start+end tag: <tag.../>
def handle_startendtag(self, tag, attrs):
self.handle_starttag(tag, attrs)
self.handle_endtag(tag)
# Overridable -- handle start tag
def handle_starttag(self, tag, attrs):
pass
# Overridable -- handle end tag
def handle_endtag(self, tag):
pass
# Overridable -- handle character reference
def handle_charref(self, name):
pass
# Overridable -- handle entity reference
def handle_entityref(self, name):
pass
# Overridable -- handle data
def handle_data(self, data):
pass
# Overridable -- handle comment
def handle_comment(self, data):
pass
# Overridable -- handle declaration
def handle_decl(self, decl):
pass
# Overridable -- handle processing instruction
def handle_pi(self, data):
pass
def unknown_decl(self, data):
pass
# Internal -- helper to remove special character quoting
entitydefs = None
def unescape(self, s):
if '&' not in s:
return s
def replaceEntities(s):
s = s.groups()[0]
try:
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
c = int(s[1:], 16)
else:
c = int(s)
return unichr(c)
except ValueError:
return '&#'+s+';'
else:
# Cannot use name2codepoint directly, because HTMLParser supports apos,
# which is not part of HTML 4
import htmlentitydefs
if HTMLParser.entitydefs is None:
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
for k, v in htmlentitydefs.name2codepoint.iteritems():
entitydefs[k] = unichr(v)
try:
return self.entitydefs[s]
except KeyError:
return '&'+s+';'
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)

View File

@ -0,0 +1,235 @@
"""Simple HTTP Server.
This module builds on BaseHTTPServer by implementing the standard GET
and HEAD requests in a fairly straightforward manner.
"""
__version__ = "0.6"
__all__ = ["SimpleHTTPRequestHandler"]
import os
import posixpath
import BaseHTTPServer
import urllib
import urlparse
import cgi
import sys
import shutil
import mimetypes
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
"""Simple HTTP request handler with GET and HEAD commands.
This serves files from the current directory and any of its
subdirectories. The MIME type for files is determined by
calling the .guess_type() method.
The GET and HEAD requests are identical except that the HEAD
request omits the actual contents of the file.
"""
server_version = "SimpleHTTP/" + __version__
def do_GET(self):
"""Serve a GET request."""
f = self.send_head()
if f:
try:
self.copyfile(f, self.wfile)
finally:
f.close()
def do_HEAD(self):
"""Serve a HEAD request."""
f = self.send_head()
if f:
f.close()
def send_head(self):
"""Common code for GET and HEAD commands.
This sends the response code and MIME headers.
Return value is either a file object (which has to be copied
to the outputfile by the caller unless the command was HEAD,
and must be closed by the caller under all circumstances), or
None, in which case the caller has nothing further to do.
"""
path = self.translate_path(self.path)
f = None
if os.path.isdir(path):
parts = urlparse.urlsplit(self.path)
if not parts.path.endswith('/'):
# redirect browser - doing basically what apache does
self.send_response(301)
new_parts = (parts[0], parts[1], parts[2] + '/',
parts[3], parts[4])
new_url = urlparse.urlunsplit(new_parts)
self.send_header("Location", new_url)
self.end_headers()
return None
for index in "index.html", "index.htm":
index = os.path.join(path, index)
if os.path.exists(index):
path = index
break
else:
return self.list_directory(path)
ctype = self.guess_type(path)
try:
# Always read in binary mode. Opening files in text mode may cause
# newline translations, making the actual size of the content
# transmitted *less* than the content-length!
f = open(path, 'rb')
except IOError:
self.send_error(404, "File not found")
return None
try:
self.send_response(200)
self.send_header("Content-type", ctype)
fs = os.fstat(f.fileno())
self.send_header("Content-Length", str(fs[6]))
self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
self.end_headers()
return f
except:
f.close()
raise
def list_directory(self, path):
"""Helper to produce a directory listing (absent index.html).
Return value is either a file object, or None (indicating an
error). In either case, the headers are sent, making the
interface the same as for send_head().
"""
try:
list = os.listdir(path)
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(key=lambda a: a.lower())
f = StringIO()
displaypath = cgi.escape(urllib.unquote(self.path))
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
# Append / for directories or @ for symbolic links
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
f.write('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
f.write("</ul>\n<hr>\n</body>\n</html>\n")
length = f.tell()
f.seek(0)
self.send_response(200)
encoding = sys.getfilesystemencoding()
self.send_header("Content-type", "text/html; charset=%s" % encoding)
self.send_header("Content-Length", str(length))
self.end_headers()
return f
def translate_path(self, path):
"""Translate a /-separated PATH to the local filename syntax.
Components that mean special things to the local file system
(e.g. drive or directory names) are ignored. (XXX They should
probably be diagnosed.)
"""
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
# Don't forget explicit trailing slash when normalizing. Issue17324
trailing_slash = path.rstrip().endswith('/')
path = posixpath.normpath(urllib.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
for word in words:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir): continue
path = os.path.join(path, word)
if trailing_slash:
path += '/'
return path
def copyfile(self, source, outputfile):
"""Copy all data between two file objects.
The SOURCE argument is a file object open for reading
(or anything with a read() method) and the DESTINATION
argument is a file object open for writing (or
anything with a write() method).
The only reason for overriding this would be to change
the block size or perhaps to replace newlines by CRLF
-- note however that this the default server uses this
to copy binary data as well.
"""
shutil.copyfileobj(source, outputfile)
def guess_type(self, path):
"""Guess the type of a file.
Argument is a PATH (a filename).
Return value is a string of the form type/subtype,
usable for a MIME Content-type header.
The default implementation looks the file's extension
up in the table self.extensions_map, using application/octet-stream
as a default; however it would be permissible (if
slow) to look inside the data to make a better guess.
"""
base, ext = posixpath.splitext(path)
if ext in self.extensions_map:
return self.extensions_map[ext]
ext = ext.lower()
if ext in self.extensions_map:
return self.extensions_map[ext]
else:
return self.extensions_map['']
if not mimetypes.inited:
mimetypes.init() # try to read system mime.types
extensions_map = mimetypes.types_map.copy()
extensions_map.update({
'': 'application/octet-stream', # Default
'.py': 'text/plain',
'.c': 'text/plain',
'.h': 'text/plain',
})
def test(HandlerClass = SimpleHTTPRequestHandler,
ServerClass = BaseHTTPServer.HTTPServer):
BaseHTTPServer.test(HandlerClass, ServerClass)
if __name__ == '__main__':
test()

View File

@ -0,0 +1,737 @@
"""Generic socket server classes.
This module tries to capture the various aspects of defining a server:
For socket-based servers:
- address family:
- AF_INET{,6}: IP (Internet Protocol) sockets (default)
- AF_UNIX: Unix domain sockets
- others, e.g. AF_DECNET are conceivable (see <socket.h>
- socket type:
- SOCK_STREAM (reliable stream, e.g. TCP)
- SOCK_DGRAM (datagrams, e.g. UDP)
For request-based servers (including socket-based):
- client address verification before further looking at the request
(This is actually a hook for any processing that needs to look
at the request before anything else, e.g. logging)
- how to handle multiple requests:
- synchronous (one request is handled at a time)
- forking (each request is handled by a new process)
- threading (each request is handled by a new thread)
The classes in this module favor the server type that is simplest to
write: a synchronous TCP/IP server. This is bad class design, but
save some typing. (There's also the issue that a deep class hierarchy
slows down method lookups.)
There are five classes in an inheritance diagram, four of which represent
synchronous servers of four types:
+------------+
| BaseServer |
+------------+
|
v
+-----------+ +------------------+
| TCPServer |------->| UnixStreamServer |
+-----------+ +------------------+
|
v
+-----------+ +--------------------+
| UDPServer |------->| UnixDatagramServer |
+-----------+ +--------------------+
Note that UnixDatagramServer derives from UDPServer, not from
UnixStreamServer -- the only difference between an IP and a Unix
stream server is the address family, which is simply repeated in both
unix server classes.
Forking and threading versions of each type of server can be created
using the ForkingMixIn and ThreadingMixIn mix-in classes. For
instance, a threading UDP server class is created as follows:
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
The Mix-in class must come first, since it overrides a method defined
in UDPServer! Setting the various member variables also changes
the behavior of the underlying server mechanism.
To implement a service, you must derive a class from
BaseRequestHandler and redefine its handle() method. You can then run
various versions of the service by combining one of the server classes
with your request handler class.
The request handler class must be different for datagram or stream
services. This can be hidden by using the request handler
subclasses StreamRequestHandler or DatagramRequestHandler.
Of course, you still have to use your head!
For instance, it makes no sense to use a forking server if the service
contains state in memory that can be modified by requests (since the
modifications in the child process would never reach the initial state
kept in the parent process and passed to each child). In this case,
you can use a threading server, but you will probably have to use
locks to avoid two requests that come in nearly simultaneous to apply
conflicting changes to the server state.
On the other hand, if you are building e.g. an HTTP server, where all
data is stored externally (e.g. in the file system), a synchronous
class will essentially render the service "deaf" while one request is
being handled -- which may be for a very long time if a client is slow
to read all the data it has requested. Here a threading or forking
server is appropriate.
In some cases, it may be appropriate to process part of a request
synchronously, but to finish processing in a forked child depending on
the request data. This can be implemented by using a synchronous
server and doing an explicit fork in the request handler class
handle() method.
Another approach to handling multiple simultaneous requests in an
environment that supports neither threads nor fork (or where these are
too expensive or inappropriate for the service) is to maintain an
explicit table of partially finished requests and to use select() to
decide which request to work on next (or whether to handle a new
incoming request). This is particularly important for stream services
where each client can potentially be connected for a long time (if
threads or subprocesses cannot be used).
Future work:
- Standard classes for Sun RPC (which uses either UDP or TCP)
- Standard mix-in classes to implement various authentication
and encryption schemes
- Standard framework for select-based multiplexing
XXX Open problems:
- What to do with out-of-band data?
BaseServer:
- split generic "request" functionality out into BaseServer class.
Copyright (C) 2000 Luke Kenneth Casson Leighton <lkcl@samba.org>
example: read entries from a SQL database (requires overriding
get_request() to return a table entry from the database).
entry is processed by a RequestHandlerClass.
"""
# Author of the BaseServer patch: Luke Kenneth Casson Leighton
# XXX Warning!
# There is a test suite for this module, but it cannot be run by the
# standard regression test.
# To run it manually, run Lib/test/test_socketserver.py.
__version__ = "0.4"
import socket
import select
import sys
import os
import errno
try:
import threading
except ImportError:
import dummy_threading as threading
__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer",
"ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler",
"StreamRequestHandler","DatagramRequestHandler",
"ThreadingMixIn", "ForkingMixIn"]
if hasattr(socket, "AF_UNIX"):
__all__.extend(["UnixStreamServer","UnixDatagramServer",
"ThreadingUnixStreamServer",
"ThreadingUnixDatagramServer"])
def _eintr_retry(func, *args):
"""restart a system call interrupted by EINTR"""
while True:
try:
return func(*args)
except (OSError, select.error) as e:
if e.args[0] != errno.EINTR:
raise
class BaseServer:
"""Base class for server classes.
Methods for the caller:
- __init__(server_address, RequestHandlerClass)
- serve_forever(poll_interval=0.5)
- shutdown()
- handle_request() # if you do not use serve_forever()
- fileno() -> int # for select()
Methods that may be overridden:
- server_bind()
- server_activate()
- get_request() -> request, client_address
- handle_timeout()
- verify_request(request, client_address)
- server_close()
- process_request(request, client_address)
- shutdown_request(request)
- close_request(request)
- handle_error()
Methods for derived classes:
- finish_request(request, client_address)
Class variables that may be overridden by derived classes or
instances:
- timeout
- address_family
- socket_type
- allow_reuse_address
Instance variables:
- RequestHandlerClass
- socket
"""
timeout = None
def __init__(self, server_address, RequestHandlerClass):
"""Constructor. May be extended, do not override."""
self.server_address = server_address
self.RequestHandlerClass = RequestHandlerClass
self.__is_shut_down = threading.Event()
self.__shutdown_request = False
def server_activate(self):
"""Called by constructor to activate the server.
May be overridden.
"""
pass
def serve_forever(self, poll_interval=0.5):
"""Handle one request at a time until shutdown.
Polls for shutdown every poll_interval seconds. Ignores
self.timeout. If you need to do periodic tasks, do them in
another thread.
"""
self.__is_shut_down.clear()
try:
while not self.__shutdown_request:
# XXX: Consider using another file descriptor or
# connecting to the socket to wake this up instead of
# polling. Polling reduces our responsiveness to a
# shutdown request and wastes cpu at all other times.
r, w, e = _eintr_retry(select.select, [self], [], [],
poll_interval)
if self in r:
self._handle_request_noblock()
finally:
self.__shutdown_request = False
self.__is_shut_down.set()
def shutdown(self):
"""Stops the serve_forever loop.
Blocks until the loop has finished. This must be called while
serve_forever() is running in another thread, or it will
deadlock.
"""
self.__shutdown_request = True
self.__is_shut_down.wait()
# The distinction between handling, getting, processing and
# finishing a request is fairly arbitrary. Remember:
#
# - handle_request() is the top-level call. It calls
# select, get_request(), verify_request() and process_request()
# - get_request() is different for stream or datagram sockets
# - process_request() is the place that may fork a new process
# or create a new thread to finish the request
# - finish_request() instantiates the request handler class;
# this constructor will handle the request all by itself
def handle_request(self):
"""Handle one request, possibly blocking.
Respects self.timeout.
"""
# Support people who used socket.settimeout() to escape
# handle_request before self.timeout was available.
timeout = self.socket.gettimeout()
if timeout is None:
timeout = self.timeout
elif self.timeout is not None:
timeout = min(timeout, self.timeout)
fd_sets = _eintr_retry(select.select, [self], [], [], timeout)
if not fd_sets[0]:
self.handle_timeout()
return
self._handle_request_noblock()
def _handle_request_noblock(self):
"""Handle one request, without blocking.
I assume that select.select has returned that the socket is
readable before this function was called, so there should be
no risk of blocking in get_request().
"""
try:
request, client_address = self.get_request()
except socket.error:
return
if self.verify_request(request, client_address):
try:
self.process_request(request, client_address)
except:
self.handle_error(request, client_address)
self.shutdown_request(request)
def handle_timeout(self):
"""Called if no new request arrives within self.timeout.
Overridden by ForkingMixIn.
"""
pass
def verify_request(self, request, client_address):
"""Verify the request. May be overridden.
Return True if we should proceed with this request.
"""
return True
def process_request(self, request, client_address):
"""Call finish_request.
Overridden by ForkingMixIn and ThreadingMixIn.
"""
self.finish_request(request, client_address)
self.shutdown_request(request)
def server_close(self):
"""Called to clean-up the server.
May be overridden.
"""
pass
def finish_request(self, request, client_address):
"""Finish one request by instantiating RequestHandlerClass."""
self.RequestHandlerClass(request, client_address, self)
def shutdown_request(self, request):
"""Called to shutdown and close an individual request."""
self.close_request(request)
def close_request(self, request):
"""Called to clean up an individual request."""
pass
def handle_error(self, request, client_address):
"""Handle an error gracefully. May be overridden.
The default is to print a traceback and continue.
"""
print '-'*40
print 'Exception happened during processing of request from',
print client_address
import traceback
traceback.print_exc() # XXX But this goes to stderr!
print '-'*40
class TCPServer(BaseServer):
"""Base class for various socket-based server classes.
Defaults to synchronous IP stream (i.e., TCP).
Methods for the caller:
- __init__(server_address, RequestHandlerClass, bind_and_activate=True)
- serve_forever(poll_interval=0.5)
- shutdown()
- handle_request() # if you don't use serve_forever()
- fileno() -> int # for select()
Methods that may be overridden:
- server_bind()
- server_activate()
- get_request() -> request, client_address
- handle_timeout()
- verify_request(request, client_address)
- process_request(request, client_address)
- shutdown_request(request)
- close_request(request)
- handle_error()
Methods for derived classes:
- finish_request(request, client_address)
Class variables that may be overridden by derived classes or
instances:
- timeout
- address_family
- socket_type
- request_queue_size (only for stream sockets)
- allow_reuse_address
Instance variables:
- server_address
- RequestHandlerClass
- socket
"""
address_family = socket.AF_INET
socket_type = socket.SOCK_STREAM
request_queue_size = 5
allow_reuse_address = False
def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True):
"""Constructor. May be extended, do not override."""
BaseServer.__init__(self, server_address, RequestHandlerClass)
self.socket = socket.socket(self.address_family,
self.socket_type)
if bind_and_activate:
try:
self.server_bind()
self.server_activate()
except:
self.server_close()
raise
def server_bind(self):
"""Called by constructor to bind the socket.
May be overridden.
"""
if self.allow_reuse_address:
self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.socket.bind(self.server_address)
self.server_address = self.socket.getsockname()
def server_activate(self):
"""Called by constructor to activate the server.
May be overridden.
"""
self.socket.listen(self.request_queue_size)
def server_close(self):
"""Called to clean-up the server.
May be overridden.
"""
self.socket.close()
def fileno(self):
"""Return socket file number.
Interface required by select().
"""
return self.socket.fileno()
def get_request(self):
"""Get the request and client address from the socket.
May be overridden.
"""
return self.socket.accept()
def shutdown_request(self, request):
"""Called to shutdown and close an individual request."""
try:
#explicitly shutdown. socket.close() merely releases
#the socket and waits for GC to perform the actual close.
request.shutdown(socket.SHUT_WR)
except socket.error:
pass #some platforms may raise ENOTCONN here
self.close_request(request)
def close_request(self, request):
"""Called to clean up an individual request."""
request.close()
class UDPServer(TCPServer):
"""UDP server class."""
allow_reuse_address = False
socket_type = socket.SOCK_DGRAM
max_packet_size = 8192
def get_request(self):
data, client_addr = self.socket.recvfrom(self.max_packet_size)
return (data, self.socket), client_addr
def server_activate(self):
# No need to call listen() for UDP.
pass
def shutdown_request(self, request):
# No need to shutdown anything.
self.close_request(request)
def close_request(self, request):
# No need to close anything.
pass
class ForkingMixIn:
"""Mix-in class to handle each request in a new process."""
timeout = 300
active_children = None
max_children = 40
def collect_children(self):
"""Internal routine to wait for children that have exited."""
if self.active_children is None:
return
# If we're above the max number of children, wait and reap them until
# we go back below threshold. Note that we use waitpid(-1) below to be
# able to collect children in size(<defunct children>) syscalls instead
# of size(<children>): the downside is that this might reap children
# which we didn't spawn, which is why we only resort to this when we're
# above max_children.
while len(self.active_children) >= self.max_children:
try:
pid, _ = os.waitpid(-1, 0)
self.active_children.discard(pid)
except OSError as e:
if e.errno == errno.ECHILD:
# we don't have any children, we're done
self.active_children.clear()
elif e.errno != errno.EINTR:
break
# Now reap all defunct children.
for pid in self.active_children.copy():
try:
pid, _ = os.waitpid(pid, os.WNOHANG)
# if the child hasn't exited yet, pid will be 0 and ignored by
# discard() below
self.active_children.discard(pid)
except OSError as e:
if e.errno == errno.ECHILD:
# someone else reaped it
self.active_children.discard(pid)
def handle_timeout(self):
"""Wait for zombies after self.timeout seconds of inactivity.
May be extended, do not override.
"""
self.collect_children()
def process_request(self, request, client_address):
"""Fork a new subprocess to process the request."""
self.collect_children()
pid = os.fork()
if pid:
# Parent process
if self.active_children is None:
self.active_children = set()
self.active_children.add(pid)
self.close_request(request) #close handle in parent process
return
else:
# Child process.
# This must never return, hence os._exit()!
try:
self.finish_request(request, client_address)
self.shutdown_request(request)
os._exit(0)
except:
try:
self.handle_error(request, client_address)
self.shutdown_request(request)
finally:
os._exit(1)
class ThreadingMixIn:
"""Mix-in class to handle each request in a new thread."""
# Decides how threads will act upon termination of the
# main process
daemon_threads = False
def process_request_thread(self, request, client_address):
"""Same as in BaseServer but as a thread.
In addition, exception handling is done here.
"""
try:
self.finish_request(request, client_address)
self.shutdown_request(request)
except:
self.handle_error(request, client_address)
self.shutdown_request(request)
def process_request(self, request, client_address):
"""Start a new thread to process the request."""
t = threading.Thread(target = self.process_request_thread,
args = (request, client_address))
t.daemon = self.daemon_threads
t.start()
class ForkingUDPServer(ForkingMixIn, UDPServer): pass
class ForkingTCPServer(ForkingMixIn, TCPServer): pass
class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass
class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass
if hasattr(socket, 'AF_UNIX'):
class UnixStreamServer(TCPServer):
address_family = socket.AF_UNIX
class UnixDatagramServer(UDPServer):
address_family = socket.AF_UNIX
class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass
class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass
class BaseRequestHandler:
"""Base class for request handler classes.
This class is instantiated for each request to be handled. The
constructor sets the instance variables request, client_address
and server, and then calls the handle() method. To implement a
specific service, all you need to do is to derive a class which
defines a handle() method.
The handle() method can find the request as self.request, the
client address as self.client_address, and the server (in case it
needs access to per-server information) as self.server. Since a
separate instance is created for each request, the handle() method
can define arbitrary other instance variariables.
"""
def __init__(self, request, client_address, server):
self.request = request
self.client_address = client_address
self.server = server
self.setup()
try:
self.handle()
finally:
self.finish()
def setup(self):
pass
def handle(self):
pass
def finish(self):
pass
# The following two classes make it possible to use the same service
# class for stream or datagram servers.
# Each class sets up these instance variables:
# - rfile: a file object from which receives the request is read
# - wfile: a file object to which the reply is written
# When the handle() method returns, wfile is flushed properly
class StreamRequestHandler(BaseRequestHandler):
"""Define self.rfile and self.wfile for stream sockets."""
# Default buffer sizes for rfile, wfile.
# We default rfile to buffered because otherwise it could be
# really slow for large data (a getc() call per byte); we make
# wfile unbuffered because (a) often after a write() we want to
# read and we need to flush the line; (b) big writes to unbuffered
# files are typically optimized by stdio even when big reads
# aren't.
rbufsize = -1
wbufsize = 0
# A timeout to apply to the request socket, if not None.
timeout = None
# Disable nagle algorithm for this socket, if True.
# Use only when wbufsize != 0, to avoid small packets.
disable_nagle_algorithm = False
def setup(self):
self.connection = self.request
if self.timeout is not None:
self.connection.settimeout(self.timeout)
if self.disable_nagle_algorithm:
self.connection.setsockopt(socket.IPPROTO_TCP,
socket.TCP_NODELAY, True)
self.rfile = self.connection.makefile('rb', self.rbufsize)
self.wfile = self.connection.makefile('wb', self.wbufsize)
def finish(self):
if not self.wfile.closed:
try:
self.wfile.flush()
except socket.error:
# An final socket error may have occurred here, such as
# the local error ECONNABORTED.
pass
self.wfile.close()
self.rfile.close()
class DatagramRequestHandler(BaseRequestHandler):
# XXX Regrettably, I cannot get this working on Linux;
# s.recvfrom() doesn't return a meaningful client address.
"""Define self.rfile and self.wfile for datagram sockets."""
def setup(self):
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
self.packet, self.socket = self.request
self.rfile = StringIO(self.packet)
self.wfile = StringIO()
def finish(self):
self.socket.sendto(self.wfile.getvalue(), self.client_address)

View File

@ -0,0 +1,324 @@
r"""File-like objects that read from or write to a string buffer.
This implements (nearly) all stdio methods.
f = StringIO() # ready for writing
f = StringIO(buf) # ready for reading
f.close() # explicitly release resources held
flag = f.isatty() # always false
pos = f.tell() # get current position
f.seek(pos) # set current position
f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read() # read until EOF
buf = f.read(n) # read up to n bytes
buf = f.readline() # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.truncate([size]) # truncate file at to at most size (default: current pos)
f.write(buf) # write at current position
f.writelines(list) # for line in list: f.write(line)
f.getvalue() # return whole file's contents as a string
Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
an exception early.
- Seeking far beyond EOF and then writing will insert real null
bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
try:
from errno import EINVAL
except ImportError:
EINVAL = 22
__all__ = ["StringIO"]
def _complain_ifclosed(closed):
if closed:
raise ValueError, "I/O operation on closed file"
class StringIO:
"""class StringIO([buffer])
When a StringIO object is created, it can be initialized to an existing
string by passing the string to the constructor. If no string is given,
the StringIO will start empty.
The StringIO object can accept either Unicode or 8-bit strings, but
mixing the two may take some care. If both are used, 8-bit strings that
cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
a UnicodeError to be raised when getvalue() is called.
"""
def __init__(self, buf = ''):
# Force self.buf to be a string or unicode
if not isinstance(buf, basestring):
buf = str(buf)
self.buf = buf
self.len = len(buf)
self.buflist = []
self.pos = 0
self.closed = False
self.softspace = 0
def __iter__(self):
return self
def next(self):
"""A file object is its own iterator, for example iter(f) returns f
(unless f is closed). When a file is used as an iterator, typically
in a for loop (for example, for line in f: print line), the next()
method is called repeatedly. This method returns the next input line,
or raises StopIteration when EOF is hit.
"""
_complain_ifclosed(self.closed)
r = self.readline()
if not r:
raise StopIteration
return r
def close(self):
"""Free the memory buffer.
"""
if not self.closed:
self.closed = True
del self.buf, self.pos
def isatty(self):
"""Returns False because StringIO objects are not connected to a
tty-like device.
"""
_complain_ifclosed(self.closed)
return False
def seek(self, pos, mode = 0):
"""Set the file's current position.
The mode argument is optional and defaults to 0 (absolute file
positioning); other values are 1 (seek relative to the current
position) and 2 (seek relative to the file's end).
There is no return value.
"""
_complain_ifclosed(self.closed)
if self.buflist:
self.buf += ''.join(self.buflist)
self.buflist = []
if mode == 1:
pos += self.pos
elif mode == 2:
pos += self.len
self.pos = max(0, pos)
def tell(self):
"""Return the file's current position."""
_complain_ifclosed(self.closed)
return self.pos
def read(self, n = -1):
"""Read at most size bytes from the file
(less if the read hits EOF before obtaining size bytes).
If the size argument is negative or omitted, read all data until EOF
is reached. The bytes are returned as a string object. An empty
string is returned when EOF is encountered immediately.
"""
_complain_ifclosed(self.closed)
if self.buflist:
self.buf += ''.join(self.buflist)
self.buflist = []
if n is None or n < 0:
newpos = self.len
else:
newpos = min(self.pos+n, self.len)
r = self.buf[self.pos:newpos]
self.pos = newpos
return r
def readline(self, length=None):
r"""Read one entire line from the file.
A trailing newline character is kept in the string (but may be absent
when a file ends with an incomplete line). If the size argument is
present and non-negative, it is a maximum byte count (including the
trailing newline) and an incomplete line may be returned.
An empty string is returned only when EOF is encountered immediately.
Note: Unlike stdio's fgets(), the returned string contains null
characters ('\0') if they occurred in the input.
"""
_complain_ifclosed(self.closed)
if self.buflist:
self.buf += ''.join(self.buflist)
self.buflist = []
i = self.buf.find('\n', self.pos)
if i < 0:
newpos = self.len
else:
newpos = i+1
if length is not None and length >= 0:
if self.pos + length < newpos:
newpos = self.pos + length
r = self.buf[self.pos:newpos]
self.pos = newpos
return r
def readlines(self, sizehint = 0):
"""Read until EOF using readline() and return a list containing the
lines thus read.
If the optional sizehint argument is present, instead of reading up
to EOF, whole lines totalling approximately sizehint bytes (or more
to accommodate a final whole line).
"""
total = 0
lines = []
line = self.readline()
while line:
lines.append(line)
total += len(line)
if 0 < sizehint <= total:
break
line = self.readline()
return lines
def truncate(self, size=None):
"""Truncate the file's size.
If the optional size argument is present, the file is truncated to
(at most) that size. The size defaults to the current position.
The current file position is not changed unless the position
is beyond the new file size.
If the specified size exceeds the file's current size, the
file remains unchanged.
"""
_complain_ifclosed(self.closed)
if size is None:
size = self.pos
elif size < 0:
raise IOError(EINVAL, "Negative size not allowed")
elif size < self.pos:
self.pos = size
self.buf = self.getvalue()[:size]
self.len = size
def write(self, s):
"""Write a string to the file.
There is no return value.
"""
_complain_ifclosed(self.closed)
if not s: return
# Force s to be a string or unicode
if not isinstance(s, basestring):
s = str(s)
spos = self.pos
slen = self.len
if spos == slen:
self.buflist.append(s)
self.len = self.pos = spos + len(s)
return
if spos > slen:
self.buflist.append('\0'*(spos - slen))
slen = spos
newpos = spos + len(s)
if spos < slen:
if self.buflist:
self.buf += ''.join(self.buflist)
self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
self.buf = ''
if newpos > slen:
slen = newpos
else:
self.buflist.append(s)
slen = newpos
self.len = slen
self.pos = newpos
def writelines(self, iterable):
"""Write a sequence of strings to the file. The sequence can be any
iterable object producing strings, typically a list of strings. There
is no return value.
(The name is intended to match readlines(); writelines() does not add
line separators.)
"""
write = self.write
for line in iterable:
write(line)
def flush(self):
"""Flush the internal buffer
"""
_complain_ifclosed(self.closed)
def getvalue(self):
"""
Retrieve the entire contents of the "file" at any time before
the StringIO object's close() method is called.
The StringIO object can accept either Unicode or 8-bit strings,
but mixing the two may take some care. If both are used, 8-bit
strings that cannot be interpreted as 7-bit ASCII (that use the
8th bit) will cause a UnicodeError to be raised when getvalue()
is called.
"""
_complain_ifclosed(self.closed)
if self.buflist:
self.buf += ''.join(self.buflist)
self.buflist = []
return self.buf
# A little test suite
def test():
import sys
if sys.argv[1:]:
file = sys.argv[1]
else:
file = '/etc/passwd'
lines = open(file, 'r').readlines()
text = open(file, 'r').read()
f = StringIO()
for line in lines[:-2]:
f.write(line)
f.writelines(lines[-2:])
if f.getvalue() != text:
raise RuntimeError, 'write failed'
length = f.tell()
print 'File length =', length
f.seek(len(lines[0]))
f.write(lines[1])
f.seek(0)
print 'First line =', repr(f.readline())
print 'Position =', f.tell()
line = f.readline()
print 'Second line =', repr(line)
f.seek(-len(line), 1)
line2 = f.read(len(line))
if line != line2:
raise RuntimeError, 'bad result after seek back'
f.seek(len(line2), 1)
list = f.readlines()
line = list[-1]
f.seek(f.tell() - len(line))
line2 = f.read()
if line != line2:
raise RuntimeError, 'bad result after seek back from EOF'
print 'Read', len(list), 'more lines'
print 'File length =', f.tell()
if f.tell() != length:
raise RuntimeError, 'bad length'
f.truncate(length/2)
f.seek(0, 2)
print 'Truncated length =', f.tell()
if f.tell() != length/2:
raise RuntimeError, 'truncate did not adjust length'
f.close()
if __name__ == '__main__':
test()

View File

@ -0,0 +1,180 @@
"""A more or less complete user-defined wrapper around dictionary objects."""
class UserDict:
def __init__(self, dict=None, **kwargs):
self.data = {}
if dict is not None:
self.update(dict)
if len(kwargs):
self.update(kwargs)
def __repr__(self): return repr(self.data)
def __cmp__(self, dict):
if isinstance(dict, UserDict):
return cmp(self.data, dict.data)
else:
return cmp(self.data, dict)
__hash__ = None # Avoid Py3k warning
def __len__(self): return len(self.data)
def __getitem__(self, key):
if key in self.data:
return self.data[key]
if hasattr(self.__class__, "__missing__"):
return self.__class__.__missing__(self, key)
raise KeyError(key)
def __setitem__(self, key, item): self.data[key] = item
def __delitem__(self, key): del self.data[key]
def clear(self): self.data.clear()
def copy(self):
if self.__class__ is UserDict:
return UserDict(self.data.copy())
import copy
data = self.data
try:
self.data = {}
c = copy.copy(self)
finally:
self.data = data
c.update(self)
return c
def keys(self): return self.data.keys()
def items(self): return self.data.items()
def iteritems(self): return self.data.iteritems()
def iterkeys(self): return self.data.iterkeys()
def itervalues(self): return self.data.itervalues()
def values(self): return self.data.values()
def has_key(self, key): return key in self.data
def update(self, dict=None, **kwargs):
if dict is None:
pass
elif isinstance(dict, UserDict):
self.data.update(dict.data)
elif isinstance(dict, type({})) or not hasattr(dict, 'items'):
self.data.update(dict)
else:
for k, v in dict.items():
self[k] = v
if len(kwargs):
self.data.update(kwargs)
def get(self, key, failobj=None):
if key not in self:
return failobj
return self[key]
def setdefault(self, key, failobj=None):
if key not in self:
self[key] = failobj
return self[key]
def pop(self, key, *args):
return self.data.pop(key, *args)
def popitem(self):
return self.data.popitem()
def __contains__(self, key):
return key in self.data
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
class IterableUserDict(UserDict):
def __iter__(self):
return iter(self.data)
import _abcoll
_abcoll.MutableMapping.register(IterableUserDict)
class DictMixin:
# Mixin defining all dictionary methods for classes that already have
# a minimum dictionary interface including getitem, setitem, delitem,
# and keys. Without knowledge of the subclass constructor, the mixin
# does not define __init__() or copy(). In addition to the four base
# methods, progressively more efficiency comes with defining
# __contains__(), __iter__(), and iteritems().
# second level definitions support higher levels
def __iter__(self):
for k in self.keys():
yield k
def has_key(self, key):
try:
self[key]
except KeyError:
return False
return True
def __contains__(self, key):
return self.has_key(key)
# third level takes advantage of second level definitions
def iteritems(self):
for k in self:
yield (k, self[k])
def iterkeys(self):
return self.__iter__()
# fourth level uses definitions from lower levels
def itervalues(self):
for _, v in self.iteritems():
yield v
def values(self):
return [v for _, v in self.iteritems()]
def items(self):
return list(self.iteritems())
def clear(self):
for key in self.keys():
del self[key]
def setdefault(self, key, default=None):
try:
return self[key]
except KeyError:
self[key] = default
return default
def pop(self, key, *args):
if len(args) > 1:
raise TypeError, "pop expected at most 2 arguments, got "\
+ repr(1 + len(args))
try:
value = self[key]
except KeyError:
if args:
return args[0]
raise
del self[key]
return value
def popitem(self):
try:
k, v = self.iteritems().next()
except StopIteration:
raise KeyError, 'container is empty'
del self[k]
return (k, v)
def update(self, other=None, **kwargs):
# Make progressively weaker assumptions about "other"
if other is None:
pass
elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups
for k, v in other.iteritems():
self[k] = v
elif hasattr(other, 'keys'):
for k in other.keys():
self[k] = other[k]
else:
for k, v in other:
self[k] = v
if kwargs:
self.update(kwargs)
def get(self, key, default=None):
try:
return self[key]
except KeyError:
return default
def __repr__(self):
return repr(dict(self.iteritems()))
def __cmp__(self, other):
if other is None:
return 1
if isinstance(other, DictMixin):
other = dict(other.iteritems())
return cmp(dict(self.iteritems()), other)
def __len__(self):
return len(self.keys())

View File

@ -0,0 +1,692 @@
# Copyright 2007 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Abstract Base Classes (ABCs) for collections, according to PEP 3119.
DON'T USE THIS MODULE DIRECTLY! The classes here should be imported
via collections; they are defined here only to alleviate certain
bootstrapping issues. Unit tests are in test_collections.
"""
from abc import ABCMeta, abstractmethod
import sys
__all__ = ["Hashable", "Iterable", "Iterator",
"Sized", "Container", "Callable",
"Set", "MutableSet",
"Mapping", "MutableMapping",
"MappingView", "KeysView", "ItemsView", "ValuesView",
"Sequence", "MutableSequence",
]
### ONE-TRICK PONIES ###
def _hasattr(C, attr):
try:
return any(attr in B.__dict__ for B in C.__mro__)
except AttributeError:
# Old-style class
return hasattr(C, attr)
class Hashable:
__metaclass__ = ABCMeta
@abstractmethod
def __hash__(self):
return 0
@classmethod
def __subclasshook__(cls, C):
if cls is Hashable:
try:
for B in C.__mro__:
if "__hash__" in B.__dict__:
if B.__dict__["__hash__"]:
return True
break
except AttributeError:
# Old-style class
if getattr(C, "__hash__", None):
return True
return NotImplemented
class Iterable:
__metaclass__ = ABCMeta
@abstractmethod
def __iter__(self):
while False:
yield None
@classmethod
def __subclasshook__(cls, C):
if cls is Iterable:
if _hasattr(C, "__iter__"):
return True
return NotImplemented
Iterable.register(str)
class Iterator(Iterable):
@abstractmethod
def next(self):
'Return the next item from the iterator. When exhausted, raise StopIteration'
raise StopIteration
def __iter__(self):
return self
@classmethod
def __subclasshook__(cls, C):
if cls is Iterator:
if _hasattr(C, "next") and _hasattr(C, "__iter__"):
return True
return NotImplemented
class Sized:
__metaclass__ = ABCMeta
@abstractmethod
def __len__(self):
return 0
@classmethod
def __subclasshook__(cls, C):
if cls is Sized:
if _hasattr(C, "__len__"):
return True
return NotImplemented
class Container:
__metaclass__ = ABCMeta
@abstractmethod
def __contains__(self, x):
return False
@classmethod
def __subclasshook__(cls, C):
if cls is Container:
if _hasattr(C, "__contains__"):
return True
return NotImplemented
class Callable:
__metaclass__ = ABCMeta
@abstractmethod
def __call__(self, *args, **kwds):
return False
@classmethod
def __subclasshook__(cls, C):
if cls is Callable:
if _hasattr(C, "__call__"):
return True
return NotImplemented
### SETS ###
class Set(Sized, Iterable, Container):
"""A set is a finite, iterable container.
This class provides concrete generic implementations of all
methods except for __contains__, __iter__ and __len__.
To override the comparisons (presumably for speed, as the
semantics are fixed), redefine __le__ and __ge__,
then the other operations will automatically follow suit.
"""
def __le__(self, other):
if not isinstance(other, Set):
return NotImplemented
if len(self) > len(other):
return False
for elem in self:
if elem not in other:
return False
return True
def __lt__(self, other):
if not isinstance(other, Set):
return NotImplemented
return len(self) < len(other) and self.__le__(other)
def __gt__(self, other):
if not isinstance(other, Set):
return NotImplemented
return len(self) > len(other) and self.__ge__(other)
def __ge__(self, other):
if not isinstance(other, Set):
return NotImplemented
if len(self) < len(other):
return False
for elem in other:
if elem not in self:
return False
return True
def __eq__(self, other):
if not isinstance(other, Set):
return NotImplemented
return len(self) == len(other) and self.__le__(other)
def __ne__(self, other):
return not (self == other)
@classmethod
def _from_iterable(cls, it):
'''Construct an instance of the class from any iterable input.
Must override this method if the class constructor signature
does not accept an iterable for an input.
'''
return cls(it)
def __and__(self, other):
if not isinstance(other, Iterable):
return NotImplemented
return self._from_iterable(value for value in other if value in self)
__rand__ = __and__
def isdisjoint(self, other):
'Return True if two sets have a null intersection.'
for value in other:
if value in self:
return False
return True
def __or__(self, other):
if not isinstance(other, Iterable):
return NotImplemented
chain = (e for s in (self, other) for e in s)
return self._from_iterable(chain)
__ror__ = __or__
def __sub__(self, other):
if not isinstance(other, Set):
if not isinstance(other, Iterable):
return NotImplemented
other = self._from_iterable(other)
return self._from_iterable(value for value in self
if value not in other)
def __rsub__(self, other):
if not isinstance(other, Set):
if not isinstance(other, Iterable):
return NotImplemented
other = self._from_iterable(other)
return self._from_iterable(value for value in other
if value not in self)
def __xor__(self, other):
if not isinstance(other, Set):
if not isinstance(other, Iterable):
return NotImplemented
other = self._from_iterable(other)
return (self - other) | (other - self)
__rxor__ = __xor__
# Sets are not hashable by default, but subclasses can change this
__hash__ = None
def _hash(self):
"""Compute the hash value of a set.
Note that we don't define __hash__: not all sets are hashable.
But if you define a hashable set type, its __hash__ should
call this function.
This must be compatible __eq__.
All sets ought to compare equal if they contain the same
elements, regardless of how they are implemented, and
regardless of the order of the elements; so there's not much
freedom for __eq__ or __hash__. We match the algorithm used
by the built-in frozenset type.
"""
MAX = sys.maxint
MASK = 2 * MAX + 1
n = len(self)
h = 1927868237 * (n + 1)
h &= MASK
for x in self:
hx = hash(x)
h ^= (hx ^ (hx << 16) ^ 89869747) * 3644798167
h &= MASK
h = h * 69069 + 907133923
h &= MASK
if h > MAX:
h -= MASK + 1
if h == -1:
h = 590923713
return h
Set.register(frozenset)
class MutableSet(Set):
"""A mutable set is a finite, iterable container.
This class provides concrete generic implementations of all
methods except for __contains__, __iter__, __len__,
add(), and discard().
To override the comparisons (presumably for speed, as the
semantics are fixed), all you have to do is redefine __le__ and
then the other operations will automatically follow suit.
"""
@abstractmethod
def add(self, value):
"""Add an element."""
raise NotImplementedError
@abstractmethod
def discard(self, value):
"""Remove an element. Do not raise an exception if absent."""
raise NotImplementedError
def remove(self, value):
"""Remove an element. If not a member, raise a KeyError."""
if value not in self:
raise KeyError(value)
self.discard(value)
def pop(self):
"""Return the popped value. Raise KeyError if empty."""
it = iter(self)
try:
value = next(it)
except StopIteration:
raise KeyError
self.discard(value)
return value
def clear(self):
"""This is slow (creates N new iterators!) but effective."""
try:
while True:
self.pop()
except KeyError:
pass
def __ior__(self, it):
for value in it:
self.add(value)
return self
def __iand__(self, it):
for value in (self - it):
self.discard(value)
return self
def __ixor__(self, it):
if it is self:
self.clear()
else:
if not isinstance(it, Set):
it = self._from_iterable(it)
for value in it:
if value in self:
self.discard(value)
else:
self.add(value)
return self
def __isub__(self, it):
if it is self:
self.clear()
else:
for value in it:
self.discard(value)
return self
MutableSet.register(set)
### MAPPINGS ###
class Mapping(Sized, Iterable, Container):
"""A Mapping is a generic container for associating key/value
pairs.
This class provides concrete generic implementations of all
methods except for __getitem__, __iter__, and __len__.
"""
@abstractmethod
def __getitem__(self, key):
raise KeyError
def get(self, key, default=None):
'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.'
try:
return self[key]
except KeyError:
return default
def __contains__(self, key):
try:
self[key]
except KeyError:
return False
else:
return True
def iterkeys(self):
'D.iterkeys() -> an iterator over the keys of D'
return iter(self)
def itervalues(self):
'D.itervalues() -> an iterator over the values of D'
for key in self:
yield self[key]
def iteritems(self):
'D.iteritems() -> an iterator over the (key, value) items of D'
for key in self:
yield (key, self[key])
def keys(self):
"D.keys() -> list of D's keys"
return list(self)
def items(self):
"D.items() -> list of D's (key, value) pairs, as 2-tuples"
return [(key, self[key]) for key in self]
def values(self):
"D.values() -> list of D's values"
return [self[key] for key in self]
# Mappings are not hashable by default, but subclasses can change this
__hash__ = None
def __eq__(self, other):
if not isinstance(other, Mapping):
return NotImplemented
return dict(self.items()) == dict(other.items())
def __ne__(self, other):
return not (self == other)
class MappingView(Sized):
def __init__(self, mapping):
self._mapping = mapping
def __len__(self):
return len(self._mapping)
def __repr__(self):
return '{0.__class__.__name__}({0._mapping!r})'.format(self)
class KeysView(MappingView, Set):
@classmethod
def _from_iterable(self, it):
return set(it)
def __contains__(self, key):
return key in self._mapping
def __iter__(self):
for key in self._mapping:
yield key
class ItemsView(MappingView, Set):
@classmethod
def _from_iterable(self, it):
return set(it)
def __contains__(self, item):
key, value = item
try:
v = self._mapping[key]
except KeyError:
return False
else:
return v == value
def __iter__(self):
for key in self._mapping:
yield (key, self._mapping[key])
class ValuesView(MappingView):
def __contains__(self, value):
for key in self._mapping:
if value == self._mapping[key]:
return True
return False
def __iter__(self):
for key in self._mapping:
yield self._mapping[key]
class MutableMapping(Mapping):
"""A MutableMapping is a generic container for associating
key/value pairs.
This class provides concrete generic implementations of all
methods except for __getitem__, __setitem__, __delitem__,
__iter__, and __len__.
"""
@abstractmethod
def __setitem__(self, key, value):
raise KeyError
@abstractmethod
def __delitem__(self, key):
raise KeyError
__marker = object()
def pop(self, key, default=__marker):
'''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
'''
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def popitem(self):
'''D.popitem() -> (k, v), remove and return some (key, value) pair
as a 2-tuple; but raise KeyError if D is empty.
'''
try:
key = next(iter(self))
except StopIteration:
raise KeyError
value = self[key]
del self[key]
return key, value
def clear(self):
'D.clear() -> None. Remove all items from D.'
try:
while True:
self.popitem()
except KeyError:
pass
def update(*args, **kwds):
''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F.
If E present and has a .keys() method, does: for k in E: D[k] = E[k]
If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
In either case, this is followed by: for k, v in F.items(): D[k] = v
'''
if not args:
raise TypeError("descriptor 'update' of 'MutableMapping' object "
"needs an argument")
self = args[0]
args = args[1:]
if len(args) > 1:
raise TypeError('update expected at most 1 arguments, got %d' %
len(args))
if args:
other = args[0]
if isinstance(other, Mapping):
for key in other:
self[key] = other[key]
elif hasattr(other, "keys"):
for key in other.keys():
self[key] = other[key]
else:
for key, value in other:
self[key] = value
for key, value in kwds.items():
self[key] = value
def setdefault(self, key, default=None):
'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D'
try:
return self[key]
except KeyError:
self[key] = default
return default
MutableMapping.register(dict)
### SEQUENCES ###
class Sequence(Sized, Iterable, Container):
"""All the operations on a read-only sequence.
Concrete subclasses must override __new__ or __init__,
__getitem__, and __len__.
"""
@abstractmethod
def __getitem__(self, index):
raise IndexError
def __iter__(self):
i = 0
try:
while True:
v = self[i]
yield v
i += 1
except IndexError:
return
def __contains__(self, value):
for v in self:
if v == value:
return True
return False
def __reversed__(self):
for i in reversed(range(len(self))):
yield self[i]
def index(self, value):
'''S.index(value) -> integer -- return first index of value.
Raises ValueError if the value is not present.
'''
for i, v in enumerate(self):
if v == value:
return i
raise ValueError
def count(self, value):
'S.count(value) -> integer -- return number of occurrences of value'
return sum(1 for v in self if v == value)
Sequence.register(tuple)
Sequence.register(basestring)
Sequence.register(buffer)
Sequence.register(xrange)
class MutableSequence(Sequence):
"""All the operations on a read-only sequence.
Concrete subclasses must provide __new__ or __init__,
__getitem__, __setitem__, __delitem__, __len__, and insert().
"""
@abstractmethod
def __setitem__(self, index, value):
raise IndexError
@abstractmethod
def __delitem__(self, index):
raise IndexError
@abstractmethod
def insert(self, index, value):
'S.insert(index, object) -- insert object before index'
raise IndexError
def append(self, value):
'S.append(object) -- append object to the end of the sequence'
self.insert(len(self), value)
def reverse(self):
'S.reverse() -- reverse *IN PLACE*'
n = len(self)
for i in range(n//2):
self[i], self[n-i-1] = self[n-i-1], self[i]
def extend(self, values):
'S.extend(iterable) -- extend sequence by appending elements from the iterable'
for v in values:
self.append(v)
def pop(self, index=-1):
'''S.pop([index]) -> item -- remove and return item at index (default last).
Raise IndexError if list is empty or index is out of range.
'''
v = self[index]
del self[index]
return v
def remove(self, value):
'''S.remove(value) -- remove first occurrence of value.
Raise ValueError if the value is not present.
'''
del self[self.index(value)]
def __iadd__(self, values):
self.extend(values)
return self
MutableSequence.register(list)

View File

@ -0,0 +1,204 @@
# Access WeakSet through the weakref module.
# This code is separated-out because it is needed
# by abc.py to load everything else at startup.
from _weakref import ref
__all__ = ['WeakSet']
class _IterationGuard(object):
# This context manager registers itself in the current iterators of the
# weak container, such as to delay all removals until the context manager
# exits.
# This technique should be relatively thread-safe (since sets are).
def __init__(self, weakcontainer):
# Don't create cycles
self.weakcontainer = ref(weakcontainer)
def __enter__(self):
w = self.weakcontainer()
if w is not None:
w._iterating.add(self)
return self
def __exit__(self, e, t, b):
w = self.weakcontainer()
if w is not None:
s = w._iterating
s.remove(self)
if not s:
w._commit_removals()
class WeakSet(object):
def __init__(self, data=None):
self.data = set()
def _remove(item, selfref=ref(self)):
self = selfref()
if self is not None:
if self._iterating:
self._pending_removals.append(item)
else:
self.data.discard(item)
self._remove = _remove
# A list of keys to be removed
self._pending_removals = []
self._iterating = set()
if data is not None:
self.update(data)
def _commit_removals(self):
l = self._pending_removals
discard = self.data.discard
while l:
discard(l.pop())
def __iter__(self):
with _IterationGuard(self):
for itemref in self.data:
item = itemref()
if item is not None:
# Caveat: the iterator will keep a strong reference to
# `item` until it is resumed or closed.
yield item
def __len__(self):
return len(self.data) - len(self._pending_removals)
def __contains__(self, item):
try:
wr = ref(item)
except TypeError:
return False
return wr in self.data
def __reduce__(self):
return (self.__class__, (list(self),),
getattr(self, '__dict__', None))
__hash__ = None
def add(self, item):
if self._pending_removals:
self._commit_removals()
self.data.add(ref(item, self._remove))
def clear(self):
if self._pending_removals:
self._commit_removals()
self.data.clear()
def copy(self):
return self.__class__(self)
def pop(self):
if self._pending_removals:
self._commit_removals()
while True:
try:
itemref = self.data.pop()
except KeyError:
raise KeyError('pop from empty WeakSet')
item = itemref()
if item is not None:
return item
def remove(self, item):
if self._pending_removals:
self._commit_removals()
self.data.remove(ref(item))
def discard(self, item):
if self._pending_removals:
self._commit_removals()
self.data.discard(ref(item))
def update(self, other):
if self._pending_removals:
self._commit_removals()
for element in other:
self.add(element)
def __ior__(self, other):
self.update(other)
return self
def difference(self, other):
newset = self.copy()
newset.difference_update(other)
return newset
__sub__ = difference
def difference_update(self, other):
self.__isub__(other)
def __isub__(self, other):
if self._pending_removals:
self._commit_removals()
if self is other:
self.data.clear()
else:
self.data.difference_update(ref(item) for item in other)
return self
def intersection(self, other):
return self.__class__(item for item in other if item in self)
__and__ = intersection
def intersection_update(self, other):
self.__iand__(other)
def __iand__(self, other):
if self._pending_removals:
self._commit_removals()
self.data.intersection_update(ref(item) for item in other)
return self
def issubset(self, other):
return self.data.issubset(ref(item) for item in other)
__le__ = issubset
def __lt__(self, other):
return self.data < set(ref(item) for item in other)
def issuperset(self, other):
return self.data.issuperset(ref(item) for item in other)
__ge__ = issuperset
def __gt__(self, other):
return self.data > set(ref(item) for item in other)
def __eq__(self, other):
if not isinstance(other, self.__class__):
return NotImplemented
return self.data == set(ref(item) for item in other)
def __ne__(self, other):
opposite = self.__eq__(other)
if opposite is NotImplemented:
return NotImplemented
return not opposite
def symmetric_difference(self, other):
newset = self.copy()
newset.symmetric_difference_update(other)
return newset
__xor__ = symmetric_difference
def symmetric_difference_update(self, other):
self.__ixor__(other)
def __ixor__(self, other):
if self._pending_removals:
self._commit_removals()
if self is other:
self.data.clear()
else:
self.data.symmetric_difference_update(ref(item, self._remove) for item in other)
return self
def union(self, other):
return self.__class__(e for s in (self, other) for e in s)
__or__ = union
def isdisjoint(self, other):
return len(self.intersection(other)) == 0

View File

@ -0,0 +1,185 @@
# Copyright 2007 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Abstract Base Classes (ABCs) according to PEP 3119."""
import types
from _weakrefset import WeakSet
# Instance of old-style class
class _C: pass
_InstanceType = type(_C())
def abstractmethod(funcobj):
"""A decorator indicating abstract methods.
Requires that the metaclass is ABCMeta or derived from it. A
class that has a metaclass derived from ABCMeta cannot be
instantiated unless all of its abstract methods are overridden.
The abstract methods can be called using any of the normal
'super' call mechanisms.
Usage:
class C:
__metaclass__ = ABCMeta
@abstractmethod
def my_abstract_method(self, ...):
...
"""
funcobj.__isabstractmethod__ = True
return funcobj
class abstractproperty(property):
"""A decorator indicating abstract properties.
Requires that the metaclass is ABCMeta or derived from it. A
class that has a metaclass derived from ABCMeta cannot be
instantiated unless all of its abstract properties are overridden.
The abstract properties can be called using any of the normal
'super' call mechanisms.
Usage:
class C:
__metaclass__ = ABCMeta
@abstractproperty
def my_abstract_property(self):
...
This defines a read-only property; you can also define a read-write
abstract property using the 'long' form of property declaration:
class C:
__metaclass__ = ABCMeta
def getx(self): ...
def setx(self, value): ...
x = abstractproperty(getx, setx)
"""
__isabstractmethod__ = True
class ABCMeta(type):
"""Metaclass for defining Abstract Base Classes (ABCs).
Use this metaclass to create an ABC. An ABC can be subclassed
directly, and then acts as a mix-in class. You can also register
unrelated concrete classes (even built-in classes) and unrelated
ABCs as 'virtual subclasses' -- these and their descendants will
be considered subclasses of the registering ABC by the built-in
issubclass() function, but the registering ABC won't show up in
their MRO (Method Resolution Order) nor will method
implementations defined by the registering ABC be callable (not
even via super()).
"""
# A global counter that is incremented each time a class is
# registered as a virtual subclass of anything. It forces the
# negative cache to be cleared before its next use.
_abc_invalidation_counter = 0
def __new__(mcls, name, bases, namespace):
cls = super(ABCMeta, mcls).__new__(mcls, name, bases, namespace)
# Compute set of abstract method names
abstracts = set(name
for name, value in namespace.items()
if getattr(value, "__isabstractmethod__", False))
for base in bases:
for name in getattr(base, "__abstractmethods__", set()):
value = getattr(cls, name, None)
if getattr(value, "__isabstractmethod__", False):
abstracts.add(name)
cls.__abstractmethods__ = frozenset(abstracts)
# Set up inheritance registry
cls._abc_registry = WeakSet()
cls._abc_cache = WeakSet()
cls._abc_negative_cache = WeakSet()
cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
return cls
def register(cls, subclass):
"""Register a virtual subclass of an ABC."""
if not isinstance(subclass, (type, types.ClassType)):
raise TypeError("Can only register classes")
if issubclass(subclass, cls):
return # Already a subclass
# Subtle: test for cycles *after* testing for "already a subclass";
# this means we allow X.register(X) and interpret it as a no-op.
if issubclass(cls, subclass):
# This would create a cycle, which is bad for the algorithm below
raise RuntimeError("Refusing to create an inheritance cycle")
cls._abc_registry.add(subclass)
ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache
def _dump_registry(cls, file=None):
"""Debug helper to print the ABC registry."""
print >> file, "Class: %s.%s" % (cls.__module__, cls.__name__)
print >> file, "Inv.counter: %s" % ABCMeta._abc_invalidation_counter
for name in sorted(cls.__dict__.keys()):
if name.startswith("_abc_"):
value = getattr(cls, name)
print >> file, "%s: %r" % (name, value)
def __instancecheck__(cls, instance):
"""Override for isinstance(instance, cls)."""
# Inline the cache checking when it's simple.
subclass = getattr(instance, '__class__', None)
if subclass is not None and subclass in cls._abc_cache:
return True
subtype = type(instance)
# Old-style instances
if subtype is _InstanceType:
subtype = subclass
if subtype is subclass or subclass is None:
if (cls._abc_negative_cache_version ==
ABCMeta._abc_invalidation_counter and
subtype in cls._abc_negative_cache):
return False
# Fall back to the subclass check.
return cls.__subclasscheck__(subtype)
return (cls.__subclasscheck__(subclass) or
cls.__subclasscheck__(subtype))
def __subclasscheck__(cls, subclass):
"""Override for issubclass(subclass, cls)."""
# Check cache
if subclass in cls._abc_cache:
return True
# Check negative cache; may have to invalidate
if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter:
# Invalidate the negative cache
cls._abc_negative_cache = WeakSet()
cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
elif subclass in cls._abc_negative_cache:
return False
# Check the subclass hook
ok = cls.__subclasshook__(subclass)
if ok is not NotImplemented:
assert isinstance(ok, bool)
if ok:
cls._abc_cache.add(subclass)
else:
cls._abc_negative_cache.add(subclass)
return ok
# Check if it's a direct subclass
if cls in getattr(subclass, '__mro__', ()):
cls._abc_cache.add(subclass)
return True
# Check if it's a subclass of a registered class (recursive)
for rcls in cls._abc_registry:
if issubclass(subclass, rcls):
cls._abc_cache.add(subclass)
return True
# Check if it's a subclass of a subclass (recursive)
for scls in cls.__subclasses__():
if issubclass(subclass, scls):
cls._abc_cache.add(subclass)
return True
# No dice; update negative cache
cls._abc_negative_cache.add(subclass)
return False

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,311 @@
# -*- coding: utf-8 -*-
"""
ast
~~~
The `ast` module helps Python applications to process trees of the Python
abstract syntax grammar. The abstract syntax itself might change with
each Python release; this module helps to find out programmatically what
the current grammar looks like and allows modifications of it.
An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
a flag to the `compile()` builtin function or by using the `parse()`
function from this module. The result will be a tree of objects whose
classes all inherit from `ast.AST`.
A modified abstract syntax tree can be compiled into a Python code object
using the built-in `compile()` function.
Additionally various helper functions are provided that make working with
the trees simpler. The main intention of the helper functions and this
module in general is to provide an easy to use interface for libraries
that work tightly with the python syntax (template engines for example).
:copyright: Copyright 2008 by Armin Ronacher.
:license: Python License.
"""
from _ast import *
from _ast import __version__
def parse(source, filename='<unknown>', mode='exec'):
"""
Parse the source into an AST node.
Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
"""
return compile(source, filename, mode, PyCF_ONLY_AST)
def literal_eval(node_or_string):
"""
Safely evaluate an expression node or a string containing a Python
expression. The string or node provided may only consist of the following
Python literal structures: strings, numbers, tuples, lists, dicts, booleans,
and None.
"""
_safe_names = {'None': None, 'True': True, 'False': False}
if isinstance(node_or_string, basestring):
node_or_string = parse(node_or_string, mode='eval')
if isinstance(node_or_string, Expression):
node_or_string = node_or_string.body
def _convert(node):
if isinstance(node, Str):
return node.s
elif isinstance(node, Num):
return node.n
elif isinstance(node, Tuple):
return tuple(map(_convert, node.elts))
elif isinstance(node, List):
return list(map(_convert, node.elts))
elif isinstance(node, Dict):
return dict((_convert(k), _convert(v)) for k, v
in zip(node.keys, node.values))
elif isinstance(node, Name):
if node.id in _safe_names:
return _safe_names[node.id]
elif isinstance(node, BinOp) and \
isinstance(node.op, (Add, Sub)) and \
isinstance(node.right, Num) and \
isinstance(node.right.n, complex) and \
isinstance(node.left, Num) and \
isinstance(node.left.n, (int, long, float)):
left = node.left.n
right = node.right.n
if isinstance(node.op, Add):
return left + right
else:
return left - right
raise ValueError('malformed string')
return _convert(node_or_string)
def dump(node, annotate_fields=True, include_attributes=False):
"""
Return a formatted dump of the tree in *node*. This is mainly useful for
debugging purposes. The returned string will show the names and the values
for fields. This makes the code impossible to evaluate, so if evaluation is
wanted *annotate_fields* must be set to False. Attributes such as line
numbers and column offsets are not dumped by default. If this is wanted,
*include_attributes* can be set to True.
"""
def _format(node):
if isinstance(node, AST):
fields = [(a, _format(b)) for a, b in iter_fields(node)]
rv = '%s(%s' % (node.__class__.__name__, ', '.join(
('%s=%s' % field for field in fields)
if annotate_fields else
(b for a, b in fields)
))
if include_attributes and node._attributes:
rv += fields and ', ' or ' '
rv += ', '.join('%s=%s' % (a, _format(getattr(node, a)))
for a in node._attributes)
return rv + ')'
elif isinstance(node, list):
return '[%s]' % ', '.join(_format(x) for x in node)
return repr(node)
if not isinstance(node, AST):
raise TypeError('expected AST, got %r' % node.__class__.__name__)
return _format(node)
def copy_location(new_node, old_node):
"""
Copy source location (`lineno` and `col_offset` attributes) from
*old_node* to *new_node* if possible, and return *new_node*.
"""
for attr in 'lineno', 'col_offset':
if attr in old_node._attributes and attr in new_node._attributes \
and hasattr(old_node, attr):
setattr(new_node, attr, getattr(old_node, attr))
return new_node
def fix_missing_locations(node):
"""
When you compile a node tree with compile(), the compiler expects lineno and
col_offset attributes for every node that supports them. This is rather
tedious to fill in for generated nodes, so this helper adds these attributes
recursively where not already set, by setting them to the values of the
parent node. It works recursively starting at *node*.
"""
def _fix(node, lineno, col_offset):
if 'lineno' in node._attributes:
if not hasattr(node, 'lineno'):
node.lineno = lineno
else:
lineno = node.lineno
if 'col_offset' in node._attributes:
if not hasattr(node, 'col_offset'):
node.col_offset = col_offset
else:
col_offset = node.col_offset
for child in iter_child_nodes(node):
_fix(child, lineno, col_offset)
_fix(node, 1, 0)
return node
def increment_lineno(node, n=1):
"""
Increment the line number of each node in the tree starting at *node* by *n*.
This is useful to "move code" to a different location in a file.
"""
for child in walk(node):
if 'lineno' in child._attributes:
child.lineno = getattr(child, 'lineno', 0) + n
return node
def iter_fields(node):
"""
Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
that is present on *node*.
"""
for field in node._fields:
try:
yield field, getattr(node, field)
except AttributeError:
pass
def iter_child_nodes(node):
"""
Yield all direct child nodes of *node*, that is, all fields that are nodes
and all items of fields that are lists of nodes.
"""
for name, field in iter_fields(node):
if isinstance(field, AST):
yield field
elif isinstance(field, list):
for item in field:
if isinstance(item, AST):
yield item
def get_docstring(node, clean=True):
"""
Return the docstring for the given node or None if no docstring can
be found. If the node provided does not have docstrings a TypeError
will be raised.
"""
if not isinstance(node, (FunctionDef, ClassDef, Module)):
raise TypeError("%r can't have docstrings" % node.__class__.__name__)
if node.body and isinstance(node.body[0], Expr) and \
isinstance(node.body[0].value, Str):
if clean:
import inspect
return inspect.cleandoc(node.body[0].value.s)
return node.body[0].value.s
def walk(node):
"""
Recursively yield all descendant nodes in the tree starting at *node*
(including *node* itself), in no specified order. This is useful if you
only want to modify nodes in place and don't care about the context.
"""
from collections import deque
todo = deque([node])
while todo:
node = todo.popleft()
todo.extend(iter_child_nodes(node))
yield node
class NodeVisitor(object):
"""
A node visitor base class that walks the abstract syntax tree and calls a
visitor function for every node found. This function may return a value
which is forwarded by the `visit` method.
This class is meant to be subclassed, with the subclass adding visitor
methods.
Per default the visitor functions for the nodes are ``'visit_'`` +
class name of the node. So a `TryFinally` node visit function would
be `visit_TryFinally`. This behavior can be changed by overriding
the `visit` method. If no visitor function exists for a node
(return value `None`) the `generic_visit` visitor is used instead.
Don't use the `NodeVisitor` if you want to apply changes to nodes during
traversing. For this a special visitor exists (`NodeTransformer`) that
allows modifications.
"""
def visit(self, node):
"""Visit a node."""
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method, self.generic_visit)
return visitor(node)
def generic_visit(self, node):
"""Called if no explicit visitor function exists for a node."""
for field, value in iter_fields(node):
if isinstance(value, list):
for item in value:
if isinstance(item, AST):
self.visit(item)
elif isinstance(value, AST):
self.visit(value)
class NodeTransformer(NodeVisitor):
"""
A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
allows modification of nodes.
The `NodeTransformer` will walk the AST and use the return value of the
visitor methods to replace or remove the old node. If the return value of
the visitor method is ``None``, the node will be removed from its location,
otherwise it is replaced with the return value. The return value may be the
original node in which case no replacement takes place.
Here is an example transformer that rewrites all occurrences of name lookups
(``foo``) to ``data['foo']``::
class RewriteName(NodeTransformer):
def visit_Name(self, node):
return copy_location(Subscript(
value=Name(id='data', ctx=Load()),
slice=Index(value=Str(s=node.id)),
ctx=node.ctx
), node)
Keep in mind that if the node you're operating on has child nodes you must
either transform the child nodes yourself or call the :meth:`generic_visit`
method for the node first.
For nodes that were part of a collection of statements (that applies to all
statement nodes), the visitor may also return a list of nodes rather than
just a single node.
Usually you use the transformer like this::
node = YourTransformer().visit(node)
"""
def generic_visit(self, node):
for field, old_value in iter_fields(node):
old_value = getattr(node, field, None)
if isinstance(old_value, list):
new_values = []
for value in old_value:
if isinstance(value, AST):
value = self.visit(value)
if value is None:
continue
elif not isinstance(value, AST):
new_values.extend(value)
continue
new_values.append(value)
old_value[:] = new_values
elif isinstance(old_value, AST):
new_node = self.visit(old_value)
if new_node is None:
delattr(node, field)
else:
setattr(node, field, new_node)
return node

View File

@ -0,0 +1,65 @@
"""
atexit.py - allow programmer to define multiple exit functions to be executed
upon normal program termination.
One public function, register, is defined.
"""
__all__ = ["register"]
import sys
_exithandlers = []
def _run_exitfuncs():
"""run any registered exit functions
_exithandlers is traversed in reverse order so functions are executed
last in, first out.
"""
exc_info = None
while _exithandlers:
func, targs, kargs = _exithandlers.pop()
try:
func(*targs, **kargs)
except SystemExit:
exc_info = sys.exc_info()
except:
import traceback
print >> sys.stderr, "Error in atexit._run_exitfuncs:"
traceback.print_exc()
exc_info = sys.exc_info()
if exc_info is not None:
raise exc_info[0], exc_info[1], exc_info[2]
def register(func, *targs, **kargs):
"""register a function to be executed upon normal program termination
func - function to be called at exit
targs - optional arguments to pass to func
kargs - optional keyword arguments to pass to func
func is returned to facilitate usage as a decorator.
"""
_exithandlers.append((func, targs, kargs))
return func
if hasattr(sys, "exitfunc"):
# Assume it's another registered exit function - append it to our list
register(sys.exitfunc)
sys.exitfunc = _run_exitfuncs
if __name__ == "__main__":
def x1():
print "running x1"
def x2(n):
print "running x2(%r)" % (n,)
def x3(n, kwd=None):
print "running x3(%r, kwd=%r)" % (n, kwd)
register(x1)
register(x2, 12)
register(x3, 5, "bar")
register(x3, "no kwd args")

View File

@ -0,0 +1,518 @@
"""Macintosh binhex compression/decompression.
easy interface:
binhex(inputfilename, outputfilename)
hexbin(inputfilename, outputfilename)
"""
#
# Jack Jansen, CWI, August 1995.
#
# The module is supposed to be as compatible as possible. Especially the
# easy interface should work "as expected" on any platform.
# XXXX Note: currently, textfiles appear in mac-form on all platforms.
# We seem to lack a simple character-translate in python.
# (we should probably use ISO-Latin-1 on all but the mac platform).
# XXXX The simple routines are too simple: they expect to hold the complete
# files in-core. Should be fixed.
# XXXX It would be nice to handle AppleDouble format on unix
# (for servers serving macs).
# XXXX I don't understand what happens when you get 0x90 times the same byte on
# input. The resulting code (xx 90 90) would appear to be interpreted as an
# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
#
import sys
import os
import struct
import binascii
__all__ = ["binhex","hexbin","Error"]
class Error(Exception):
pass
# States (what have we written)
_DID_HEADER = 0
_DID_DATA = 1
# Various constants
REASONABLY_LARGE=32768 # Minimal amount we pass the rle-coder
LINELEN=64
RUNCHAR=chr(0x90) # run-length introducer
#
# This code is no longer byte-order dependent
#
# Workarounds for non-mac machines.
try:
from Carbon.File import FSSpec, FInfo
from MacOS import openrf
def getfileinfo(name):
finfo = FSSpec(name).FSpGetFInfo()
dir, file = os.path.split(name)
# XXX Get resource/data sizes
fp = open(name, 'rb')
fp.seek(0, 2)
dlen = fp.tell()
fp = openrf(name, '*rb')
fp.seek(0, 2)
rlen = fp.tell()
return file, finfo, dlen, rlen
def openrsrc(name, *mode):
if not mode:
mode = '*rb'
else:
mode = '*' + mode[0]
return openrf(name, mode)
except ImportError:
#
# Glue code for non-macintosh usage
#
class FInfo:
def __init__(self):
self.Type = '????'
self.Creator = '????'
self.Flags = 0
def getfileinfo(name):
finfo = FInfo()
# Quick check for textfile
fp = open(name)
data = open(name).read(256)
for c in data:
if not c.isspace() and (c<' ' or ord(c) > 0x7f):
break
else:
finfo.Type = 'TEXT'
fp.seek(0, 2)
dsize = fp.tell()
fp.close()
dir, file = os.path.split(name)
file = file.replace(':', '-', 1)
return file, finfo, dsize, 0
class openrsrc:
def __init__(self, *args):
pass
def read(self, *args):
return ''
def write(self, *args):
pass
def close(self):
pass
class _Hqxcoderengine:
"""Write data to the coder in 3-byte chunks"""
def __init__(self, ofp):
self.ofp = ofp
self.data = ''
self.hqxdata = ''
self.linelen = LINELEN-1
def write(self, data):
self.data = self.data + data
datalen = len(self.data)
todo = (datalen//3)*3
data = self.data[:todo]
self.data = self.data[todo:]
if not data:
return
self.hqxdata = self.hqxdata + binascii.b2a_hqx(data)
self._flush(0)
def _flush(self, force):
first = 0
while first <= len(self.hqxdata)-self.linelen:
last = first + self.linelen
self.ofp.write(self.hqxdata[first:last]+'\n')
self.linelen = LINELEN
first = last
self.hqxdata = self.hqxdata[first:]
if force:
self.ofp.write(self.hqxdata + ':\n')
def close(self):
if self.data:
self.hqxdata = \
self.hqxdata + binascii.b2a_hqx(self.data)
self._flush(1)
self.ofp.close()
del self.ofp
class _Rlecoderengine:
"""Write data to the RLE-coder in suitably large chunks"""
def __init__(self, ofp):
self.ofp = ofp
self.data = ''
def write(self, data):
self.data = self.data + data
if len(self.data) < REASONABLY_LARGE:
return
rledata = binascii.rlecode_hqx(self.data)
self.ofp.write(rledata)
self.data = ''
def close(self):
if self.data:
rledata = binascii.rlecode_hqx(self.data)
self.ofp.write(rledata)
self.ofp.close()
del self.ofp
class BinHex:
def __init__(self, name_finfo_dlen_rlen, ofp):
name, finfo, dlen, rlen = name_finfo_dlen_rlen
if type(ofp) == type(''):
ofname = ofp
ofp = open(ofname, 'w')
ofp.write('(This file must be converted with BinHex 4.0)\n\n:')
hqxer = _Hqxcoderengine(ofp)
self.ofp = _Rlecoderengine(hqxer)
self.crc = 0
if finfo is None:
finfo = FInfo()
self.dlen = dlen
self.rlen = rlen
self._writeinfo(name, finfo)
self.state = _DID_HEADER
def _writeinfo(self, name, finfo):
nl = len(name)
if nl > 63:
raise Error, 'Filename too long'
d = chr(nl) + name + '\0'
d2 = finfo.Type + finfo.Creator
# Force all structs to be packed with big-endian
d3 = struct.pack('>h', finfo.Flags)
d4 = struct.pack('>ii', self.dlen, self.rlen)
info = d + d2 + d3 + d4
self._write(info)
self._writecrc()
def _write(self, data):
self.crc = binascii.crc_hqx(data, self.crc)
self.ofp.write(data)
def _writecrc(self):
# XXXX Should this be here??
# self.crc = binascii.crc_hqx('\0\0', self.crc)
if self.crc < 0:
fmt = '>h'
else:
fmt = '>H'
self.ofp.write(struct.pack(fmt, self.crc))
self.crc = 0
def write(self, data):
if self.state != _DID_HEADER:
raise Error, 'Writing data at the wrong time'
self.dlen = self.dlen - len(data)
self._write(data)
def close_data(self):
if self.dlen != 0:
raise Error, 'Incorrect data size, diff=%r' % (self.rlen,)
self._writecrc()
self.state = _DID_DATA
def write_rsrc(self, data):
if self.state < _DID_DATA:
self.close_data()
if self.state != _DID_DATA:
raise Error, 'Writing resource data at the wrong time'
self.rlen = self.rlen - len(data)
self._write(data)
def close(self):
if self.state is None:
return
try:
if self.state < _DID_DATA:
self.close_data()
if self.state != _DID_DATA:
raise Error, 'Close at the wrong time'
if self.rlen != 0:
raise Error, \
"Incorrect resource-datasize, diff=%r" % (self.rlen,)
self._writecrc()
finally:
self.state = None
ofp = self.ofp
del self.ofp
ofp.close()
def binhex(inp, out):
"""(infilename, outfilename) - Create binhex-encoded copy of a file"""
finfo = getfileinfo(inp)
ofp = BinHex(finfo, out)
ifp = open(inp, 'rb')
# XXXX Do textfile translation on non-mac systems
while 1:
d = ifp.read(128000)
if not d: break
ofp.write(d)
ofp.close_data()
ifp.close()
ifp = openrsrc(inp, 'rb')
while 1:
d = ifp.read(128000)
if not d: break
ofp.write_rsrc(d)
ofp.close()
ifp.close()
class _Hqxdecoderengine:
"""Read data via the decoder in 4-byte chunks"""
def __init__(self, ifp):
self.ifp = ifp
self.eof = 0
def read(self, totalwtd):
"""Read at least wtd bytes (or until EOF)"""
decdata = ''
wtd = totalwtd
#
# The loop here is convoluted, since we don't really now how
# much to decode: there may be newlines in the incoming data.
while wtd > 0:
if self.eof: return decdata
wtd = ((wtd+2)//3)*4
data = self.ifp.read(wtd)
#
# Next problem: there may not be a complete number of
# bytes in what we pass to a2b. Solve by yet another
# loop.
#
while 1:
try:
decdatacur, self.eof = \
binascii.a2b_hqx(data)
break
except binascii.Incomplete:
pass
newdata = self.ifp.read(1)
if not newdata:
raise Error, \
'Premature EOF on binhex file'
data = data + newdata
decdata = decdata + decdatacur
wtd = totalwtd - len(decdata)
if not decdata and not self.eof:
raise Error, 'Premature EOF on binhex file'
return decdata
def close(self):
self.ifp.close()
class _Rledecoderengine:
"""Read data via the RLE-coder"""
def __init__(self, ifp):
self.ifp = ifp
self.pre_buffer = ''
self.post_buffer = ''
self.eof = 0
def read(self, wtd):
if wtd > len(self.post_buffer):
self._fill(wtd-len(self.post_buffer))
rv = self.post_buffer[:wtd]
self.post_buffer = self.post_buffer[wtd:]
return rv
def _fill(self, wtd):
self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+4)
if self.ifp.eof:
self.post_buffer = self.post_buffer + \
binascii.rledecode_hqx(self.pre_buffer)
self.pre_buffer = ''
return
#
# Obfuscated code ahead. We have to take care that we don't
# end up with an orphaned RUNCHAR later on. So, we keep a couple
# of bytes in the buffer, depending on what the end of
# the buffer looks like:
# '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
# '?\220' - Keep 2 bytes: repeated something-else
# '\220\0' - Escaped \220: Keep 2 bytes.
# '?\220?' - Complete repeat sequence: decode all
# otherwise: keep 1 byte.
#
mark = len(self.pre_buffer)
if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR:
mark = mark - 3
elif self.pre_buffer[-1] == RUNCHAR:
mark = mark - 2
elif self.pre_buffer[-2:] == RUNCHAR + '\0':
mark = mark - 2
elif self.pre_buffer[-2] == RUNCHAR:
pass # Decode all
else:
mark = mark - 1
self.post_buffer = self.post_buffer + \
binascii.rledecode_hqx(self.pre_buffer[:mark])
self.pre_buffer = self.pre_buffer[mark:]
def close(self):
self.ifp.close()
class HexBin:
def __init__(self, ifp):
if type(ifp) == type(''):
ifp = open(ifp)
#
# Find initial colon.
#
while 1:
ch = ifp.read(1)
if not ch:
raise Error, "No binhex data found"
# Cater for \r\n terminated lines (which show up as \n\r, hence
# all lines start with \r)
if ch == '\r':
continue
if ch == ':':
break
if ch != '\n':
dummy = ifp.readline()
hqxifp = _Hqxdecoderengine(ifp)
self.ifp = _Rledecoderengine(hqxifp)
self.crc = 0
self._readheader()
def _read(self, len):
data = self.ifp.read(len)
self.crc = binascii.crc_hqx(data, self.crc)
return data
def _checkcrc(self):
filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff
#self.crc = binascii.crc_hqx('\0\0', self.crc)
# XXXX Is this needed??
self.crc = self.crc & 0xffff
if filecrc != self.crc:
raise Error, 'CRC error, computed %x, read %x' \
%(self.crc, filecrc)
self.crc = 0
def _readheader(self):
len = self._read(1)
fname = self._read(ord(len))
rest = self._read(1+4+4+2+4+4)
self._checkcrc()
type = rest[1:5]
creator = rest[5:9]
flags = struct.unpack('>h', rest[9:11])[0]
self.dlen = struct.unpack('>l', rest[11:15])[0]
self.rlen = struct.unpack('>l', rest[15:19])[0]
self.FName = fname
self.FInfo = FInfo()
self.FInfo.Creator = creator
self.FInfo.Type = type
self.FInfo.Flags = flags
self.state = _DID_HEADER
def read(self, *n):
if self.state != _DID_HEADER:
raise Error, 'Read data at wrong time'
if n:
n = n[0]
n = min(n, self.dlen)
else:
n = self.dlen
rv = ''
while len(rv) < n:
rv = rv + self._read(n-len(rv))
self.dlen = self.dlen - n
return rv
def close_data(self):
if self.state != _DID_HEADER:
raise Error, 'close_data at wrong time'
if self.dlen:
dummy = self._read(self.dlen)
self._checkcrc()
self.state = _DID_DATA
def read_rsrc(self, *n):
if self.state == _DID_HEADER:
self.close_data()
if self.state != _DID_DATA:
raise Error, 'Read resource data at wrong time'
if n:
n = n[0]
n = min(n, self.rlen)
else:
n = self.rlen
self.rlen = self.rlen - n
return self._read(n)
def close(self):
if self.state is None:
return
try:
if self.rlen:
dummy = self.read_rsrc(self.rlen)
self._checkcrc()
finally:
self.state = None
self.ifp.close()
def hexbin(inp, out):
"""(infilename, outfilename) - Decode binhexed file"""
ifp = HexBin(inp)
finfo = ifp.FInfo
if not out:
out = ifp.FName
ofp = open(out, 'wb')
# XXXX Do translation on non-mac systems
while 1:
d = ifp.read(128000)
if not d: break
ofp.write(d)
ofp.close()
ifp.close_data()
d = ifp.read_rsrc(128000)
if d:
ofp = openrsrc(out, 'wb')
ofp.write(d)
while 1:
d = ifp.read_rsrc(128000)
if not d: break
ofp.write(d)
ofp.close()
ifp.close()
def _test():
fname = sys.argv[1]
binhex(fname, fname+'.hqx')
hexbin(fname+'.hqx', fname+'.viahqx')
#hexbin(fname, fname+'.unpacked')
sys.exit(1)
if __name__ == '__main__':
_test()

View File

@ -0,0 +1,92 @@
"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the right of the rightmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e <= x, and all e in
a[i:] have e > x. So if x already appears in the list, a.insert(x) will
insert just after the rightmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the left of the leftmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
insert just before the leftmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
return lo
# Overwrite above definitions with a fast C implementation
try:
from _bisect import *
except ImportError:
pass

View File

@ -0,0 +1,713 @@
"""Calendar printing functions
Note when comparing these calendars to the ones printed by cal(1): By
default, these calendars have Monday as the first day of the week, and
Sunday as the last (the European convention). Use setfirstweekday() to
set the first day of the week (0=Monday, 6=Sunday)."""
import sys
import datetime
import locale as _locale
__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday",
"firstweekday", "isleap", "leapdays", "weekday", "monthrange",
"monthcalendar", "prmonth", "month", "prcal", "calendar",
"timegm", "month_name", "month_abbr", "day_name", "day_abbr"]
# Exception raised for bad input (with string parameter for details)
error = ValueError
# Exceptions raised for bad input
class IllegalMonthError(ValueError):
def __init__(self, month):
self.month = month
def __str__(self):
return "bad month number %r; must be 1-12" % self.month
class IllegalWeekdayError(ValueError):
def __init__(self, weekday):
self.weekday = weekday
def __str__(self):
return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday
# Constants for months referenced later
January = 1
February = 2
# Number of days per month (except for February in leap years)
mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# This module used to have hard-coded lists of day and month names, as
# English strings. The classes following emulate a read-only version of
# that, but supply localized names. Note that the values are computed
# fresh on each call, in case the user changes locale between calls.
class _localized_month:
_months = [datetime.date(2001, i+1, 1).strftime for i in range(12)]
_months.insert(0, lambda x: "")
def __init__(self, format):
self.format = format
def __getitem__(self, i):
funcs = self._months[i]
if isinstance(i, slice):
return [f(self.format) for f in funcs]
else:
return funcs(self.format)
def __len__(self):
return 13
class _localized_day:
# January 1, 2001, was a Monday.
_days = [datetime.date(2001, 1, i+1).strftime for i in range(7)]
def __init__(self, format):
self.format = format
def __getitem__(self, i):
funcs = self._days[i]
if isinstance(i, slice):
return [f(self.format) for f in funcs]
else:
return funcs(self.format)
def __len__(self):
return 7
# Full and abbreviated names of weekdays
day_name = _localized_day('%A')
day_abbr = _localized_day('%a')
# Full and abbreviated names of months (1-based arrays!!!)
month_name = _localized_month('%B')
month_abbr = _localized_month('%b')
# Constants for weekdays
(MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7)
def isleap(year):
"""Return True for leap years, False for non-leap years."""
return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
def leapdays(y1, y2):
"""Return number of leap years in range [y1, y2).
Assume y1 <= y2."""
y1 -= 1
y2 -= 1
return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400)
def weekday(year, month, day):
"""Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12),
day (1-31)."""
return datetime.date(year, month, day).weekday()
def monthrange(year, month):
"""Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for
year, month."""
if not 1 <= month <= 12:
raise IllegalMonthError(month)
day1 = weekday(year, month, 1)
ndays = mdays[month] + (month == February and isleap(year))
return day1, ndays
class Calendar(object):
"""
Base calendar class. This class doesn't do any formatting. It simply
provides data to subclasses.
"""
def __init__(self, firstweekday=0):
self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday
def getfirstweekday(self):
return self._firstweekday % 7
def setfirstweekday(self, firstweekday):
self._firstweekday = firstweekday
firstweekday = property(getfirstweekday, setfirstweekday)
def iterweekdays(self):
"""
Return a iterator for one week of weekday numbers starting with the
configured first one.
"""
for i in range(self.firstweekday, self.firstweekday + 7):
yield i%7
def itermonthdates(self, year, month):
"""
Return an iterator for one month. The iterator will yield datetime.date
values and will always iterate through complete weeks, so it will yield
dates outside the specified month.
"""
date = datetime.date(year, month, 1)
# Go back to the beginning of the week
days = (date.weekday() - self.firstweekday) % 7
date -= datetime.timedelta(days=days)
oneday = datetime.timedelta(days=1)
while True:
yield date
try:
date += oneday
except OverflowError:
# Adding one day could fail after datetime.MAXYEAR
break
if date.month != month and date.weekday() == self.firstweekday:
break
def itermonthdays2(self, year, month):
"""
Like itermonthdates(), but will yield (day number, weekday number)
tuples. For days outside the specified month the day number is 0.
"""
for date in self.itermonthdates(year, month):
if date.month != month:
yield (0, date.weekday())
else:
yield (date.day, date.weekday())
def itermonthdays(self, year, month):
"""
Like itermonthdates(), but will yield day numbers. For days outside
the specified month the day number is 0.
"""
for date in self.itermonthdates(year, month):
if date.month != month:
yield 0
else:
yield date.day
def monthdatescalendar(self, year, month):
"""
Return a matrix (list of lists) representing a month's calendar.
Each row represents a week; week entries are datetime.date values.
"""
dates = list(self.itermonthdates(year, month))
return [ dates[i:i+7] for i in range(0, len(dates), 7) ]
def monthdays2calendar(self, year, month):
"""
Return a matrix representing a month's calendar.
Each row represents a week; week entries are
(day number, weekday number) tuples. Day numbers outside this month
are zero.
"""
days = list(self.itermonthdays2(year, month))
return [ days[i:i+7] for i in range(0, len(days), 7) ]
def monthdayscalendar(self, year, month):
"""
Return a matrix representing a month's calendar.
Each row represents a week; days outside this month are zero.
"""
days = list(self.itermonthdays(year, month))
return [ days[i:i+7] for i in range(0, len(days), 7) ]
def yeardatescalendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting. The return
value is a list of month rows. Each month row contains up to width months.
Each month contains between 4 and 6 weeks and each week contains 1-7
days. Days are datetime.date objects.
"""
months = [
self.monthdatescalendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
def yeardays2calendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting (similar to
yeardatescalendar()). Entries in the week lists are
(day number, weekday number) tuples. Day numbers outside this month are
zero.
"""
months = [
self.monthdays2calendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
def yeardayscalendar(self, year, width=3):
"""
Return the data for the specified year ready for formatting (similar to
yeardatescalendar()). Entries in the week lists are day numbers.
Day numbers outside this month are zero.
"""
months = [
self.monthdayscalendar(year, i)
for i in range(January, January+12)
]
return [months[i:i+width] for i in range(0, len(months), width) ]
class TextCalendar(Calendar):
"""
Subclass of Calendar that outputs a calendar as a simple plain text
similar to the UNIX program cal.
"""
def prweek(self, theweek, width):
"""
Print a single week (no newline).
"""
print self.formatweek(theweek, width),
def formatday(self, day, weekday, width):
"""
Returns a formatted day.
"""
if day == 0:
s = ''
else:
s = '%2i' % day # right-align single-digit days
return s.center(width)
def formatweek(self, theweek, width):
"""
Returns a single week in a string (no newline).
"""
return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek)
def formatweekday(self, day, width):
"""
Returns a formatted week day name.
"""
if width >= 9:
names = day_name
else:
names = day_abbr
return names[day][:width].center(width)
def formatweekheader(self, width):
"""
Return a header for a week.
"""
return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays())
def formatmonthname(self, theyear, themonth, width, withyear=True):
"""
Return a formatted month name.
"""
s = month_name[themonth]
if withyear:
s = "%s %r" % (s, theyear)
return s.center(width)
def prmonth(self, theyear, themonth, w=0, l=0):
"""
Print a month's calendar.
"""
print self.formatmonth(theyear, themonth, w, l),
def formatmonth(self, theyear, themonth, w=0, l=0):
"""
Return a month's calendar string (multi-line).
"""
w = max(2, w)
l = max(1, l)
s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1)
s = s.rstrip()
s += '\n' * l
s += self.formatweekheader(w).rstrip()
s += '\n' * l
for week in self.monthdays2calendar(theyear, themonth):
s += self.formatweek(week, w).rstrip()
s += '\n' * l
return s
def formatyear(self, theyear, w=2, l=1, c=6, m=3):
"""
Returns a year's calendar as a multi-line string.
"""
w = max(2, w)
l = max(1, l)
c = max(2, c)
colwidth = (w + 1) * 7 - 1
v = []
a = v.append
a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip())
a('\n'*l)
header = self.formatweekheader(w)
for (i, row) in enumerate(self.yeardays2calendar(theyear, m)):
# months in this row
months = range(m*i+1, min(m*(i+1)+1, 13))
a('\n'*l)
names = (self.formatmonthname(theyear, k, colwidth, False)
for k in months)
a(formatstring(names, colwidth, c).rstrip())
a('\n'*l)
headers = (header for k in months)
a(formatstring(headers, colwidth, c).rstrip())
a('\n'*l)
# max number of weeks for this row
height = max(len(cal) for cal in row)
for j in range(height):
weeks = []
for cal in row:
if j >= len(cal):
weeks.append('')
else:
weeks.append(self.formatweek(cal[j], w))
a(formatstring(weeks, colwidth, c).rstrip())
a('\n' * l)
return ''.join(v)
def pryear(self, theyear, w=0, l=0, c=6, m=3):
"""Print a year's calendar."""
print self.formatyear(theyear, w, l, c, m)
class HTMLCalendar(Calendar):
"""
This calendar returns complete HTML pages.
"""
# CSS classes for the day <td>s
cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
def formatday(self, day, weekday):
"""
Return a day as a table cell.
"""
if day == 0:
return '<td class="noday">&nbsp;</td>' # day outside month
else:
return '<td class="%s">%d</td>' % (self.cssclasses[weekday], day)
def formatweek(self, theweek):
"""
Return a complete week as a table row.
"""
s = ''.join(self.formatday(d, wd) for (d, wd) in theweek)
return '<tr>%s</tr>' % s
def formatweekday(self, day):
"""
Return a weekday name as a table header.
"""
return '<th class="%s">%s</th>' % (self.cssclasses[day], day_abbr[day])
def formatweekheader(self):
"""
Return a header for a week as a table row.
"""
s = ''.join(self.formatweekday(i) for i in self.iterweekdays())
return '<tr>%s</tr>' % s
def formatmonthname(self, theyear, themonth, withyear=True):
"""
Return a month name as a table row.
"""
if withyear:
s = '%s %s' % (month_name[themonth], theyear)
else:
s = '%s' % month_name[themonth]
return '<tr><th colspan="7" class="month">%s</th></tr>' % s
def formatmonth(self, theyear, themonth, withyear=True):
"""
Return a formatted month as a table.
"""
v = []
a = v.append
a('<table border="0" cellpadding="0" cellspacing="0" class="month">')
a('\n')
a(self.formatmonthname(theyear, themonth, withyear=withyear))
a('\n')
a(self.formatweekheader())
a('\n')
for week in self.monthdays2calendar(theyear, themonth):
a(self.formatweek(week))
a('\n')
a('</table>')
a('\n')
return ''.join(v)
def formatyear(self, theyear, width=3):
"""
Return a formatted year as a table of tables.
"""
v = []
a = v.append
width = max(width, 1)
a('<table border="0" cellpadding="0" cellspacing="0" class="year">')
a('\n')
a('<tr><th colspan="%d" class="year">%s</th></tr>' % (width, theyear))
for i in range(January, January+12, width):
# months in this row
months = range(i, min(i+width, 13))
a('<tr>')
for m in months:
a('<td>')
a(self.formatmonth(theyear, m, withyear=False))
a('</td>')
a('</tr>')
a('</table>')
return ''.join(v)
def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None):
"""
Return a formatted year as a complete HTML page.
"""
if encoding is None:
encoding = sys.getdefaultencoding()
v = []
a = v.append
a('<?xml version="1.0" encoding="%s"?>\n' % encoding)
a('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n')
a('<html>\n')
a('<head>\n')
a('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n' % encoding)
if css is not None:
a('<link rel="stylesheet" type="text/css" href="%s" />\n' % css)
a('<title>Calendar for %d</title>\n' % theyear)
a('</head>\n')
a('<body>\n')
a(self.formatyear(theyear, width))
a('</body>\n')
a('</html>\n')
return ''.join(v).encode(encoding, "xmlcharrefreplace")
class TimeEncoding:
def __init__(self, locale):
self.locale = locale
def __enter__(self):
self.oldlocale = _locale.getlocale(_locale.LC_TIME)
_locale.setlocale(_locale.LC_TIME, self.locale)
return _locale.getlocale(_locale.LC_TIME)[1]
def __exit__(self, *args):
_locale.setlocale(_locale.LC_TIME, self.oldlocale)
class LocaleTextCalendar(TextCalendar):
"""
This class can be passed a locale name in the constructor and will return
month and weekday names in the specified locale. If this locale includes
an encoding all strings containing month and weekday names will be returned
as unicode.
"""
def __init__(self, firstweekday=0, locale=None):
TextCalendar.__init__(self, firstweekday)
if locale is None:
locale = _locale.getdefaultlocale()
self.locale = locale
def formatweekday(self, day, width):
with TimeEncoding(self.locale) as encoding:
if width >= 9:
names = day_name
else:
names = day_abbr
name = names[day]
if encoding is not None:
name = name.decode(encoding)
return name[:width].center(width)
def formatmonthname(self, theyear, themonth, width, withyear=True):
with TimeEncoding(self.locale) as encoding:
s = month_name[themonth]
if encoding is not None:
s = s.decode(encoding)
if withyear:
s = "%s %r" % (s, theyear)
return s.center(width)
class LocaleHTMLCalendar(HTMLCalendar):
"""
This class can be passed a locale name in the constructor and will return
month and weekday names in the specified locale. If this locale includes
an encoding all strings containing month and weekday names will be returned
as unicode.
"""
def __init__(self, firstweekday=0, locale=None):
HTMLCalendar.__init__(self, firstweekday)
if locale is None:
locale = _locale.getdefaultlocale()
self.locale = locale
def formatweekday(self, day):
with TimeEncoding(self.locale) as encoding:
s = day_abbr[day]
if encoding is not None:
s = s.decode(encoding)
return '<th class="%s">%s</th>' % (self.cssclasses[day], s)
def formatmonthname(self, theyear, themonth, withyear=True):
with TimeEncoding(self.locale) as encoding:
s = month_name[themonth]
if encoding is not None:
s = s.decode(encoding)
if withyear:
s = '%s %s' % (s, theyear)
return '<tr><th colspan="7" class="month">%s</th></tr>' % s
# Support for old module level interface
c = TextCalendar()
firstweekday = c.getfirstweekday
def setfirstweekday(firstweekday):
try:
firstweekday.__index__
except AttributeError:
raise IllegalWeekdayError(firstweekday)
if not MONDAY <= firstweekday <= SUNDAY:
raise IllegalWeekdayError(firstweekday)
c.firstweekday = firstweekday
monthcalendar = c.monthdayscalendar
prweek = c.prweek
week = c.formatweek
weekheader = c.formatweekheader
prmonth = c.prmonth
month = c.formatmonth
calendar = c.formatyear
prcal = c.pryear
# Spacing of month columns for multi-column year calendar
_colwidth = 7*3 - 1 # Amount printed by prweek()
_spacing = 6 # Number of spaces between columns
def format(cols, colwidth=_colwidth, spacing=_spacing):
"""Prints multi-column formatting for year calendars"""
print formatstring(cols, colwidth, spacing)
def formatstring(cols, colwidth=_colwidth, spacing=_spacing):
"""Returns a string formatted from n strings, centered within n columns."""
spacing *= ' '
return spacing.join(c.center(colwidth) for c in cols)
EPOCH = 1970
_EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal()
def timegm(tuple):
"""Unrelated but handy function to calculate Unix timestamp from GMT."""
year, month, day, hour, minute, second = tuple[:6]
days = datetime.date(year, month, 1).toordinal() - _EPOCH_ORD + day - 1
hours = days*24 + hour
minutes = hours*60 + minute
seconds = minutes*60 + second
return seconds
def main(args):
import optparse
parser = optparse.OptionParser(usage="usage: %prog [options] [year [month]]")
parser.add_option(
"-w", "--width",
dest="width", type="int", default=2,
help="width of date column (default 2, text only)"
)
parser.add_option(
"-l", "--lines",
dest="lines", type="int", default=1,
help="number of lines for each week (default 1, text only)"
)
parser.add_option(
"-s", "--spacing",
dest="spacing", type="int", default=6,
help="spacing between months (default 6, text only)"
)
parser.add_option(
"-m", "--months",
dest="months", type="int", default=3,
help="months per row (default 3, text only)"
)
parser.add_option(
"-c", "--css",
dest="css", default="calendar.css",
help="CSS to use for page (html only)"
)
parser.add_option(
"-L", "--locale",
dest="locale", default=None,
help="locale to be used from month and weekday names"
)
parser.add_option(
"-e", "--encoding",
dest="encoding", default=None,
help="Encoding to use for output"
)
parser.add_option(
"-t", "--type",
dest="type", default="text",
choices=("text", "html"),
help="output type (text or html)"
)
(options, args) = parser.parse_args(args)
if options.locale and not options.encoding:
parser.error("if --locale is specified --encoding is required")
sys.exit(1)
locale = options.locale, options.encoding
if options.type == "html":
if options.locale:
cal = LocaleHTMLCalendar(locale=locale)
else:
cal = HTMLCalendar()
encoding = options.encoding
if encoding is None:
encoding = sys.getdefaultencoding()
optdict = dict(encoding=encoding, css=options.css)
if len(args) == 1:
print cal.formatyearpage(datetime.date.today().year, **optdict)
elif len(args) == 2:
print cal.formatyearpage(int(args[1]), **optdict)
else:
parser.error("incorrect number of arguments")
sys.exit(1)
else:
if options.locale:
cal = LocaleTextCalendar(locale=locale)
else:
cal = TextCalendar()
optdict = dict(w=options.width, l=options.lines)
if len(args) != 3:
optdict["c"] = options.spacing
optdict["m"] = options.months
if len(args) == 1:
result = cal.formatyear(datetime.date.today().year, **optdict)
elif len(args) == 2:
result = cal.formatyear(int(args[1]), **optdict)
elif len(args) == 3:
result = cal.formatmonth(int(args[1]), int(args[2]), **optdict)
else:
parser.error("incorrect number of arguments")
sys.exit(1)
if options.encoding:
result = result.encode(options.encoding)
print result
if __name__ == "__main__":
main(sys.argv)

View File

@ -0,0 +1,404 @@
"""A generic class to build line-oriented command interpreters.
Interpreters constructed with this class obey the following conventions:
1. End of file on input is processed as the command 'EOF'.
2. A command is parsed out of each line by collecting the prefix composed
of characters in the identchars member.
3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
is passed a single argument consisting of the remainder of the line.
4. Typing an empty line repeats the last command. (Actually, it calls the
method `emptyline', which may be overridden in a subclass.)
5. There is a predefined `help' method. Given an argument `topic', it
calls the command `help_topic'. With no arguments, it lists all topics
with defined help_ functions, broken into up to three topics; documented
commands, miscellaneous help topics, and undocumented commands.
6. The command '?' is a synonym for `help'. The command '!' is a synonym
for `shell', if a do_shell method exists.
7. If completion is enabled, completing commands will be done automatically,
and completing of commands args is done by calling complete_foo() with
arguments text, line, begidx, endidx. text is string we are matching
against, all returned matches must begin with it. line is the current
input line (lstripped), begidx and endidx are the beginning and end
indexes of the text being matched, which could be used to provide
different completion depending upon which position the argument is in.
The `default' method may be overridden to intercept commands for which there
is no do_ method.
The `completedefault' method may be overridden to intercept completions for
commands that have no complete_ method.
The data member `self.ruler' sets the character used to draw separator lines
in the help messages. If empty, no ruler line is drawn. It defaults to "=".
If the value of `self.intro' is nonempty when the cmdloop method is called,
it is printed out on interpreter startup. This value may be overridden
via an optional argument to the cmdloop() method.
The data members `self.doc_header', `self.misc_header', and
`self.undoc_header' set the headers used for the help function's
listings of documented functions, miscellaneous topics, and undocumented
functions respectively.
These interpreters use raw_input; thus, if the readline module is loaded,
they automatically support Emacs-like command history and editing features.
"""
import string
__all__ = ["Cmd"]
PROMPT = '(Cmd) '
IDENTCHARS = string.ascii_letters + string.digits + '_'
class Cmd:
"""A simple framework for writing line-oriented command interpreters.
These are often useful for test harnesses, administrative tools, and
prototypes that will later be wrapped in a more sophisticated interface.
A Cmd instance or subclass instance is a line-oriented interpreter
framework. There is no good reason to instantiate Cmd itself; rather,
it's useful as a superclass of an interpreter class you define yourself
in order to inherit Cmd's methods and encapsulate action methods.
"""
prompt = PROMPT
identchars = IDENTCHARS
ruler = '='
lastcmd = ''
intro = None
doc_leader = ""
doc_header = "Documented commands (type help <topic>):"
misc_header = "Miscellaneous help topics:"
undoc_header = "Undocumented commands:"
nohelp = "*** No help on %s"
use_rawinput = 1
def __init__(self, completekey='tab', stdin=None, stdout=None):
"""Instantiate a line-oriented interpreter framework.
The optional argument 'completekey' is the readline name of a
completion key; it defaults to the Tab key. If completekey is
not None and the readline module is available, command completion
is done automatically. The optional arguments stdin and stdout
specify alternate input and output file objects; if not specified,
sys.stdin and sys.stdout are used.
"""
import sys
if stdin is not None:
self.stdin = stdin
else:
self.stdin = sys.stdin
if stdout is not None:
self.stdout = stdout
else:
self.stdout = sys.stdout
self.cmdqueue = []
self.completekey = completekey
def cmdloop(self, intro=None):
"""Repeatedly issue a prompt, accept input, parse an initial prefix
off the received input, and dispatch to action methods, passing them
the remainder of the line as argument.
"""
self.preloop()
if self.use_rawinput and self.completekey:
try:
import readline
self.old_completer = readline.get_completer()
readline.set_completer(self.complete)
readline.parse_and_bind(self.completekey+": complete")
except ImportError:
pass
try:
if intro is not None:
self.intro = intro
if self.intro:
self.stdout.write(str(self.intro)+"\n")
stop = None
while not stop:
if self.cmdqueue:
line = self.cmdqueue.pop(0)
else:
if self.use_rawinput:
try:
line = raw_input(self.prompt)
except EOFError:
line = 'EOF'
else:
self.stdout.write(self.prompt)
self.stdout.flush()
line = self.stdin.readline()
if not len(line):
line = 'EOF'
else:
line = line.rstrip('\r\n')
line = self.precmd(line)
stop = self.onecmd(line)
stop = self.postcmd(stop, line)
self.postloop()
finally:
if self.use_rawinput and self.completekey:
try:
import readline
readline.set_completer(self.old_completer)
except ImportError:
pass
def precmd(self, line):
"""Hook method executed just before the command line is
interpreted, but after the input prompt is generated and issued.
"""
return line
def postcmd(self, stop, line):
"""Hook method executed just after a command dispatch is finished."""
return stop
def preloop(self):
"""Hook method executed once when the cmdloop() method is called."""
pass
def postloop(self):
"""Hook method executed once when the cmdloop() method is about to
return.
"""
pass
def parseline(self, line):
"""Parse the line into a command name and a string containing
the arguments. Returns a tuple containing (command, args, line).
'command' and 'args' may be None if the line couldn't be parsed.
"""
line = line.strip()
if not line:
return None, None, line
elif line[0] == '?':
line = 'help ' + line[1:]
elif line[0] == '!':
if hasattr(self, 'do_shell'):
line = 'shell ' + line[1:]
else:
return None, None, line
i, n = 0, len(line)
while i < n and line[i] in self.identchars: i = i+1
cmd, arg = line[:i], line[i:].strip()
return cmd, arg, line
def onecmd(self, line):
"""Interpret the argument as though it had been typed in response
to the prompt.
This may be overridden, but should not normally need to be;
see the precmd() and postcmd() methods for useful execution hooks.
The return value is a flag indicating whether interpretation of
commands by the interpreter should stop.
"""
cmd, arg, line = self.parseline(line)
if not line:
return self.emptyline()
if cmd is None:
return self.default(line)
self.lastcmd = line
if line == 'EOF' :
self.lastcmd = ''
if cmd == '':
return self.default(line)
else:
try:
func = getattr(self, 'do_' + cmd)
except AttributeError:
return self.default(line)
return func(arg)
def emptyline(self):
"""Called when an empty line is entered in response to the prompt.
If this method is not overridden, it repeats the last nonempty
command entered.
"""
if self.lastcmd:
return self.onecmd(self.lastcmd)
def default(self, line):
"""Called on an input line when the command prefix is not recognized.
If this method is not overridden, it prints an error message and
returns.
"""
self.stdout.write('*** Unknown syntax: %s\n'%line)
def completedefault(self, *ignored):
"""Method called to complete an input line when no command-specific
complete_*() method is available.
By default, it returns an empty list.
"""
return []
def completenames(self, text, *ignored):
dotext = 'do_'+text
return [a[3:] for a in self.get_names() if a.startswith(dotext)]
def complete(self, text, state):
"""Return the next possible completion for 'text'.
If a command has not been entered, then complete against command list.
Otherwise try to call complete_<command> to get list of completions.
"""
if state == 0:
import readline
origline = readline.get_line_buffer()
line = origline.lstrip()
stripped = len(origline) - len(line)
begidx = readline.get_begidx() - stripped
endidx = readline.get_endidx() - stripped
if begidx>0:
cmd, args, foo = self.parseline(line)
if cmd == '':
compfunc = self.completedefault
else:
try:
compfunc = getattr(self, 'complete_' + cmd)
except AttributeError:
compfunc = self.completedefault
else:
compfunc = self.completenames
self.completion_matches = compfunc(text, line, begidx, endidx)
try:
return self.completion_matches[state]
except IndexError:
return None
def get_names(self):
# This method used to pull in base class attributes
# at a time dir() didn't do it yet.
return dir(self.__class__)
def complete_help(self, *args):
commands = set(self.completenames(*args))
topics = set(a[5:] for a in self.get_names()
if a.startswith('help_' + args[0]))
return list(commands | topics)
def do_help(self, arg):
'List available commands with "help" or detailed help with "help cmd".'
if arg:
# XXX check arg syntax
try:
func = getattr(self, 'help_' + arg)
except AttributeError:
try:
doc=getattr(self, 'do_' + arg).__doc__
if doc:
self.stdout.write("%s\n"%str(doc))
return
except AttributeError:
pass
self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
return
func()
else:
names = self.get_names()
cmds_doc = []
cmds_undoc = []
help = {}
for name in names:
if name[:5] == 'help_':
help[name[5:]]=1
names.sort()
# There can be duplicates if routines overridden
prevname = ''
for name in names:
if name[:3] == 'do_':
if name == prevname:
continue
prevname = name
cmd=name[3:]
if cmd in help:
cmds_doc.append(cmd)
del help[cmd]
elif getattr(self, name).__doc__:
cmds_doc.append(cmd)
else:
cmds_undoc.append(cmd)
self.stdout.write("%s\n"%str(self.doc_leader))
self.print_topics(self.doc_header, cmds_doc, 15,80)
self.print_topics(self.misc_header, help.keys(),15,80)
self.print_topics(self.undoc_header, cmds_undoc, 15,80)
def print_topics(self, header, cmds, cmdlen, maxcol):
if cmds:
self.stdout.write("%s\n"%str(header))
if self.ruler:
self.stdout.write("%s\n"%str(self.ruler * len(header)))
self.columnize(cmds, maxcol-1)
self.stdout.write("\n")
def columnize(self, list, displaywidth=80):
"""Display a list of strings as a compact set of columns.
Each column is only as wide as necessary.
Columns are separated by two spaces (one was not legible enough).
"""
if not list:
self.stdout.write("<empty>\n")
return
nonstrings = [i for i in range(len(list))
if not isinstance(list[i], str)]
if nonstrings:
raise TypeError, ("list[i] not a string for i in %s" %
", ".join(map(str, nonstrings)))
size = len(list)
if size == 1:
self.stdout.write('%s\n'%str(list[0]))
return
# Try every row count from 1 upwards
for nrows in range(1, len(list)):
ncols = (size+nrows-1) // nrows
colwidths = []
totwidth = -2
for col in range(ncols):
colwidth = 0
for row in range(nrows):
i = row + nrows*col
if i >= size:
break
x = list[i]
colwidth = max(colwidth, len(x))
colwidths.append(colwidth)
totwidth += colwidth + 2
if totwidth > displaywidth:
break
if totwidth <= displaywidth:
break
else:
nrows = len(list)
ncols = 1
colwidths = [0]
for row in range(nrows):
texts = []
for col in range(ncols):
i = row + nrows*col
if i >= size:
x = ""
else:
x = list[i]
texts.append(x)
while texts and not texts[-1]:
del texts[-1]
for col in range(len(texts)):
texts[col] = texts[col].ljust(colwidths[col])
self.stdout.write("%s\n"%str(" ".join(texts)))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,730 @@
__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
# They should however be considered an integral part of collections.py.
from _abcoll import *
import _abcoll
__all__ += _abcoll.__all__
from _collections import deque, defaultdict
from operator import itemgetter as _itemgetter, eq as _eq
from keyword import iskeyword as _iskeyword
import sys as _sys
import heapq as _heapq
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
from itertools import imap as _imap
try:
from thread import get_ident as _get_ident
except ImportError:
from dummy_thread import get_ident as _get_ident
################################################################################
### OrderedDict
################################################################################
class OrderedDict(dict):
'Dictionary that remembers insertion order'
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
# Big-O running times for all methods are the same as regular dictionaries.
# The internal self.__map dict maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
def __init__(*args, **kwds):
'''Initialize an ordered dictionary. The signature is the same as
regular dictionaries, but keyword arguments are not recommended because
their insertion order is arbitrary.
'''
if not args:
raise TypeError("descriptor '__init__' of 'OrderedDict' object "
"needs an argument")
self = args[0]
args = args[1:]
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__root
except AttributeError:
self.__root = root = [] # sentinel node
root[:] = [root, root, None]
self.__map = {}
self.__update(*args, **kwds)
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked list,
# and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[0]
last[1] = root[0] = self.__map[key] = [last, root, key]
return dict_setitem(self, key, value)
def __delitem__(self, key, dict_delitem=dict.__delitem__):
'od.__delitem__(y) <==> del od[y]'
# Deleting an existing item uses self.__map to find the link which gets
# removed by updating the links in the predecessor and successor nodes.
dict_delitem(self, key)
link_prev, link_next, _ = self.__map.pop(key)
link_prev[1] = link_next # update link_prev[NEXT]
link_next[0] = link_prev # update link_next[PREV]
def __iter__(self):
'od.__iter__() <==> iter(od)'
# Traverse the linked list in order.
root = self.__root
curr = root[1] # start at the first node
while curr is not root:
yield curr[2] # yield the curr[KEY]
curr = curr[1] # move to next node
def __reversed__(self):
'od.__reversed__() <==> reversed(od)'
# Traverse the linked list in reverse order.
root = self.__root
curr = root[0] # start at the last node
while curr is not root:
yield curr[2] # yield the curr[KEY]
curr = curr[0] # move to previous node
def clear(self):
'od.clear() -> None. Remove all items from od.'
root = self.__root
root[:] = [root, root, None]
self.__map.clear()
dict.clear(self)
# -- the following methods do not depend on the internal structure --
def keys(self):
'od.keys() -> list of keys in od'
return list(self)
def values(self):
'od.values() -> list of values in od'
return [self[key] for key in self]
def items(self):
'od.items() -> list of (key, value) pairs in od'
return [(key, self[key]) for key in self]
def iterkeys(self):
'od.iterkeys() -> an iterator over the keys in od'
return iter(self)
def itervalues(self):
'od.itervalues -> an iterator over the values in od'
for k in self:
yield self[k]
def iteritems(self):
'od.iteritems -> an iterator over the (key, value) pairs in od'
for k in self:
yield (k, self[k])
update = MutableMapping.update
__update = update # let subclasses override update without breaking __init__
__marker = object()
def pop(self, key, default=__marker):
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding
value. If key is not found, d is returned if given, otherwise KeyError
is raised.
'''
if key in self:
result = self[key]
del self[key]
return result
if default is self.__marker:
raise KeyError(key)
return default
def setdefault(self, key, default=None):
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
if key in self:
return self[key]
self[key] = default
return default
def popitem(self, last=True):
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
Pairs are returned in LIFO order if last is true or FIFO order if false.
'''
if not self:
raise KeyError('dictionary is empty')
key = next(reversed(self) if last else iter(self))
value = self.pop(key)
return key, value
def __repr__(self, _repr_running={}):
'od.__repr__() <==> repr(od)'
call_key = id(self), _get_ident()
if call_key in _repr_running:
return '...'
_repr_running[call_key] = 1
try:
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.items())
finally:
del _repr_running[call_key]
def __reduce__(self):
'Return state information for pickling'
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
for k in vars(OrderedDict()):
inst_dict.pop(k, None)
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def copy(self):
'od.copy() -> a shallow copy of od'
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
If not specified, the value defaults to None.
'''
self = cls()
for key in iterable:
self[key] = value
return self
def __eq__(self, other):
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
while comparison to a regular mapping is order-insensitive.
'''
if isinstance(other, OrderedDict):
return dict.__eq__(self, other) and all(_imap(_eq, self, other))
return dict.__eq__(self, other)
def __ne__(self, other):
'od.__ne__(y) <==> od!=y'
return not self == other
# -- the following methods support python 3.x style dictionary views --
def viewkeys(self):
"od.viewkeys() -> a set-like object providing a view on od's keys"
return KeysView(self)
def viewvalues(self):
"od.viewvalues() -> an object providing a view on od's values"
return ValuesView(self)
def viewitems(self):
"od.viewitems() -> a set-like object providing a view on od's items"
return ItemsView(self)
################################################################################
### namedtuple
################################################################################
_class_template = '''\
class {typename}(tuple):
'{typename}({arg_list})'
__slots__ = ()
_fields = {field_names!r}
def __new__(_cls, {arg_list}):
'Create new instance of {typename}({arg_list})'
return _tuple.__new__(_cls, ({arg_list}))
@classmethod
def _make(cls, iterable, new=tuple.__new__, len=len):
'Make a new {typename} object from a sequence or iterable'
result = new(cls, iterable)
if len(result) != {num_fields:d}:
raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
return result
def __repr__(self):
'Return a nicely formatted representation string'
return '{typename}({repr_fmt})' % self
def _asdict(self):
'Return a new OrderedDict which maps field names to their values'
return OrderedDict(zip(self._fields, self))
def _replace(_self, **kwds):
'Return a new {typename} object replacing specified fields with new values'
result = _self._make(map(kwds.pop, {field_names!r}, _self))
if kwds:
raise ValueError('Got unexpected field names: %r' % kwds.keys())
return result
def __getnewargs__(self):
'Return self as a plain tuple. Used by copy and pickle.'
return tuple(self)
__dict__ = _property(_asdict)
def __getstate__(self):
'Exclude the OrderedDict from pickling'
pass
{field_defs}
'''
_repr_template = '{name}=%r'
_field_template = '''\
{name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
'''
def namedtuple(typename, field_names, verbose=False, rename=False):
"""Returns a new subclass of tuple with named fields.
>>> Point = namedtuple('Point', ['x', 'y'])
>>> Point.__doc__ # docstring for the new class
'Point(x, y)'
>>> p = Point(11, y=22) # instantiate with positional args or keywords
>>> p[0] + p[1] # indexable like a plain tuple
33
>>> x, y = p # unpack like a regular tuple
>>> x, y
(11, 22)
>>> p.x + p.y # fields also accessable by name
33
>>> d = p._asdict() # convert to a dictionary
>>> d['x']
11
>>> Point(**d) # convert from a dictionary
Point(x=11, y=22)
>>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
Point(x=100, y=22)
"""
# Validate the field names. At the user's option, either generate an error
# message or automatically replace the field name with a valid name.
if isinstance(field_names, basestring):
field_names = field_names.replace(',', ' ').split()
field_names = map(str, field_names)
typename = str(typename)
if rename:
seen = set()
for index, name in enumerate(field_names):
if (not all(c.isalnum() or c=='_' for c in name)
or _iskeyword(name)
or not name
or name[0].isdigit()
or name.startswith('_')
or name in seen):
field_names[index] = '_%d' % index
seen.add(name)
for name in [typename] + field_names:
if type(name) != str:
raise TypeError('Type names and field names must be strings')
if not all(c.isalnum() or c=='_' for c in name):
raise ValueError('Type names and field names can only contain '
'alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
raise ValueError('Type names and field names cannot be a '
'keyword: %r' % name)
if name[0].isdigit():
raise ValueError('Type names and field names cannot start with '
'a number: %r' % name)
seen = set()
for name in field_names:
if name.startswith('_') and not rename:
raise ValueError('Field names cannot start with an underscore: '
'%r' % name)
if name in seen:
raise ValueError('Encountered duplicate field name: %r' % name)
seen.add(name)
# Fill-in the class template
class_definition = _class_template.format(
typename = typename,
field_names = tuple(field_names),
num_fields = len(field_names),
arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
repr_fmt = ', '.join(_repr_template.format(name=name)
for name in field_names),
field_defs = '\n'.join(_field_template.format(index=index, name=name)
for index, name in enumerate(field_names))
)
if verbose:
print class_definition
# Execute the template string in a temporary namespace and support
# tracing utilities by setting a value for frame.f_globals['__name__']
namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
OrderedDict=OrderedDict, _property=property, _tuple=tuple)
try:
exec class_definition in namespace
except SyntaxError as e:
raise SyntaxError(e.message + ':\n' + class_definition)
result = namespace[typename]
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in environments where
# sys._getframe is not defined (Jython for example) or sys._getframe is not
# defined for arguments greater than 0 (IronPython).
try:
result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
except (AttributeError, ValueError):
pass
return result
########################################################################
### Counter
########################################################################
class Counter(dict):
'''Dict subclass for counting hashable items. Sometimes called a bag
or multiset. Elements are stored as dictionary keys and their counts
are stored as dictionary values.
>>> c = Counter('abcdeabcdabcaba') # count elements from a string
>>> c.most_common(3) # three most common elements
[('a', 5), ('b', 4), ('c', 3)]
>>> sorted(c) # list all unique elements
['a', 'b', 'c', 'd', 'e']
>>> ''.join(sorted(c.elements())) # list elements with repetitions
'aaaaabbbbcccdde'
>>> sum(c.values()) # total of all counts
15
>>> c['a'] # count of letter 'a'
5
>>> for elem in 'shazam': # update counts from an iterable
... c[elem] += 1 # by adding 1 to each element's count
>>> c['a'] # now there are seven 'a'
7
>>> del c['b'] # remove all 'b'
>>> c['b'] # now there are zero 'b'
0
>>> d = Counter('simsalabim') # make another counter
>>> c.update(d) # add in the second counter
>>> c['a'] # now there are nine 'a'
9
>>> c.clear() # empty the counter
>>> c
Counter()
Note: If a count is set to zero or reduced to zero, it will remain
in the counter until the entry is deleted or the counter is cleared:
>>> c = Counter('aaabbc')
>>> c['b'] -= 2 # reduce the count of 'b' by two
>>> c.most_common() # 'b' is still in, but its count is zero
[('a', 3), ('c', 1), ('b', 0)]
'''
# References:
# http://en.wikipedia.org/wiki/Multiset
# http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
# http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
# http://code.activestate.com/recipes/259174/
# Knuth, TAOCP Vol. II section 4.6.3
def __init__(*args, **kwds):
'''Create a new, empty Counter object. And if given, count elements
from an input iterable. Or, initialize the count from another mapping
of elements to their counts.
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
>>> c = Counter(a=4, b=2) # a new counter from keyword args
'''
if not args:
raise TypeError("descriptor '__init__' of 'Counter' object "
"needs an argument")
self = args[0]
args = args[1:]
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
super(Counter, self).__init__()
self.update(*args, **kwds)
def __missing__(self, key):
'The count of elements not in the Counter is zero.'
# Needed so that self[missing_item] does not raise KeyError
return 0
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
if n is None:
return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
def elements(self):
'''Iterator over elements repeating each as many times as its count.
>>> c = Counter('ABCABC')
>>> sorted(c.elements())
['A', 'A', 'B', 'B', 'C', 'C']
# Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
>>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
>>> product = 1
>>> for factor in prime_factors.elements(): # loop over factors
... product *= factor # and multiply them
>>> product
1836
Note, if an element's count has been set to zero or is a negative
number, elements() will ignore it.
'''
# Emulate Bag.do from Smalltalk and Multiset.begin from C++.
return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
# Override dict methods where necessary
@classmethod
def fromkeys(cls, iterable, v=None):
# There is no equivalent method for counters because setting v=1
# means that no element can have a count greater than one.
raise NotImplementedError(
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
def update(*args, **kwds):
'''Like dict.update() but add counts instead of replacing them.
Source can be an iterable, a dictionary, or another Counter instance.
>>> c = Counter('which')
>>> c.update('witch') # add elements from another iterable
>>> d = Counter('watch')
>>> c.update(d) # add elements from another counter
>>> c['h'] # four 'h' in which, witch, and watch
4
'''
# The regular dict.update() operation makes no sense here because the
# replace behavior results in the some of original untouched counts
# being mixed-in with all of the other counts for a mismash that
# doesn't have a straight-forward interpretation in most counting
# contexts. Instead, we implement straight-addition. Both the inputs
# and outputs are allowed to contain zero and negative counts.
if not args:
raise TypeError("descriptor 'update' of 'Counter' object "
"needs an argument")
self = args[0]
args = args[1:]
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
iterable = args[0] if args else None
if iterable is not None:
if isinstance(iterable, Mapping):
if self:
self_get = self.get
for elem, count in iterable.iteritems():
self[elem] = self_get(elem, 0) + count
else:
super(Counter, self).update(iterable) # fast path when counter is empty
else:
self_get = self.get
for elem in iterable:
self[elem] = self_get(elem, 0) + 1
if kwds:
self.update(kwds)
def subtract(*args, **kwds):
'''Like dict.update() but subtracts counts instead of replacing them.
Counts can be reduced below zero. Both the inputs and outputs are
allowed to contain zero and negative counts.
Source can be an iterable, a dictionary, or another Counter instance.
>>> c = Counter('which')
>>> c.subtract('witch') # subtract elements from another iterable
>>> c.subtract(Counter('watch')) # subtract elements from another counter
>>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
0
>>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
-1
'''
if not args:
raise TypeError("descriptor 'subtract' of 'Counter' object "
"needs an argument")
self = args[0]
args = args[1:]
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
iterable = args[0] if args else None
if iterable is not None:
self_get = self.get
if isinstance(iterable, Mapping):
for elem, count in iterable.items():
self[elem] = self_get(elem, 0) - count
else:
for elem in iterable:
self[elem] = self_get(elem, 0) - 1
if kwds:
self.subtract(kwds)
def copy(self):
'Return a shallow copy.'
return self.__class__(self)
def __reduce__(self):
return self.__class__, (dict(self),)
def __delitem__(self, elem):
'Like dict.__delitem__() but does not raise KeyError for missing values.'
if elem in self:
super(Counter, self).__delitem__(elem)
def __repr__(self):
if not self:
return '%s()' % self.__class__.__name__
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
return '%s({%s})' % (self.__class__.__name__, items)
# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
# and at http://en.wikipedia.org/wiki/Multiset
#
# Outputs guaranteed to only include positive counts.
#
# To strip negative and zero counts, add-in an empty counter:
# c += Counter()
def __add__(self, other):
'''Add counts from two counters.
>>> Counter('abbb') + Counter('bcc')
Counter({'b': 4, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count + other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
def __sub__(self, other):
''' Subtract count, but keep only results with positive counts.
>>> Counter('abbbc') - Counter('bccd')
Counter({'b': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count - other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count < 0:
result[elem] = 0 - count
return result
def __or__(self, other):
'''Union is the maximum of value in either of the input counters.
>>> Counter('abbb') | Counter('bcc')
Counter({'b': 3, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = other_count if count < other_count else count
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
def __and__(self, other):
''' Intersection is the minimum of corresponding counts.
>>> Counter('abbb') & Counter('bcc')
Counter({'b': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = count if count < other_count else other_count
if newcount > 0:
result[elem] = newcount
return result
if __name__ == '__main__':
# verify that instances can be pickled
from cPickle import loads, dumps
Point = namedtuple('Point', 'x, y', True)
p = Point(x=10, y=20)
assert p == loads(dumps(p))
# test and demonstrate ability to override methods
class Point(namedtuple('Point', 'x y')):
__slots__ = ()
@property
def hypot(self):
return (self.x ** 2 + self.y ** 2) ** 0.5
def __str__(self):
return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
for p in Point(3, 4), Point(14, 5/7.):
print p
class Point(namedtuple('Point', 'x y')):
'Point class with optimized _make() and _replace() without error-checking'
__slots__ = ()
_make = classmethod(tuple.__new__)
def _replace(self, _map=map, **kwds):
return self._make(_map(kwds.get, ('x', 'y'), self))
print Point(11, 22)._replace(x=100)
Point3D = namedtuple('Point3D', Point._fields + ('z',))
print Point3D.__doc__
import doctest
TestResults = namedtuple('TestResults', 'failed attempted')
print TestResults(*doctest.testmod())

View File

@ -0,0 +1,227 @@
"""Module/script to byte-compile all .py files to .pyc (or .pyo) files.
When called as a script with arguments, this compiles the directories
given as arguments recursively; the -l option prevents it from
recursing into directories.
Without arguments, if compiles all modules on sys.path, without
recursing into subdirectories. (Even though it should do so for
packages -- for now, you'll have to deal with packages separately.)
See module py_compile for details of the actual byte-compilation.
"""
import os
import sys
import py_compile
import struct
import imp
__all__ = ["compile_dir","compile_file","compile_path"]
def compile_dir(dir, maxlevels=10, ddir=None,
force=0, rx=None, quiet=0):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
dir: the directory to byte-compile
maxlevels: maximum recursion level (default 10)
ddir: the directory that will be prepended to the path to the
file as it is compiled into each byte-code file.
force: if 1, force compilation, even if timestamps are up-to-date
quiet: if 1, be quiet during compilation
"""
if not quiet:
print 'Listing', dir, '...'
try:
names = os.listdir(dir)
except os.error:
print "Can't list", dir
names = []
names.sort()
success = 1
for name in names:
fullname = os.path.join(dir, name)
if ddir is not None:
dfile = os.path.join(ddir, name)
else:
dfile = None
if not os.path.isdir(fullname):
if not compile_file(fullname, ddir, force, rx, quiet):
success = 0
elif maxlevels > 0 and \
name != os.curdir and name != os.pardir and \
os.path.isdir(fullname) and \
not os.path.islink(fullname):
if not compile_dir(fullname, maxlevels - 1, dfile, force, rx,
quiet):
success = 0
return success
def compile_file(fullname, ddir=None, force=0, rx=None, quiet=0):
"""Byte-compile one file.
Arguments (only fullname is required):
fullname: the file to byte-compile
ddir: if given, the directory name compiled in to the
byte-code file.
force: if 1, force compilation, even if timestamps are up-to-date
quiet: if 1, be quiet during compilation
"""
success = 1
name = os.path.basename(fullname)
if ddir is not None:
dfile = os.path.join(ddir, name)
else:
dfile = None
if rx is not None:
mo = rx.search(fullname)
if mo:
return success
if os.path.isfile(fullname):
head, tail = name[:-3], name[-3:]
if tail == '.py':
if not force:
try:
mtime = int(os.stat(fullname).st_mtime)
expect = struct.pack('<4sl', imp.get_magic(), mtime)
cfile = fullname + (__debug__ and 'c' or 'o')
with open(cfile, 'rb') as chandle:
actual = chandle.read(8)
if expect == actual:
return success
except IOError:
pass
if not quiet:
print 'Compiling', fullname, '...'
try:
ok = py_compile.compile(fullname, None, dfile, True)
except py_compile.PyCompileError,err:
if quiet:
print 'Compiling', fullname, '...'
print err.msg
success = 0
except IOError, e:
print "Sorry", e
success = 0
else:
if ok == 0:
success = 0
return success
def compile_path(skip_curdir=1, maxlevels=0, force=0, quiet=0):
"""Byte-compile all module on sys.path.
Arguments (all optional):
skip_curdir: if true, skip current directory (default true)
maxlevels: max recursion level (default 0)
force: as for compile_dir() (default 0)
quiet: as for compile_dir() (default 0)
"""
success = 1
for dir in sys.path:
if (not dir or dir == os.curdir) and skip_curdir:
print 'Skipping current directory'
else:
success = success and compile_dir(dir, maxlevels, None,
force, quiet=quiet)
return success
def expand_args(args, flist):
"""read names in flist and append to args"""
expanded = args[:]
if flist:
try:
if flist == '-':
fd = sys.stdin
else:
fd = open(flist)
while 1:
line = fd.readline()
if not line:
break
expanded.append(line[:-1])
except IOError:
print "Error reading file list %s" % flist
raise
return expanded
def main():
"""Script main program."""
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'lfqd:x:i:')
except getopt.error, msg:
print msg
print "usage: python compileall.py [-l] [-f] [-q] [-d destdir] " \
"[-x regexp] [-i list] [directory|file ...]"
print
print "arguments: zero or more file and directory names to compile; " \
"if no arguments given, "
print " defaults to the equivalent of -l sys.path"
print
print "options:"
print "-l: don't recurse into subdirectories"
print "-f: force rebuild even if timestamps are up-to-date"
print "-q: output only error messages"
print "-d destdir: directory to prepend to file paths for use in " \
"compile-time tracebacks and in"
print " runtime tracebacks in cases where the source " \
"file is unavailable"
print "-x regexp: skip files matching the regular expression regexp; " \
"the regexp is searched for"
print " in the full path of each file considered for " \
"compilation"
print "-i file: add all the files and directories listed in file to " \
"the list considered for"
print ' compilation; if "-", names are read from stdin'
sys.exit(2)
maxlevels = 10
ddir = None
force = 0
quiet = 0
rx = None
flist = None
for o, a in opts:
if o == '-l': maxlevels = 0
if o == '-d': ddir = a
if o == '-f': force = 1
if o == '-q': quiet = 1
if o == '-x':
import re
rx = re.compile(a)
if o == '-i': flist = a
if ddir:
if len(args) != 1 and not os.path.isdir(args[0]):
print "-d destdir require exactly one directory argument"
sys.exit(2)
success = 1
try:
if args or flist:
try:
if flist:
args = expand_args(args, flist)
except IOError:
success = 0
if success:
for arg in args:
if os.path.isdir(arg):
if not compile_dir(arg, maxlevels, ddir,
force, rx, quiet):
success = 0
else:
if not compile_file(arg, ddir, force, rx, quiet):
success = 0
else:
success = compile_path()
except KeyboardInterrupt:
print "\n[interrupted]"
success = 0
return success
if __name__ == '__main__':
exit_status = int(not main())
sys.exit(exit_status)

View File

@ -0,0 +1,433 @@
"""Generic (shallow and deep) copying operations.
Interface summary:
import copy
x = copy.copy(y) # make a shallow copy of y
x = copy.deepcopy(y) # make a deep copy of y
For module specific errors, copy.Error is raised.
The difference between shallow and deep copying is only relevant for
compound objects (objects that contain other objects, like lists or
class instances).
- A shallow copy constructs a new compound object and then (to the
extent possible) inserts *the same objects* into it that the
original contains.
- A deep copy constructs a new compound object and then, recursively,
inserts *copies* into it of the objects found in the original.
Two problems often exist with deep copy operations that don't exist
with shallow copy operations:
a) recursive objects (compound objects that, directly or indirectly,
contain a reference to themselves) may cause a recursive loop
b) because deep copy copies *everything* it may copy too much, e.g.
administrative data structures that should be shared even between
copies
Python's deep copy operation avoids these problems by:
a) keeping a table of objects already copied during the current
copying pass
b) letting user-defined classes override the copying operation or the
set of components copied
This version does not copy types like module, class, function, method,
nor stack trace, stack frame, nor file, socket, window, nor array, nor
any similar types.
Classes can use the same interfaces to control copying that they use
to control pickling: they can define methods called __getinitargs__(),
__getstate__() and __setstate__(). See the documentation for module
"pickle" for information on these methods.
"""
import types
import weakref
from copy_reg import dispatch_table
class Error(Exception):
pass
error = Error # backward compatibility
try:
from org.python.core import PyStringMap
except ImportError:
PyStringMap = None
__all__ = ["Error", "copy", "deepcopy"]
def copy(x):
"""Shallow copy operation on arbitrary Python objects.
See the module's __doc__ string for more info.
"""
cls = type(x)
copier = _copy_dispatch.get(cls)
if copier:
return copier(x)
copier = getattr(cls, "__copy__", None)
if copier:
return copier(x)
reductor = dispatch_table.get(cls)
if reductor:
rv = reductor(x)
else:
reductor = getattr(x, "__reduce_ex__", None)
if reductor:
rv = reductor(2)
else:
reductor = getattr(x, "__reduce__", None)
if reductor:
rv = reductor()
else:
raise Error("un(shallow)copyable object of type %s" % cls)
return _reconstruct(x, rv, 0)
_copy_dispatch = d = {}
def _copy_immutable(x):
return x
for t in (type(None), int, long, float, bool, str, tuple,
frozenset, type, xrange, types.ClassType,
types.BuiltinFunctionType, type(Ellipsis),
types.FunctionType, weakref.ref):
d[t] = _copy_immutable
for name in ("ComplexType", "UnicodeType", "CodeType"):
t = getattr(types, name, None)
if t is not None:
d[t] = _copy_immutable
def _copy_with_constructor(x):
return type(x)(x)
for t in (list, dict, set):
d[t] = _copy_with_constructor
def _copy_with_copy_method(x):
return x.copy()
if PyStringMap is not None:
d[PyStringMap] = _copy_with_copy_method
def _copy_inst(x):
if hasattr(x, '__copy__'):
return x.__copy__()
if hasattr(x, '__getinitargs__'):
args = x.__getinitargs__()
y = x.__class__(*args)
else:
y = _EmptyClass()
y.__class__ = x.__class__
if hasattr(x, '__getstate__'):
state = x.__getstate__()
else:
state = x.__dict__
if hasattr(y, '__setstate__'):
y.__setstate__(state)
else:
y.__dict__.update(state)
return y
d[types.InstanceType] = _copy_inst
del d
def deepcopy(x, memo=None, _nil=[]):
"""Deep copy operation on arbitrary Python objects.
See the module's __doc__ string for more info.
"""
if memo is None:
memo = {}
d = id(x)
y = memo.get(d, _nil)
if y is not _nil:
return y
cls = type(x)
copier = _deepcopy_dispatch.get(cls)
if copier:
y = copier(x, memo)
else:
try:
issc = issubclass(cls, type)
except TypeError: # cls is not a class (old Boost; see SF #502085)
issc = 0
if issc:
y = _deepcopy_atomic(x, memo)
else:
copier = getattr(x, "__deepcopy__", None)
if copier:
y = copier(memo)
else:
reductor = dispatch_table.get(cls)
if reductor:
rv = reductor(x)
else:
reductor = getattr(x, "__reduce_ex__", None)
if reductor:
rv = reductor(2)
else:
reductor = getattr(x, "__reduce__", None)
if reductor:
rv = reductor()
else:
raise Error(
"un(deep)copyable object of type %s" % cls)
y = _reconstruct(x, rv, 1, memo)
memo[d] = y
_keep_alive(x, memo) # Make sure x lives at least as long as d
return y
_deepcopy_dispatch = d = {}
def _deepcopy_atomic(x, memo):
return x
d[type(None)] = _deepcopy_atomic
d[type(Ellipsis)] = _deepcopy_atomic
d[int] = _deepcopy_atomic
d[long] = _deepcopy_atomic
d[float] = _deepcopy_atomic
d[bool] = _deepcopy_atomic
try:
d[complex] = _deepcopy_atomic
except NameError:
pass
d[str] = _deepcopy_atomic
try:
d[unicode] = _deepcopy_atomic
except NameError:
pass
try:
d[types.CodeType] = _deepcopy_atomic
except AttributeError:
pass
d[type] = _deepcopy_atomic
d[xrange] = _deepcopy_atomic
d[types.ClassType] = _deepcopy_atomic
d[types.BuiltinFunctionType] = _deepcopy_atomic
d[types.FunctionType] = _deepcopy_atomic
d[weakref.ref] = _deepcopy_atomic
def _deepcopy_list(x, memo):
y = []
memo[id(x)] = y
for a in x:
y.append(deepcopy(a, memo))
return y
d[list] = _deepcopy_list
def _deepcopy_tuple(x, memo):
y = []
for a in x:
y.append(deepcopy(a, memo))
d = id(x)
try:
return memo[d]
except KeyError:
pass
for i in range(len(x)):
if x[i] is not y[i]:
y = tuple(y)
break
else:
y = x
memo[d] = y
return y
d[tuple] = _deepcopy_tuple
def _deepcopy_dict(x, memo):
y = {}
memo[id(x)] = y
for key, value in x.iteritems():
y[deepcopy(key, memo)] = deepcopy(value, memo)
return y
d[dict] = _deepcopy_dict
if PyStringMap is not None:
d[PyStringMap] = _deepcopy_dict
def _deepcopy_method(x, memo): # Copy instance methods
return type(x)(x.im_func, deepcopy(x.im_self, memo), x.im_class)
_deepcopy_dispatch[types.MethodType] = _deepcopy_method
def _keep_alive(x, memo):
"""Keeps a reference to the object x in the memo.
Because we remember objects by their id, we have
to assure that possibly temporary objects are kept
alive by referencing them.
We store a reference at the id of the memo, which should
normally not be used unless someone tries to deepcopy
the memo itself...
"""
try:
memo[id(memo)].append(x)
except KeyError:
# aha, this is the first one :-)
memo[id(memo)]=[x]
def _deepcopy_inst(x, memo):
if hasattr(x, '__deepcopy__'):
return x.__deepcopy__(memo)
if hasattr(x, '__getinitargs__'):
args = x.__getinitargs__()
args = deepcopy(args, memo)
y = x.__class__(*args)
else:
y = _EmptyClass()
y.__class__ = x.__class__
memo[id(x)] = y
if hasattr(x, '__getstate__'):
state = x.__getstate__()
else:
state = x.__dict__
state = deepcopy(state, memo)
if hasattr(y, '__setstate__'):
y.__setstate__(state)
else:
y.__dict__.update(state)
return y
d[types.InstanceType] = _deepcopy_inst
def _reconstruct(x, info, deep, memo=None):
if isinstance(info, str):
return x
assert isinstance(info, tuple)
if memo is None:
memo = {}
n = len(info)
assert n in (2, 3, 4, 5)
callable, args = info[:2]
if n > 2:
state = info[2]
else:
state = {}
if n > 3:
listiter = info[3]
else:
listiter = None
if n > 4:
dictiter = info[4]
else:
dictiter = None
if deep:
args = deepcopy(args, memo)
y = callable(*args)
memo[id(x)] = y
if state:
if deep:
state = deepcopy(state, memo)
if hasattr(y, '__setstate__'):
y.__setstate__(state)
else:
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
else:
slotstate = None
if state is not None:
y.__dict__.update(state)
if slotstate is not None:
for key, value in slotstate.iteritems():
setattr(y, key, value)
if listiter is not None:
for item in listiter:
if deep:
item = deepcopy(item, memo)
y.append(item)
if dictiter is not None:
for key, value in dictiter:
if deep:
key = deepcopy(key, memo)
value = deepcopy(value, memo)
y[key] = value
return y
del d
del types
# Helper for instance creation without calling __init__
class _EmptyClass:
pass
def _test():
l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'],
{'abc': 'ABC'}, (), [], {}]
l1 = copy(l)
print l1==l
l1 = map(copy, l)
print l1==l
l1 = deepcopy(l)
print l1==l
class C:
def __init__(self, arg=None):
self.a = 1
self.arg = arg
if __name__ == '__main__':
import sys
file = sys.argv[0]
else:
file = __file__
self.fp = open(file)
self.fp.close()
def __getstate__(self):
return {'a': self.a, 'arg': self.arg}
def __setstate__(self, state):
for key, value in state.iteritems():
setattr(self, key, value)
def __deepcopy__(self, memo=None):
new = self.__class__(deepcopy(self.arg, memo))
new.a = self.a
return new
c = C('argument sketch')
l.append(c)
l2 = copy(l)
print l == l2
print l
print l2
l2 = deepcopy(l)
print l == l2
print l
print l2
l.append({l[1]: l, 'xyz': l[2]})
l3 = copy(l)
import repr
print map(repr.repr, l)
print map(repr.repr, l1)
print map(repr.repr, l2)
print map(repr.repr, l3)
l3 = deepcopy(l)
import repr
print map(repr.repr, l)
print map(repr.repr, l1)
print map(repr.repr, l2)
print map(repr.repr, l3)
class odict(dict):
def __init__(self, d = {}):
self.a = 99
dict.__init__(self, d)
def __setitem__(self, k, i):
dict.__setitem__(self, k, i)
self.a
o = odict({"A" : "B"})
x = deepcopy(o)
print(o, x)
if __name__ == '__main__':
_test()

View File

@ -0,0 +1,201 @@
"""Helper to provide extensibility for pickle/cPickle.
This is only useful to add pickle support for extension types defined in
C, not for instances of user-defined classes.
"""
from types import ClassType as _ClassType
__all__ = ["pickle", "constructor",
"add_extension", "remove_extension", "clear_extension_cache"]
dispatch_table = {}
def pickle(ob_type, pickle_function, constructor_ob=None):
if type(ob_type) is _ClassType:
raise TypeError("copy_reg is not intended for use with classes")
if not hasattr(pickle_function, '__call__'):
raise TypeError("reduction functions must be callable")
dispatch_table[ob_type] = pickle_function
# The constructor_ob function is a vestige of safe for unpickling.
# There is no reason for the caller to pass it anymore.
if constructor_ob is not None:
constructor(constructor_ob)
def constructor(object):
if not hasattr(object, '__call__'):
raise TypeError("constructors must be callable")
# Example: provide pickling support for complex numbers.
try:
complex
except NameError:
pass
else:
def pickle_complex(c):
return complex, (c.real, c.imag)
pickle(complex, pickle_complex, complex)
# Support for pickling new-style objects
def _reconstructor(cls, base, state):
if base is object:
obj = object.__new__(cls)
else:
obj = base.__new__(cls, state)
if base.__init__ != object.__init__:
base.__init__(obj, state)
return obj
_HEAPTYPE = 1<<9
# Python code for object.__reduce_ex__ for protocols 0 and 1
def _reduce_ex(self, proto):
assert proto < 2
for base in self.__class__.__mro__:
if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
break
else:
base = object # not really reachable
if base is object:
state = None
else:
if base is self.__class__:
raise TypeError, "can't pickle %s objects" % base.__name__
state = base(self)
args = (self.__class__, base, state)
try:
getstate = self.__getstate__
except AttributeError:
if getattr(self, "__slots__", None):
raise TypeError("a class that defines __slots__ without "
"defining __getstate__ cannot be pickled")
try:
dict = self.__dict__
except AttributeError:
dict = None
else:
dict = getstate()
if dict:
return _reconstructor, args, dict
else:
return _reconstructor, args
# Helper for __reduce_ex__ protocol 2
def __newobj__(cls, *args):
return cls.__new__(cls, *args)
def _slotnames(cls):
"""Return a list of slot names for a given class.
This needs to find slots defined by the class and its bases, so we
can't simply return the __slots__ attribute. We must walk down
the Method Resolution Order and concatenate the __slots__ of each
class found there. (This assumes classes don't modify their
__slots__ attribute to misrepresent their slots after the class is
defined.)
"""
# Get the value from a cache in the class if possible
names = cls.__dict__.get("__slotnames__")
if names is not None:
return names
# Not cached -- calculate the value
names = []
if not hasattr(cls, "__slots__"):
# This class has no slots
pass
else:
# Slots found -- gather slot names from all base classes
for c in cls.__mro__:
if "__slots__" in c.__dict__:
slots = c.__dict__['__slots__']
# if class has a single slot, it can be given as a string
if isinstance(slots, basestring):
slots = (slots,)
for name in slots:
# special descriptors
if name in ("__dict__", "__weakref__"):
continue
# mangled names
elif name.startswith('__') and not name.endswith('__'):
names.append('_%s%s' % (c.__name__, name))
else:
names.append(name)
# Cache the outcome in the class if at all possible
try:
cls.__slotnames__ = names
except:
pass # But don't die if we can't
return names
# A registry of extension codes. This is an ad-hoc compression
# mechanism. Whenever a global reference to <module>, <name> is about
# to be pickled, the (<module>, <name>) tuple is looked up here to see
# if it is a registered extension code for it. Extension codes are
# universal, so that the meaning of a pickle does not depend on
# context. (There are also some codes reserved for local use that
# don't have this restriction.) Codes are positive ints; 0 is
# reserved.
_extension_registry = {} # key -> code
_inverted_registry = {} # code -> key
_extension_cache = {} # code -> object
# Don't ever rebind those names: cPickle grabs a reference to them when
# it's initialized, and won't see a rebinding.
def add_extension(module, name, code):
"""Register an extension code."""
code = int(code)
if not 1 <= code <= 0x7fffffff:
raise ValueError, "code out of range"
key = (module, name)
if (_extension_registry.get(key) == code and
_inverted_registry.get(code) == key):
return # Redundant registrations are benign
if key in _extension_registry:
raise ValueError("key %s is already registered with code %s" %
(key, _extension_registry[key]))
if code in _inverted_registry:
raise ValueError("code %s is already in use for key %s" %
(code, _inverted_registry[code]))
_extension_registry[key] = code
_inverted_registry[code] = key
def remove_extension(module, name, code):
"""Unregister an extension code. For testing only."""
key = (module, name)
if (_extension_registry.get(key) != code or
_inverted_registry.get(code) != key):
raise ValueError("key %s is not registered with code %s" %
(key, code))
del _extension_registry[key]
del _inverted_registry[code]
if code in _extension_cache:
del _extension_cache[code]
def clear_extension_cache():
_extension_cache.clear()
# Standard extension code assignments
# Reserved ranges
# First Last Count Purpose
# 1 127 127 Reserved for Python standard library
# 128 191 64 Reserved for Zope
# 192 239 48 Reserved for 3rd parties
# 240 255 16 Reserved for private use (will never be assigned)
# 256 Inf Inf Reserved for future assignment
# Extension codes are assigned by the Python Software Foundation.

View File

@ -0,0 +1,456 @@
"""
csv.py - read/write/investigate CSV files
"""
import re
from functools import reduce
from _csv import Error, __version__, writer, reader, register_dialect, \
unregister_dialect, get_dialect, list_dialects, \
field_size_limit, \
QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
__doc__
from _csv import Dialect as _Dialect
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
"Error", "Dialect", "__doc__", "excel", "excel_tab",
"field_size_limit", "reader", "writer",
"register_dialect", "get_dialect", "list_dialects", "Sniffer",
"unregister_dialect", "__version__", "DictReader", "DictWriter" ]
class Dialect:
"""Describe an Excel dialect.
This must be subclassed (see csv.excel). Valid attributes are:
delimiter, quotechar, escapechar, doublequote, skipinitialspace,
lineterminator, quoting.
"""
_name = ""
_valid = False
# placeholders
delimiter = None
quotechar = None
escapechar = None
doublequote = None
skipinitialspace = None
lineterminator = None
quoting = None
def __init__(self):
if self.__class__ != Dialect:
self._valid = True
self._validate()
def _validate(self):
try:
_Dialect(self)
except TypeError, e:
# We do this for compatibility with py2.3
raise Error(str(e))
class excel(Dialect):
"""Describe the usual properties of Excel-generated CSV files."""
delimiter = ','
quotechar = '"'
doublequote = True
skipinitialspace = False
lineterminator = '\r\n'
quoting = QUOTE_MINIMAL
register_dialect("excel", excel)
class excel_tab(excel):
"""Describe the usual properties of Excel-generated TAB-delimited files."""
delimiter = '\t'
register_dialect("excel-tab", excel_tab)
class DictReader:
def __init__(self, f, fieldnames=None, restkey=None, restval=None,
dialect="excel", *args, **kwds):
self._fieldnames = fieldnames # list of keys for the dict
self.restkey = restkey # key to catch long rows
self.restval = restval # default value for short rows
self.reader = reader(f, dialect, *args, **kwds)
self.dialect = dialect
self.line_num = 0
def __iter__(self):
return self
@property
def fieldnames(self):
if self._fieldnames is None:
try:
self._fieldnames = self.reader.next()
except StopIteration:
pass
self.line_num = self.reader.line_num
return self._fieldnames
# Issue 20004: Because DictReader is a classic class, this setter is
# ignored. At this point in 2.7's lifecycle, it is too late to change the
# base class for fear of breaking working code. If you want to change
# fieldnames without overwriting the getter, set _fieldnames directly.
@fieldnames.setter
def fieldnames(self, value):
self._fieldnames = value
def next(self):
if self.line_num == 0:
# Used only for its side effect.
self.fieldnames
row = self.reader.next()
self.line_num = self.reader.line_num
# unlike the basic reader, we prefer not to return blanks,
# because we will typically wind up with a dict full of None
# values
while row == []:
row = self.reader.next()
d = dict(zip(self.fieldnames, row))
lf = len(self.fieldnames)
lr = len(row)
if lf < lr:
d[self.restkey] = row[lf:]
elif lf > lr:
for key in self.fieldnames[lr:]:
d[key] = self.restval
return d
class DictWriter:
def __init__(self, f, fieldnames, restval="", extrasaction="raise",
dialect="excel", *args, **kwds):
self.fieldnames = fieldnames # list of keys for the dict
self.restval = restval # for writing short dicts
if extrasaction.lower() not in ("raise", "ignore"):
raise ValueError, \
("extrasaction (%s) must be 'raise' or 'ignore'" %
extrasaction)
self.extrasaction = extrasaction
self.writer = writer(f, dialect, *args, **kwds)
def writeheader(self):
header = dict(zip(self.fieldnames, self.fieldnames))
self.writerow(header)
def _dict_to_list(self, rowdict):
if self.extrasaction == "raise":
wrong_fields = [k for k in rowdict if k not in self.fieldnames]
if wrong_fields:
raise ValueError("dict contains fields not in fieldnames: "
+ ", ".join([repr(x) for x in wrong_fields]))
return [rowdict.get(key, self.restval) for key in self.fieldnames]
def writerow(self, rowdict):
return self.writer.writerow(self._dict_to_list(rowdict))
def writerows(self, rowdicts):
rows = []
for rowdict in rowdicts:
rows.append(self._dict_to_list(rowdict))
return self.writer.writerows(rows)
# Guard Sniffer's type checking against builds that exclude complex()
try:
complex
except NameError:
complex = float
class Sniffer:
'''
"Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
Returns a Dialect object.
'''
def __init__(self):
# in case there is more than one possible delimiter
self.preferred = [',', '\t', ';', ' ', ':']
def sniff(self, sample, delimiters=None):
"""
Returns a dialect (or None) corresponding to the sample
"""
quotechar, doublequote, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters)
if not delimiter:
raise Error, "Could not determine delimiter"
class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
quoting = QUOTE_MINIMAL
# escapechar = ''
dialect.doublequote = doublequote
dialect.delimiter = delimiter
# _csv.reader won't accept a quotechar of ''
dialect.quotechar = quotechar or '"'
dialect.skipinitialspace = skipinitialspace
return dialect
def _guess_quote_and_delimiter(self, data, delimiters):
"""
Looks for text enclosed between two identical quotes
(the probable quotechar) which are preceded and followed
by the same character (the probable delimiter).
For example:
,'some text',
The quote with the most wins, same with the delimiter.
If there is no quotechar the delimiter can't be determined
this way.
"""
matches = []
for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?",
'(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
matches = regexp.findall(data)
if matches:
break
if not matches:
# (quotechar, doublequote, delimiter, skipinitialspace)
return ('', False, None, 0)
quotes = {}
delims = {}
spaces = 0
for m in matches:
n = regexp.groupindex['quote'] - 1
key = m[n]
if key:
quotes[key] = quotes.get(key, 0) + 1
try:
n = regexp.groupindex['delim'] - 1
key = m[n]
except KeyError:
continue
if key and (delimiters is None or key in delimiters):
delims[key] = delims.get(key, 0) + 1
try:
n = regexp.groupindex['space'] - 1
except KeyError:
continue
if m[n]:
spaces += 1
quotechar = reduce(lambda a, b, quotes = quotes:
(quotes[a] > quotes[b]) and a or b, quotes.keys())
if delims:
delim = reduce(lambda a, b, delims = delims:
(delims[a] > delims[b]) and a or b, delims.keys())
skipinitialspace = delims[delim] == spaces
if delim == '\n': # most likely a file with a single column
delim = ''
else:
# there is *no* delimiter, it's a single column of quoted data
delim = ''
skipinitialspace = 0
# if we see an extra quote between delimiters, we've got a
# double quoted format
dq_regexp = re.compile(
r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
{'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
if dq_regexp.search(data):
doublequote = True
else:
doublequote = False
return (quotechar, doublequote, delim, skipinitialspace)
def _guess_delimiter(self, data, delimiters):
"""
The delimiter /should/ occur the same number of times on
each row. However, due to malformed data, it may not. We don't want
an all or nothing approach, so we allow for small variations in this
number.
1) build a table of the frequency of each character on every line.
2) build a table of frequencies of this frequency (meta-frequency?),
e.g. 'x occurred 5 times in 10 rows, 6 times in 1000 rows,
7 times in 2 rows'
3) use the mode of the meta-frequency to determine the /expected/
frequency for that character
4) find out how often the character actually meets that goal
5) the character that best meets its goal is the delimiter
For performance reasons, the data is evaluated in chunks, so it can
try and evaluate the smallest portion of the data possible, evaluating
additional chunks as necessary.
"""
data = filter(None, data.split('\n'))
ascii = [chr(c) for c in range(127)] # 7-bit ASCII
# build frequency tables
chunkLength = min(10, len(data))
iteration = 0
charFrequency = {}
modes = {}
delims = {}
start, end = 0, min(chunkLength, len(data))
while start < len(data):
iteration += 1
for line in data[start:end]:
for char in ascii:
metaFrequency = charFrequency.get(char, {})
# must count even if frequency is 0
freq = line.count(char)
# value is the mode
metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
charFrequency[char] = metaFrequency
for char in charFrequency.keys():
items = charFrequency[char].items()
if len(items) == 1 and items[0][0] == 0:
continue
# get the mode of the frequencies
if len(items) > 1:
modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b,
items)
# adjust the mode - subtract the sum of all
# other frequencies
items.remove(modes[char])
modes[char] = (modes[char][0], modes[char][1]
- reduce(lambda a, b: (0, a[1] + b[1]),
items)[1])
else:
modes[char] = items[0]
# build a list of possible delimiters
modeList = modes.items()
total = float(chunkLength * iteration)
# (rows of consistent data) / (number of rows) = 100%
consistency = 1.0
# minimum consistency threshold
threshold = 0.9
while len(delims) == 0 and consistency >= threshold:
for k, v in modeList:
if v[0] > 0 and v[1] > 0:
if ((v[1]/total) >= consistency and
(delimiters is None or k in delimiters)):
delims[k] = v
consistency -= 0.01
if len(delims) == 1:
delim = delims.keys()[0]
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)
# analyze another chunkLength lines
start = end
end += chunkLength
if not delims:
return ('', 0)
# if there's more than one, fall back to a 'preferred' list
if len(delims) > 1:
for d in self.preferred:
if d in delims.keys():
skipinitialspace = (data[0].count(d) ==
data[0].count("%c " % d))
return (d, skipinitialspace)
# nothing else indicates a preference, pick the character that
# dominates(?)
items = [(v,k) for (k,v) in delims.items()]
items.sort()
delim = items[-1][1]
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)
def has_header(self, sample):
# Creates a dictionary of types of data in each column. If any
# column is of a single type (say, integers), *except* for the first
# row, then the first row is presumed to be labels. If the type
# can't be determined, it is assumed to be a string in which case
# the length of the string is the determining factor: if all of the
# rows except for the first are the same length, it's a header.
# Finally, a 'vote' is taken at the end for each column, adding or
# subtracting from the likelihood of the first row being a header.
rdr = reader(StringIO(sample), self.sniff(sample))
header = rdr.next() # assume first row is header
columns = len(header)
columnTypes = {}
for i in range(columns): columnTypes[i] = None
checked = 0
for row in rdr:
# arbitrary number of rows to check, to keep it sane
if checked > 20:
break
checked += 1
if len(row) != columns:
continue # skip rows that have irregular number of columns
for col in columnTypes.keys():
for thisType in [int, long, float, complex]:
try:
thisType(row[col])
break
except (ValueError, OverflowError):
pass
else:
# fallback to length of string
thisType = len(row[col])
# treat longs as ints
if thisType == long:
thisType = int
if thisType != columnTypes[col]:
if columnTypes[col] is None: # add new column type
columnTypes[col] = thisType
else:
# type is inconsistent, remove column from
# consideration
del columnTypes[col]
# finally, compare results against first row and "vote"
# on whether it's a header
hasHeader = 0
for col, colType in columnTypes.items():
if type(colType) == type(0): # it's a length
if len(header[col]) != colType:
hasHeader += 1
else:
hasHeader -= 1
else: # attempt typecast
try:
colType(header[col])
except (ValueError, TypeError):
hasHeader += 1
else:
hasHeader -= 1
return hasHeader > 0

View File

@ -0,0 +1,224 @@
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
from opcode import *
from opcode import __all__ as _opcodes_all
__all__ = ["dis", "disassemble", "distb", "disco",
"findlinestarts", "findlabels"] + _opcodes_all
del _opcodes_all
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
types.ClassType, type)
def dis(x=None):
"""Disassemble classes, methods, functions, or code.
With no argument, disassemble the last traceback.
"""
if x is None:
distb()
return
if isinstance(x, types.InstanceType):
x = x.__class__
if hasattr(x, 'im_func'):
x = x.im_func
if hasattr(x, 'func_code'):
x = x.func_code
if hasattr(x, '__dict__'):
items = x.__dict__.items()
items.sort()
for name, x1 in items:
if isinstance(x1, _have_code):
print "Disassembly of %s:" % name
try:
dis(x1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(x, 'co_code'):
disassemble(x)
elif isinstance(x, str):
disassemble_string(x)
else:
raise TypeError, \
"don't know how to disassemble %s objects" % \
type(x).__name__
def distb(tb=None):
"""Disassemble a traceback (default: last traceback)."""
if tb is None:
try:
tb = sys.last_traceback
except AttributeError:
raise RuntimeError, "no last traceback to disassemble"
while tb.tb_next: tb = tb.tb_next
disassemble(tb.tb_frame.f_code, tb.tb_lasti)
def disassemble(co, lasti=-1):
"""Disassemble a code object."""
code = co.co_code
labels = findlabels(code)
linestarts = dict(findlinestarts(co))
n = len(code)
i = 0
extended_arg = 0
free = None
while i < n:
c = code[i]
op = ord(c)
if i in linestarts:
if i > 0:
print
print "%3d" % linestarts[i],
else:
print ' ',
if i == lasti: print '-->',
else: print ' ',
if i in labels: print '>>',
else: print ' ',
print repr(i).rjust(4),
print opname[op].ljust(20),
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = oparg*65536L
print repr(oparg).rjust(5),
if op in hasconst:
print '(' + repr(co.co_consts[oparg]) + ')',
elif op in hasname:
print '(' + co.co_names[oparg] + ')',
elif op in hasjrel:
print '(to ' + repr(i + oparg) + ')',
elif op in haslocal:
print '(' + co.co_varnames[oparg] + ')',
elif op in hascompare:
print '(' + cmp_op[oparg] + ')',
elif op in hasfree:
if free is None:
free = co.co_cellvars + co.co_freevars
print '(' + free[oparg] + ')',
print
def disassemble_string(code, lasti=-1, varnames=None, names=None,
constants=None):
labels = findlabels(code)
n = len(code)
i = 0
while i < n:
c = code[i]
op = ord(c)
if i == lasti: print '-->',
else: print ' ',
if i in labels: print '>>',
else: print ' ',
print repr(i).rjust(4),
print opname[op].ljust(15),
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
print repr(oparg).rjust(5),
if op in hasconst:
if constants:
print '(' + repr(constants[oparg]) + ')',
else:
print '(%d)'%oparg,
elif op in hasname:
if names is not None:
print '(' + names[oparg] + ')',
else:
print '(%d)'%oparg,
elif op in hasjrel:
print '(to ' + repr(i + oparg) + ')',
elif op in haslocal:
if varnames:
print '(' + varnames[oparg] + ')',
else:
print '(%d)' % oparg,
elif op in hascompare:
print '(' + cmp_op[oparg] + ')',
print
disco = disassemble # XXX For backwards compatibility
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
"""
labels = []
n = len(code)
i = 0
while i < n:
c = code[i]
op = ord(c)
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
label = -1
if op in hasjrel:
label = i+oparg
elif op in hasjabs:
label = oparg
if label >= 0:
if label not in labels:
labels.append(label)
return labels
def findlinestarts(code):
"""Find the offsets in a byte code which are start of lines in the source.
Generate pairs (offset, lineno) as described in Python/compile.c.
"""
byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
line_increments = [ord(c) for c in code.co_lnotab[1::2]]
lastlineno = None
lineno = code.co_firstlineno
addr = 0
for byte_incr, line_incr in zip(byte_increments, line_increments):
if byte_incr:
if lineno != lastlineno:
yield (addr, lineno)
lastlineno = lineno
addr += byte_incr
lineno += line_incr
if lineno != lastlineno:
yield (addr, lineno)
def _test():
"""Simple test program to disassemble a file."""
if sys.argv[1:]:
if sys.argv[2:]:
sys.stderr.write("usage: python dis.py [-|file]\n")
sys.exit(2)
fn = sys.argv[1]
if not fn or fn == "-":
fn = None
else:
fn = None
if fn is None:
f = sys.stdin
else:
f = open(fn)
source = f.read()
if fn is not None:
f.close()
else:
fn = "<stdin>"
code = compile(source, fn, "exec")
dis(code)
if __name__ == "__main__":
_test()

View File

@ -0,0 +1,145 @@
"""Drop-in replacement for the thread module.
Meant to be used as a brain-dead substitute so that threaded code does
not need to be rewritten for when the thread module is not present.
Suggested usage is::
try:
import thread
except ImportError:
import dummy_thread as thread
"""
# Exports only things specified by thread documentation;
# skipping obsolete synonyms allocate(), start_new(), exit_thread().
__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock',
'interrupt_main', 'LockType']
import traceback as _traceback
class error(Exception):
"""Dummy implementation of thread.error."""
def __init__(self, *args):
self.args = args
def start_new_thread(function, args, kwargs={}):
"""Dummy implementation of thread.start_new_thread().
Compatibility is maintained by making sure that ``args`` is a
tuple and ``kwargs`` is a dictionary. If an exception is raised
and it is SystemExit (which can be done by thread.exit()) it is
caught and nothing is done; all other exceptions are printed out
by using traceback.print_exc().
If the executed function calls interrupt_main the KeyboardInterrupt will be
raised when the function returns.
"""
if type(args) != type(tuple()):
raise TypeError("2nd arg must be a tuple")
if type(kwargs) != type(dict()):
raise TypeError("3rd arg must be a dict")
global _main
_main = False
try:
function(*args, **kwargs)
except SystemExit:
pass
except:
_traceback.print_exc()
_main = True
global _interrupt
if _interrupt:
_interrupt = False
raise KeyboardInterrupt
def exit():
"""Dummy implementation of thread.exit()."""
raise SystemExit
def get_ident():
"""Dummy implementation of thread.get_ident().
Since this module should only be used when threadmodule is not
available, it is safe to assume that the current process is the
only thread. Thus a constant can be safely returned.
"""
return -1
def allocate_lock():
"""Dummy implementation of thread.allocate_lock()."""
return LockType()
def stack_size(size=None):
"""Dummy implementation of thread.stack_size()."""
if size is not None:
raise error("setting thread stack size not supported")
return 0
class LockType(object):
"""Class implementing dummy implementation of thread.LockType.
Compatibility is maintained by maintaining self.locked_status
which is a boolean that stores the state of the lock. Pickling of
the lock, though, should not be done since if the thread module is
then used with an unpickled ``lock()`` from here problems could
occur from this class not having atomic methods.
"""
def __init__(self):
self.locked_status = False
def acquire(self, waitflag=None):
"""Dummy implementation of acquire().
For blocking calls, self.locked_status is automatically set to
True and returned appropriately based on value of
``waitflag``. If it is non-blocking, then the value is
actually checked and not set if it is already acquired. This
is all done so that threading.Condition's assert statements
aren't triggered and throw a little fit.
"""
if waitflag is None or waitflag:
self.locked_status = True
return True
else:
if not self.locked_status:
self.locked_status = True
return True
else:
return False
__enter__ = acquire
def __exit__(self, typ, val, tb):
self.release()
def release(self):
"""Release the dummy lock."""
# XXX Perhaps shouldn't actually bother to test? Could lead
# to problems for complex, threaded code.
if not self.locked_status:
raise error
self.locked_status = False
return True
def locked(self):
return self.locked_status
# Used to signal that interrupt_main was called in a "thread"
_interrupt = False
# True when not executing in a "thread"
_main = True
def interrupt_main():
"""Set _interrupt flag to True to have start_new_thread raise
KeyboardInterrupt upon exiting."""
if _main:
raise KeyboardInterrupt
else:
global _interrupt
_interrupt = True

View File

@ -0,0 +1,157 @@
""" Standard "encodings" Package
Standard Python encoding modules are stored in this package
directory.
Codec modules must have names corresponding to normalized encoding
names as defined in the normalize_encoding() function below, e.g.
'utf-8' must be implemented by the module 'utf_8.py'.
Each codec module must export the following interface:
* getregentry() -> codecs.CodecInfo object
The getregentry() API must a CodecInfo object with encoder, decoder,
incrementalencoder, incrementaldecoder, streamwriter and streamreader
atttributes which adhere to the Python Codec Interface Standard.
In addition, a module may optionally also define the following
APIs which are then used by the package's codec search function:
* getaliases() -> sequence of encoding name strings to use as aliases
Alias names returned by getaliases() must be normalized encoding
names as defined by normalize_encoding().
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
from encodings import aliases
import __builtin__
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
_norm_encoding_map = (' . '
'0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
' abcdefghijklmnopqrstuvwxyz '
' '
' '
' ')
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
pass
def normalize_encoding(encoding):
""" Normalize an encoding name.
Normalization works as follows: all non-alphanumeric
characters except the dot used for Python package names are
collapsed and replaced with a single underscore, e.g. ' -;#'
becomes '_'. Leading and trailing underscores are removed.
Note that encoding names should be ASCII only; if they do use
non-ASCII characters, these must be Latin-1 compatible.
"""
# Make sure we have an 8-bit string, because .translate() works
# differently for Unicode strings.
if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode):
# Note that .encode('latin-1') does *not* use the codec
# registry, so this call doesn't recurse. (See unicodeobject.c
# PyUnicode_AsEncodedString() for details)
encoding = encoding.encode('latin-1')
return '_'.join(encoding.translate(_norm_encoding_map).split())
def search_function(encoding):
# Cache lookup
entry = _cache.get(encoding, _unknown)
if entry is not _unknown:
return entry
# Import the module:
#
# First try to find an alias for the normalized encoding
# name and lookup the module using the aliased name, then try to
# lookup the module using the standard import scheme, i.e. first
# try in the encodings package, then at top-level.
#
norm_encoding = normalize_encoding(encoding)
aliased_encoding = _aliases.get(norm_encoding) or \
_aliases.get(norm_encoding.replace('.', '_'))
if aliased_encoding is not None:
modnames = [aliased_encoding,
norm_encoding]
else:
modnames = [norm_encoding]
for modname in modnames:
if not modname or '.' in modname:
continue
try:
# Import is absolute to prevent the possibly malicious import of a
# module with side-effects that is not in the 'encodings' package.
mod = __import__('encodings.' + modname, fromlist=_import_tail,
level=0)
except ImportError:
pass
else:
break
else:
mod = None
try:
getregentry = mod.getregentry
except AttributeError:
# Not a codec module
mod = None
if mod is None:
# Cache misses
_cache[encoding] = None
return None
# Now ask the module for the registry entry
entry = getregentry()
if not isinstance(entry, codecs.CodecInfo):
if not 4 <= len(entry) <= 7:
raise CodecRegistryError,\
'module "%s" (%s) failed to register' % \
(mod.__name__, mod.__file__)
if not hasattr(entry[0], '__call__') or \
not hasattr(entry[1], '__call__') or \
(entry[2] is not None and not hasattr(entry[2], '__call__')) or \
(entry[3] is not None and not hasattr(entry[3], '__call__')) or \
(len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \
(len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')):
raise CodecRegistryError,\
'incompatible codecs in module "%s" (%s)' % \
(mod.__name__, mod.__file__)
if len(entry)<7 or entry[6] is None:
entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
entry = codecs.CodecInfo(*entry)
# Cache the codec registry entry
_cache[encoding] = entry
# Register its aliases (without overwriting previously registered
# aliases)
try:
codecaliases = mod.getaliases()
except AttributeError:
pass
else:
for alias in codecaliases:
if alias not in _aliases:
_aliases[alias] = modname
# Return the registry entry
return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)

View File

@ -0,0 +1,527 @@
""" Encoding Aliases Support
This module is used by the encodings package search function to
map encodings names to module names.
Note that the search function normalizes the encoding names before
doing the lookup, so the mapping will have to map normalized
encoding names to module names.
Contents:
The following aliases dictionary contains mappings of all IANA
character set names for which the Python core library provides
codecs. In addition to these, a few Python specific codec
aliases have also been added.
"""
aliases = {
# Please keep this list sorted alphabetically by value !
# ascii codec
'646' : 'ascii',
'ansi_x3.4_1968' : 'ascii',
'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
'ansi_x3.4_1986' : 'ascii',
'cp367' : 'ascii',
'csascii' : 'ascii',
'ibm367' : 'ascii',
'iso646_us' : 'ascii',
'iso_646.irv_1991' : 'ascii',
'iso_ir_6' : 'ascii',
'us' : 'ascii',
'us_ascii' : 'ascii',
# base64_codec codec
'base64' : 'base64_codec',
'base_64' : 'base64_codec',
# big5 codec
'big5_tw' : 'big5',
'csbig5' : 'big5',
# big5hkscs codec
'big5_hkscs' : 'big5hkscs',
'hkscs' : 'big5hkscs',
# bz2_codec codec
'bz2' : 'bz2_codec',
# cp037 codec
'037' : 'cp037',
'csibm037' : 'cp037',
'ebcdic_cp_ca' : 'cp037',
'ebcdic_cp_nl' : 'cp037',
'ebcdic_cp_us' : 'cp037',
'ebcdic_cp_wt' : 'cp037',
'ibm037' : 'cp037',
'ibm039' : 'cp037',
# cp1026 codec
'1026' : 'cp1026',
'csibm1026' : 'cp1026',
'ibm1026' : 'cp1026',
# cp1140 codec
'1140' : 'cp1140',
'ibm1140' : 'cp1140',
# cp1250 codec
'1250' : 'cp1250',
'windows_1250' : 'cp1250',
# cp1251 codec
'1251' : 'cp1251',
'windows_1251' : 'cp1251',
# cp1252 codec
'1252' : 'cp1252',
'windows_1252' : 'cp1252',
# cp1253 codec
'1253' : 'cp1253',
'windows_1253' : 'cp1253',
# cp1254 codec
'1254' : 'cp1254',
'windows_1254' : 'cp1254',
# cp1255 codec
'1255' : 'cp1255',
'windows_1255' : 'cp1255',
# cp1256 codec
'1256' : 'cp1256',
'windows_1256' : 'cp1256',
# cp1257 codec
'1257' : 'cp1257',
'windows_1257' : 'cp1257',
# cp1258 codec
'1258' : 'cp1258',
'windows_1258' : 'cp1258',
# cp424 codec
'424' : 'cp424',
'csibm424' : 'cp424',
'ebcdic_cp_he' : 'cp424',
'ibm424' : 'cp424',
# cp437 codec
'437' : 'cp437',
'cspc8codepage437' : 'cp437',
'ibm437' : 'cp437',
# cp500 codec
'500' : 'cp500',
'csibm500' : 'cp500',
'ebcdic_cp_be' : 'cp500',
'ebcdic_cp_ch' : 'cp500',
'ibm500' : 'cp500',
# cp775 codec
'775' : 'cp775',
'cspc775baltic' : 'cp775',
'ibm775' : 'cp775',
# cp850 codec
'850' : 'cp850',
'cspc850multilingual' : 'cp850',
'ibm850' : 'cp850',
# cp852 codec
'852' : 'cp852',
'cspcp852' : 'cp852',
'ibm852' : 'cp852',
# cp855 codec
'855' : 'cp855',
'csibm855' : 'cp855',
'ibm855' : 'cp855',
# cp857 codec
'857' : 'cp857',
'csibm857' : 'cp857',
'ibm857' : 'cp857',
# cp858 codec
'858' : 'cp858',
'csibm858' : 'cp858',
'ibm858' : 'cp858',
# cp860 codec
'860' : 'cp860',
'csibm860' : 'cp860',
'ibm860' : 'cp860',
# cp861 codec
'861' : 'cp861',
'cp_is' : 'cp861',
'csibm861' : 'cp861',
'ibm861' : 'cp861',
# cp862 codec
'862' : 'cp862',
'cspc862latinhebrew' : 'cp862',
'ibm862' : 'cp862',
# cp863 codec
'863' : 'cp863',
'csibm863' : 'cp863',
'ibm863' : 'cp863',
# cp864 codec
'864' : 'cp864',
'csibm864' : 'cp864',
'ibm864' : 'cp864',
# cp865 codec
'865' : 'cp865',
'csibm865' : 'cp865',
'ibm865' : 'cp865',
# cp866 codec
'866' : 'cp866',
'csibm866' : 'cp866',
'ibm866' : 'cp866',
# cp869 codec
'869' : 'cp869',
'cp_gr' : 'cp869',
'csibm869' : 'cp869',
'ibm869' : 'cp869',
# cp932 codec
'932' : 'cp932',
'ms932' : 'cp932',
'mskanji' : 'cp932',
'ms_kanji' : 'cp932',
# cp949 codec
'949' : 'cp949',
'ms949' : 'cp949',
'uhc' : 'cp949',
# cp950 codec
'950' : 'cp950',
'ms950' : 'cp950',
# euc_jis_2004 codec
'jisx0213' : 'euc_jis_2004',
'eucjis2004' : 'euc_jis_2004',
'euc_jis2004' : 'euc_jis_2004',
# euc_jisx0213 codec
'eucjisx0213' : 'euc_jisx0213',
# euc_jp codec
'eucjp' : 'euc_jp',
'ujis' : 'euc_jp',
'u_jis' : 'euc_jp',
# euc_kr codec
'euckr' : 'euc_kr',
'korean' : 'euc_kr',
'ksc5601' : 'euc_kr',
'ks_c_5601' : 'euc_kr',
'ks_c_5601_1987' : 'euc_kr',
'ksx1001' : 'euc_kr',
'ks_x_1001' : 'euc_kr',
# gb18030 codec
'gb18030_2000' : 'gb18030',
# gb2312 codec
'chinese' : 'gb2312',
'csiso58gb231280' : 'gb2312',
'euc_cn' : 'gb2312',
'euccn' : 'gb2312',
'eucgb2312_cn' : 'gb2312',
'gb2312_1980' : 'gb2312',
'gb2312_80' : 'gb2312',
'iso_ir_58' : 'gb2312',
# gbk codec
'936' : 'gbk',
'cp936' : 'gbk',
'ms936' : 'gbk',
# hex_codec codec
'hex' : 'hex_codec',
# hp_roman8 codec
'roman8' : 'hp_roman8',
'r8' : 'hp_roman8',
'csHPRoman8' : 'hp_roman8',
# hz codec
'hzgb' : 'hz',
'hz_gb' : 'hz',
'hz_gb_2312' : 'hz',
# iso2022_jp codec
'csiso2022jp' : 'iso2022_jp',
'iso2022jp' : 'iso2022_jp',
'iso_2022_jp' : 'iso2022_jp',
# iso2022_jp_1 codec
'iso2022jp_1' : 'iso2022_jp_1',
'iso_2022_jp_1' : 'iso2022_jp_1',
# iso2022_jp_2 codec
'iso2022jp_2' : 'iso2022_jp_2',
'iso_2022_jp_2' : 'iso2022_jp_2',
# iso2022_jp_2004 codec
'iso_2022_jp_2004' : 'iso2022_jp_2004',
'iso2022jp_2004' : 'iso2022_jp_2004',
# iso2022_jp_3 codec
'iso2022jp_3' : 'iso2022_jp_3',
'iso_2022_jp_3' : 'iso2022_jp_3',
# iso2022_jp_ext codec
'iso2022jp_ext' : 'iso2022_jp_ext',
'iso_2022_jp_ext' : 'iso2022_jp_ext',
# iso2022_kr codec
'csiso2022kr' : 'iso2022_kr',
'iso2022kr' : 'iso2022_kr',
'iso_2022_kr' : 'iso2022_kr',
# iso8859_10 codec
'csisolatin6' : 'iso8859_10',
'iso_8859_10' : 'iso8859_10',
'iso_8859_10_1992' : 'iso8859_10',
'iso_ir_157' : 'iso8859_10',
'l6' : 'iso8859_10',
'latin6' : 'iso8859_10',
# iso8859_11 codec
'thai' : 'iso8859_11',
'iso_8859_11' : 'iso8859_11',
'iso_8859_11_2001' : 'iso8859_11',
# iso8859_13 codec
'iso_8859_13' : 'iso8859_13',
'l7' : 'iso8859_13',
'latin7' : 'iso8859_13',
# iso8859_14 codec
'iso_8859_14' : 'iso8859_14',
'iso_8859_14_1998' : 'iso8859_14',
'iso_celtic' : 'iso8859_14',
'iso_ir_199' : 'iso8859_14',
'l8' : 'iso8859_14',
'latin8' : 'iso8859_14',
# iso8859_15 codec
'iso_8859_15' : 'iso8859_15',
'l9' : 'iso8859_15',
'latin9' : 'iso8859_15',
# iso8859_16 codec
'iso_8859_16' : 'iso8859_16',
'iso_8859_16_2001' : 'iso8859_16',
'iso_ir_226' : 'iso8859_16',
'l10' : 'iso8859_16',
'latin10' : 'iso8859_16',
# iso8859_2 codec
'csisolatin2' : 'iso8859_2',
'iso_8859_2' : 'iso8859_2',
'iso_8859_2_1987' : 'iso8859_2',
'iso_ir_101' : 'iso8859_2',
'l2' : 'iso8859_2',
'latin2' : 'iso8859_2',
# iso8859_3 codec
'csisolatin3' : 'iso8859_3',
'iso_8859_3' : 'iso8859_3',
'iso_8859_3_1988' : 'iso8859_3',
'iso_ir_109' : 'iso8859_3',
'l3' : 'iso8859_3',
'latin3' : 'iso8859_3',
# iso8859_4 codec
'csisolatin4' : 'iso8859_4',
'iso_8859_4' : 'iso8859_4',
'iso_8859_4_1988' : 'iso8859_4',
'iso_ir_110' : 'iso8859_4',
'l4' : 'iso8859_4',
'latin4' : 'iso8859_4',
# iso8859_5 codec
'csisolatincyrillic' : 'iso8859_5',
'cyrillic' : 'iso8859_5',
'iso_8859_5' : 'iso8859_5',
'iso_8859_5_1988' : 'iso8859_5',
'iso_ir_144' : 'iso8859_5',
# iso8859_6 codec
'arabic' : 'iso8859_6',
'asmo_708' : 'iso8859_6',
'csisolatinarabic' : 'iso8859_6',
'ecma_114' : 'iso8859_6',
'iso_8859_6' : 'iso8859_6',
'iso_8859_6_1987' : 'iso8859_6',
'iso_ir_127' : 'iso8859_6',
# iso8859_7 codec
'csisolatingreek' : 'iso8859_7',
'ecma_118' : 'iso8859_7',
'elot_928' : 'iso8859_7',
'greek' : 'iso8859_7',
'greek8' : 'iso8859_7',
'iso_8859_7' : 'iso8859_7',
'iso_8859_7_1987' : 'iso8859_7',
'iso_ir_126' : 'iso8859_7',
# iso8859_8 codec
'csisolatinhebrew' : 'iso8859_8',
'hebrew' : 'iso8859_8',
'iso_8859_8' : 'iso8859_8',
'iso_8859_8_1988' : 'iso8859_8',
'iso_ir_138' : 'iso8859_8',
# iso8859_9 codec
'csisolatin5' : 'iso8859_9',
'iso_8859_9' : 'iso8859_9',
'iso_8859_9_1989' : 'iso8859_9',
'iso_ir_148' : 'iso8859_9',
'l5' : 'iso8859_9',
'latin5' : 'iso8859_9',
# johab codec
'cp1361' : 'johab',
'ms1361' : 'johab',
# koi8_r codec
'cskoi8r' : 'koi8_r',
# latin_1 codec
#
# Note that the latin_1 codec is implemented internally in C and a
# lot faster than the charmap codec iso8859_1 which uses the same
# encoding. This is why we discourage the use of the iso8859_1
# codec and alias it to latin_1 instead.
#
'8859' : 'latin_1',
'cp819' : 'latin_1',
'csisolatin1' : 'latin_1',
'ibm819' : 'latin_1',
'iso8859' : 'latin_1',
'iso8859_1' : 'latin_1',
'iso_8859_1' : 'latin_1',
'iso_8859_1_1987' : 'latin_1',
'iso_ir_100' : 'latin_1',
'l1' : 'latin_1',
'latin' : 'latin_1',
'latin1' : 'latin_1',
# mac_cyrillic codec
'maccyrillic' : 'mac_cyrillic',
# mac_greek codec
'macgreek' : 'mac_greek',
# mac_iceland codec
'maciceland' : 'mac_iceland',
# mac_latin2 codec
'maccentraleurope' : 'mac_latin2',
'maclatin2' : 'mac_latin2',
# mac_roman codec
'macroman' : 'mac_roman',
# mac_turkish codec
'macturkish' : 'mac_turkish',
# mbcs codec
'dbcs' : 'mbcs',
# ptcp154 codec
'csptcp154' : 'ptcp154',
'pt154' : 'ptcp154',
'cp154' : 'ptcp154',
'cyrillic_asian' : 'ptcp154',
# quopri_codec codec
'quopri' : 'quopri_codec',
'quoted_printable' : 'quopri_codec',
'quotedprintable' : 'quopri_codec',
# rot_13 codec
'rot13' : 'rot_13',
# shift_jis codec
'csshiftjis' : 'shift_jis',
'shiftjis' : 'shift_jis',
'sjis' : 'shift_jis',
's_jis' : 'shift_jis',
# shift_jis_2004 codec
'shiftjis2004' : 'shift_jis_2004',
'sjis_2004' : 'shift_jis_2004',
's_jis_2004' : 'shift_jis_2004',
# shift_jisx0213 codec
'shiftjisx0213' : 'shift_jisx0213',
'sjisx0213' : 'shift_jisx0213',
's_jisx0213' : 'shift_jisx0213',
# tactis codec
'tis260' : 'tactis',
# tis_620 codec
'tis620' : 'tis_620',
'tis_620_0' : 'tis_620',
'tis_620_2529_0' : 'tis_620',
'tis_620_2529_1' : 'tis_620',
'iso_ir_166' : 'tis_620',
# utf_16 codec
'u16' : 'utf_16',
'utf16' : 'utf_16',
# utf_16_be codec
'unicodebigunmarked' : 'utf_16_be',
'utf_16be' : 'utf_16_be',
# utf_16_le codec
'unicodelittleunmarked' : 'utf_16_le',
'utf_16le' : 'utf_16_le',
# utf_32 codec
'u32' : 'utf_32',
'utf32' : 'utf_32',
# utf_32_be codec
'utf_32be' : 'utf_32_be',
# utf_32_le codec
'utf_32le' : 'utf_32_le',
# utf_7 codec
'u7' : 'utf_7',
'utf7' : 'utf_7',
'unicode_1_1_utf_7' : 'utf_7',
# utf_8 codec
'u8' : 'utf_8',
'utf' : 'utf_8',
'utf8' : 'utf_8',
'utf8_ucs2' : 'utf_8',
'utf8_ucs4' : 'utf_8',
# uu_codec codec
'uu' : 'uu_codec',
# zlib_codec codec
'zip' : 'zlib_codec',
'zlib' : 'zlib_codec',
}

View File

@ -0,0 +1,50 @@
""" Python 'ascii' Codec
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.ascii_encode
decode = codecs.ascii_decode
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.ascii_encode(input, self.errors)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.ascii_decode(input, self.errors)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
class StreamConverter(StreamWriter,StreamReader):
encode = codecs.ascii_decode
decode = codecs.ascii_encode
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='ascii',
encode=Codec.encode,
decode=Codec.decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,79 @@
""" Python 'base64_codec' Codec - base64 content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, base64
### Codec APIs
def base64_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = base64.encodestring(input)
return (output, len(input))
def base64_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = base64.decodestring(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input,errors='strict'):
return base64_encode(input,errors)
def decode(self, input,errors='strict'):
return base64_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
assert self.errors == 'strict'
return base64.encodestring(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
assert self.errors == 'strict'
return base64.decodestring(input)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='base64',
encode=base64_encode,
decode=base64_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,39 @@
#
# big5.py: Python Unicode Codec for BIG5
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_tw, codecs
import _multibytecodec as mbc
codec = _codecs_tw.getcodec('big5')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='big5',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# big5hkscs.py: Python Unicode Codec for BIG5HKSCS
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_hk, codecs
import _multibytecodec as mbc
codec = _codecs_hk.getcodec('big5hkscs')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='big5hkscs',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,102 @@
""" Python 'bz2_codec' Codec - bz2 compression encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Adapted by Raymond Hettinger from zlib_codec.py which was written
by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs
import bz2 # this codec needs the optional bz2 module !
### Codec APIs
def bz2_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = bz2.compress(input)
return (output, len(input))
def bz2_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = bz2.decompress(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input, errors='strict'):
return bz2_encode(input, errors)
def decode(self, input, errors='strict'):
return bz2_decode(input, errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict'):
assert errors == 'strict'
self.errors = errors
self.compressobj = bz2.BZ2Compressor()
def encode(self, input, final=False):
if final:
c = self.compressobj.compress(input)
return c + self.compressobj.flush()
else:
return self.compressobj.compress(input)
def reset(self):
self.compressobj = bz2.BZ2Compressor()
class IncrementalDecoder(codecs.IncrementalDecoder):
def __init__(self, errors='strict'):
assert errors == 'strict'
self.errors = errors
self.decompressobj = bz2.BZ2Decompressor()
def decode(self, input, final=False):
try:
return self.decompressobj.decompress(input)
except EOFError:
return ''
def reset(self):
self.decompressobj = bz2.BZ2Decompressor()
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name="bz2",
encode=bz2_encode,
decode=bz2_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,69 @@
""" Generic Python Character Mapping Codec.
Use this codec directly rather than through the automatic
conversion mechanisms supplied by unicode() and .encode().
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
# Note: Binding these as C functions will result in the class not
# converting them to methods. This is intended.
encode = codecs.charmap_encode
decode = codecs.charmap_decode
class IncrementalEncoder(codecs.IncrementalEncoder):
def __init__(self, errors='strict', mapping=None):
codecs.IncrementalEncoder.__init__(self, errors)
self.mapping = mapping
def encode(self, input, final=False):
return codecs.charmap_encode(input, self.errors, self.mapping)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def __init__(self, errors='strict', mapping=None):
codecs.IncrementalDecoder.__init__(self, errors)
self.mapping = mapping
def decode(self, input, final=False):
return codecs.charmap_decode(input, self.errors, self.mapping)[0]
class StreamWriter(Codec,codecs.StreamWriter):
def __init__(self,stream,errors='strict',mapping=None):
codecs.StreamWriter.__init__(self,stream,errors)
self.mapping = mapping
def encode(self,input,errors='strict'):
return Codec.encode(input,errors,self.mapping)
class StreamReader(Codec,codecs.StreamReader):
def __init__(self,stream,errors='strict',mapping=None):
codecs.StreamReader.__init__(self,stream,errors)
self.mapping = mapping
def decode(self,input,errors='strict'):
return Codec.decode(input,errors,self.mapping)
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='charmap',
encode=Codec.encode,
decode=Codec.decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp037 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp037',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0A -> CONTROL
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> CONTROL
u'\x8f' # 0x1B -> CONTROL
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2A -> CONTROL
u'\x8b' # 0x2B -> CONTROL
u'\x8c' # 0x2C -> CONTROL
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3A -> CONTROL
u'\x9b' # 0x3B -> CONTROL
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> CONTROL
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\xa0' # 0x41 -> NO-BREAK SPACE
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
u'\xa2' # 0x4A -> CENT SIGN
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'|' # 0x4F -> VERTICAL LINE
u'&' # 0x50 -> AMPERSAND
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'!' # 0x5A -> EXCLAMATION MARK
u'$' # 0x5B -> DOLLAR SIGN
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'\xac' # 0x5F -> NOT SIGN
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xa6' # 0x6A -> BROKEN BAR
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7A -> COLON
u'#' # 0x7B -> NUMBER SIGN
u'@' # 0x7C -> COMMERCIAL AT
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'"' # 0x7F -> QUOTATION MARK
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
u'\xb1' # 0x8F -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
u'\xb8' # 0x9D -> CEDILLA
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
u'\xa4' # 0x9F -> CURRENCY SIGN
u'\xb5' # 0xA0 -> MICRO SIGN
u'~' # 0xA1 -> TILDE
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
u'\xbf' # 0xAB -> INVERTED QUESTION MARK
u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
u'\xae' # 0xAF -> REGISTERED SIGN
u'^' # 0xB0 -> CIRCUMFLEX ACCENT
u'\xa3' # 0xB1 -> POUND SIGN
u'\xa5' # 0xB2 -> YEN SIGN
u'\xb7' # 0xB3 -> MIDDLE DOT
u'\xa9' # 0xB4 -> COPYRIGHT SIGN
u'\xa7' # 0xB5 -> SECTION SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
u'[' # 0xBA -> LEFT SQUARE BRACKET
u']' # 0xBB -> RIGHT SQUARE BRACKET
u'\xaf' # 0xBC -> MACRON
u'\xa8' # 0xBD -> DIAERESIS
u'\xb4' # 0xBE -> ACUTE ACCENT
u'\xd7' # 0xBF -> MULTIPLICATION SIGN
u'{' # 0xC0 -> LEFT CURLY BRACKET
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
u'}' # 0xD0 -> RIGHT CURLY BRACKET
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xDA -> SUPERSCRIPT ONE
u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\\' # 0xE0 -> REVERSE SOLIDUS
u'\xf7' # 0xE1 -> DIVISION SIGN
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
u'\x9f' # 0xFF -> CONTROL
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1006',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO
u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE
u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO
u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE
u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR
u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE
u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX
u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN
u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT
u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE
u'\u060c' # 0xAB -> ARABIC COMMA
u'\u061b' # 0xAC -> ARABIC SEMICOLON
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u061f' # 0xAE -> ARABIC QUESTION MARK
u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM
u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM
u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM
u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM
u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM
u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM
u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM
u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM
u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM
u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM
u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM
u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM
u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM
u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM
u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM
u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM
u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM
u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM
u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM
u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM
u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM
u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM
u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN
u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM
u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM
u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM
u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM
u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM
u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM
u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM
u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM
u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM
u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM
u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM
u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM
u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM
u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM
u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM
u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM
u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM
u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM
u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM
u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM
u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM
u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM
u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM
u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM
u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM
u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM
u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM
u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM
u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM
u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM
u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM
u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM
u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM
u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM
u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM
u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM
u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM
u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM
u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM
u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM
u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM
u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM
u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM
u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM
u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM
u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM
u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM
u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM
u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM
u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1026',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0A -> CONTROL
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> CONTROL
u'\x8f' # 0x1B -> CONTROL
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2A -> CONTROL
u'\x8b' # 0x2B -> CONTROL
u'\x8c' # 0x2C -> CONTROL
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3A -> CONTROL
u'\x9b' # 0x3B -> CONTROL
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> CONTROL
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\xa0' # 0x41 -> NO-BREAK SPACE
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
u'{' # 0x48 -> LEFT CURLY BRACKET
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'!' # 0x4F -> EXCLAMATION MARK
u'&' # 0x50 -> AMPERSAND
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE
u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'^' # 0x5F -> CIRCUMFLEX ACCENT
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'[' # 0x68 -> LEFT SQUARE BRACKET
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I
u':' # 0x7A -> COLON
u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'}' # 0x8C -> RIGHT CURLY BRACKET
u'`' # 0x8D -> GRAVE ACCENT
u'\xa6' # 0x8E -> BROKEN BAR
u'\xb1' # 0x8F -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
u'\xb8' # 0x9D -> CEDILLA
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
u'\xa4' # 0x9F -> CURRENCY SIGN
u'\xb5' # 0xA0 -> MICRO SIGN
u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
u'\xbf' # 0xAB -> INVERTED QUESTION MARK
u']' # 0xAC -> RIGHT SQUARE BRACKET
u'$' # 0xAD -> DOLLAR SIGN
u'@' # 0xAE -> COMMERCIAL AT
u'\xae' # 0xAF -> REGISTERED SIGN
u'\xa2' # 0xB0 -> CENT SIGN
u'\xa3' # 0xB1 -> POUND SIGN
u'\xa5' # 0xB2 -> YEN SIGN
u'\xb7' # 0xB3 -> MIDDLE DOT
u'\xa9' # 0xB4 -> COPYRIGHT SIGN
u'\xa7' # 0xB5 -> SECTION SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
u'\xac' # 0xBA -> NOT SIGN
u'|' # 0xBB -> VERTICAL LINE
u'\xaf' # 0xBC -> MACRON
u'\xa8' # 0xBD -> DIAERESIS
u'\xb4' # 0xBE -> ACUTE ACCENT
u'\xd7' # 0xBF -> MULTIPLICATION SIGN
u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'~' # 0xCC -> TILDE
u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
u'\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xDA -> SUPERSCRIPT ONE
u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\\' # 0xDC -> REVERSE SOLIDUS
u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xf7' # 0xE1 -> DIVISION SIGN
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'#' # 0xEC -> NUMBER SIGN
u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'"' # 0xFC -> QUOTATION MARK
u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
u'\x9f' # 0xFF -> CONTROL
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1140',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0A -> CONTROL
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> CONTROL
u'\x8f' # 0x1B -> CONTROL
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2A -> CONTROL
u'\x8b' # 0x2B -> CONTROL
u'\x8c' # 0x2C -> CONTROL
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3A -> CONTROL
u'\x9b' # 0x3B -> CONTROL
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> CONTROL
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\xa0' # 0x41 -> NO-BREAK SPACE
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
u'\xa2' # 0x4A -> CENT SIGN
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'|' # 0x4F -> VERTICAL LINE
u'&' # 0x50 -> AMPERSAND
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'!' # 0x5A -> EXCLAMATION MARK
u'$' # 0x5B -> DOLLAR SIGN
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'\xac' # 0x5F -> NOT SIGN
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xa6' # 0x6A -> BROKEN BAR
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7A -> COLON
u'#' # 0x7B -> NUMBER SIGN
u'@' # 0x7C -> COMMERCIAL AT
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'"' # 0x7F -> QUOTATION MARK
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
u'\xb1' # 0x8F -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
u'\xb8' # 0x9D -> CEDILLA
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
u'\u20ac' # 0x9F -> EURO SIGN
u'\xb5' # 0xA0 -> MICRO SIGN
u'~' # 0xA1 -> TILDE
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
u'\xbf' # 0xAB -> INVERTED QUESTION MARK
u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
u'\xae' # 0xAF -> REGISTERED SIGN
u'^' # 0xB0 -> CIRCUMFLEX ACCENT
u'\xa3' # 0xB1 -> POUND SIGN
u'\xa5' # 0xB2 -> YEN SIGN
u'\xb7' # 0xB3 -> MIDDLE DOT
u'\xa9' # 0xB4 -> COPYRIGHT SIGN
u'\xa7' # 0xB5 -> SECTION SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
u'[' # 0xBA -> LEFT SQUARE BRACKET
u']' # 0xBB -> RIGHT SQUARE BRACKET
u'\xaf' # 0xBC -> MACRON
u'\xa8' # 0xBD -> DIAERESIS
u'\xb4' # 0xBE -> ACUTE ACCENT
u'\xd7' # 0xBF -> MULTIPLICATION SIGN
u'{' # 0xC0 -> LEFT CURLY BRACKET
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
u'}' # 0xD0 -> RIGHT CURLY BRACKET
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xDA -> SUPERSCRIPT ONE
u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\\' # 0xE0 -> REVERSE SOLIDUS
u'\xf7' # 0xE1 -> DIVISION SIGN
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
u'\x9f' # 0xFF -> CONTROL
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1250',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\ufffe' # 0x83 -> UNDEFINED
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\ufffe' # 0x88 -> UNDEFINED
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON
u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\ufffe' # 0x98 -> UNDEFINED
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE
u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON
u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u02c7' # 0xA1 -> CARON
u'\u02d8' # 0xA2 -> BREVE
u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\u02db' # 0xB2 -> OGONEK
u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK
u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON
u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT
u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON
u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON
u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON
u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE
u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON
u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON
u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA
u'\u02d9' # 0xFF -> DOT ABOVE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1251',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE
u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u20ac' # 0x88 -> EURO SIGN
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE
u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE
u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE
u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE
u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\ufffe' # 0x98 -> UNDEFINED
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE
u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE
u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE
u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U
u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U
u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO
u'\u2116' # 0xB9 -> NUMERO SIGN
u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE
u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE
u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE
u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI
u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A
u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE
u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE
u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE
u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE
u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE
u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE
u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE
u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I
u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA
u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL
u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM
u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN
u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O
u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE
u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER
u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES
u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE
u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U
u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF
u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA
u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE
u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE
u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA
u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU
u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E
u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU
u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA
u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A
u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE
u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE
u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE
u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE
u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE
u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE
u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE
u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I
u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I
u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA
u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL
u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM
u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN
u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O
u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE
u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER
u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES
u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE
u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U
u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF
u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA
u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE
u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE
u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA
u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA
u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN
u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU
u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E
u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU
u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1252',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
u'\ufffe' # 0x8D -> UNDEFINED
u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\u02dc' # 0x98 -> SMALL TILDE
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
u'\ufffe' # 0x9D -> UNDEFINED
u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1253',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\ufffe' # 0x88 -> UNDEFINED
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\ufffe' # 0x8A -> UNDEFINED
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x8C -> UNDEFINED
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\ufffe' # 0x98 -> UNDEFINED
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\ufffe' # 0x9A -> UNDEFINED
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x9C -> UNDEFINED
u'\ufffe' # 0x9D -> UNDEFINED
u'\ufffe' # 0x9E -> UNDEFINED
u'\ufffe' # 0x9F -> UNDEFINED
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS
u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\ufffe' # 0xAA -> UNDEFINED
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\u2015' # 0xAF -> HORIZONTAL BAR
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\u0384' # 0xB4 -> GREEK TONOS
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA
u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA
u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA
u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA
u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON
u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA
u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA
u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA
u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA
u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA
u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA
u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU
u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU
u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI
u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON
u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI
u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO
u'\ufffe' # 0xD2 -> UNDEFINED
u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA
u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU
u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON
u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI
u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI
u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI
u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA
u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS
u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA
u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA
u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA
u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA
u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON
u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA
u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA
u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA
u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA
u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA
u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA
u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU
u'\u03bd' # 0xED -> GREEK SMALL LETTER NU
u'\u03be' # 0xEE -> GREEK SMALL LETTER XI
u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON
u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI
u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO
u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA
u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA
u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU
u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON
u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI
u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI
u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI
u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA
u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
u'\ufffe' # 0xFF -> UNDEFINED
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1254',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\u02dc' # 0x98 -> SMALL TILDE
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
u'\ufffe' # 0x9D -> UNDEFINED
u'\ufffe' # 0x9E -> UNDEFINED
u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I
u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1255',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\ufffe' # 0x8A -> UNDEFINED
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x8C -> UNDEFINED
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\u02dc' # 0x98 -> SMALL TILDE
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\ufffe' # 0x9A -> UNDEFINED
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x9C -> UNDEFINED
u'\ufffe' # 0x9D -> UNDEFINED
u'\ufffe' # 0x9E -> UNDEFINED
u'\ufffe' # 0x9F -> UNDEFINED
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xd7' # 0xAA -> MULTIPLICATION SIGN
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xf7' # 0xBA -> DIVISION SIGN
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA
u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL
u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH
u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS
u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ
u'\u05b5' # 0xC5 -> HEBREW POINT TSERE
u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL
u'\u05b7' # 0xC7 -> HEBREW POINT PATAH
u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS
u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM
u'\ufffe' # 0xCA -> UNDEFINED
u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS
u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ
u'\u05bd' # 0xCD -> HEBREW POINT METEG
u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF
u'\u05bf' # 0xCF -> HEBREW POINT RAFE
u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ
u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT
u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT
u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ
u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV
u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD
u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD
u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH
u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM
u'\ufffe' # 0xD9 -> UNDEFINED
u'\ufffe' # 0xDA -> UNDEFINED
u'\ufffe' # 0xDB -> UNDEFINED
u'\ufffe' # 0xDC -> UNDEFINED
u'\ufffe' # 0xDD -> UNDEFINED
u'\ufffe' # 0xDE -> UNDEFINED
u'\ufffe' # 0xDF -> UNDEFINED
u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF
u'\u05d1' # 0xE1 -> HEBREW LETTER BET
u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL
u'\u05d3' # 0xE3 -> HEBREW LETTER DALET
u'\u05d4' # 0xE4 -> HEBREW LETTER HE
u'\u05d5' # 0xE5 -> HEBREW LETTER VAV
u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN
u'\u05d7' # 0xE7 -> HEBREW LETTER HET
u'\u05d8' # 0xE8 -> HEBREW LETTER TET
u'\u05d9' # 0xE9 -> HEBREW LETTER YOD
u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF
u'\u05db' # 0xEB -> HEBREW LETTER KAF
u'\u05dc' # 0xEC -> HEBREW LETTER LAMED
u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM
u'\u05de' # 0xEE -> HEBREW LETTER MEM
u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN
u'\u05e0' # 0xF0 -> HEBREW LETTER NUN
u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH
u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN
u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE
u'\u05e4' # 0xF4 -> HEBREW LETTER PE
u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI
u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI
u'\u05e7' # 0xF7 -> HEBREW LETTER QOF
u'\u05e8' # 0xF8 -> HEBREW LETTER RESH
u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN
u'\u05ea' # 0xFA -> HEBREW LETTER TAV
u'\ufffe' # 0xFB -> UNDEFINED
u'\ufffe' # 0xFC -> UNDEFINED
u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
u'\ufffe' # 0xFF -> UNDEFINED
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1256',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\u067e' # 0x81 -> ARABIC LETTER PEH
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\u0679' # 0x8A -> ARABIC LETTER TTEH
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
u'\u0686' # 0x8D -> ARABIC LETTER TCHEH
u'\u0698' # 0x8E -> ARABIC LETTER JEH
u'\u0688' # 0x8F -> ARABIC LETTER DDAL
u'\u06af' # 0x90 -> ARABIC LETTER GAF
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\u0691' # 0x9A -> ARABIC LETTER RREH
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER
u'\u200d' # 0x9E -> ZERO WIDTH JOINER
u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u060c' # 0xA1 -> ARABIC COMMA
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\u061b' # 0xBA -> ARABIC SEMICOLON
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\u061f' # 0xBF -> ARABIC QUESTION MARK
u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL
u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA
u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
u'\u0627' # 0xC7 -> ARABIC LETTER ALEF
u'\u0628' # 0xC8 -> ARABIC LETTER BEH
u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA
u'\u062a' # 0xCA -> ARABIC LETTER TEH
u'\u062b' # 0xCB -> ARABIC LETTER THEH
u'\u062c' # 0xCC -> ARABIC LETTER JEEM
u'\u062d' # 0xCD -> ARABIC LETTER HAH
u'\u062e' # 0xCE -> ARABIC LETTER KHAH
u'\u062f' # 0xCF -> ARABIC LETTER DAL
u'\u0630' # 0xD0 -> ARABIC LETTER THAL
u'\u0631' # 0xD1 -> ARABIC LETTER REH
u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN
u'\u0633' # 0xD3 -> ARABIC LETTER SEEN
u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN
u'\u0635' # 0xD5 -> ARABIC LETTER SAD
u'\u0636' # 0xD6 -> ARABIC LETTER DAD
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u0637' # 0xD8 -> ARABIC LETTER TAH
u'\u0638' # 0xD9 -> ARABIC LETTER ZAH
u'\u0639' # 0xDA -> ARABIC LETTER AIN
u'\u063a' # 0xDB -> ARABIC LETTER GHAIN
u'\u0640' # 0xDC -> ARABIC TATWEEL
u'\u0641' # 0xDD -> ARABIC LETTER FEH
u'\u0642' # 0xDE -> ARABIC LETTER QAF
u'\u0643' # 0xDF -> ARABIC LETTER KAF
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\u0644' # 0xE1 -> ARABIC LETTER LAM
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\u0645' # 0xE3 -> ARABIC LETTER MEEM
u'\u0646' # 0xE4 -> ARABIC LETTER NOON
u'\u0647' # 0xE5 -> ARABIC LETTER HEH
u'\u0648' # 0xE6 -> ARABIC LETTER WAW
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA
u'\u064a' # 0xED -> ARABIC LETTER YEH
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\u064b' # 0xF0 -> ARABIC FATHATAN
u'\u064c' # 0xF1 -> ARABIC DAMMATAN
u'\u064d' # 0xF2 -> ARABIC KASRATAN
u'\u064e' # 0xF3 -> ARABIC FATHA
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u064f' # 0xF5 -> ARABIC DAMMA
u'\u0650' # 0xF6 -> ARABIC KASRA
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u0651' # 0xF8 -> ARABIC SHADDA
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\u0652' # 0xFA -> ARABIC SUKUN
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1257',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\ufffe' # 0x83 -> UNDEFINED
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\ufffe' # 0x88 -> UNDEFINED
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\ufffe' # 0x8A -> UNDEFINED
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x8C -> UNDEFINED
u'\xa8' # 0x8D -> DIAERESIS
u'\u02c7' # 0x8E -> CARON
u'\xb8' # 0x8F -> CEDILLA
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\ufffe' # 0x98 -> UNDEFINED
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\ufffe' # 0x9A -> UNDEFINED
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\ufffe' # 0x9C -> UNDEFINED
u'\xaf' # 0x9D -> MACRON
u'\u02db' # 0x9E -> OGONEK
u'\ufffe' # 0x9F -> UNDEFINED
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\ufffe' # 0xA1 -> UNDEFINED
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\ufffe' # 0xA5 -> UNDEFINED
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xe6' # 0xBF -> LATIN SMALL LETTER AE
u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON
u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON
u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK
u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK
u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON
u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK
u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE
u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA
u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA
u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON
u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA
u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON
u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK
u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE
u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE
u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON
u'\u02d9' # 0xFF -> DOT ABOVE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp1258',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\u2020' # 0x86 -> DAGGER
u'\u2021' # 0x87 -> DOUBLE DAGGER
u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
u'\u2030' # 0x89 -> PER MILLE SIGN
u'\ufffe' # 0x8A -> UNDEFINED
u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\u02dc' # 0x98 -> SMALL TILDE
u'\u2122' # 0x99 -> TRADE MARK SIGN
u'\ufffe' # 0x9A -> UNDEFINED
u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
u'\ufffe' # 0x9D -> UNDEFINED
u'\ufffe' # 0x9E -> UNDEFINED
u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN
u'\u0303' # 0xDE -> COMBINING TILDE
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\u0323' # 0xF2 -> COMBINING DOT BELOW
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN
u'\u20ab' # 0xFE -> DONG SIGN
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp424',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> SELECT
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> REQUIRED NEW LINE
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> GRAPHIC ESCAPE
u'\x8d' # 0x09 -> SUPERSCRIPT
u'\x8e' # 0x0A -> REPEAT
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION
u'\x85' # 0x15 -> NEW LINE
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> UNIT BACK SPACE
u'\x8f' # 0x1B -> CUSTOMER USE ONE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> DIGIT SELECT
u'\x81' # 0x21 -> START OF SIGNIFICANCE
u'\x82' # 0x22 -> FIELD SEPARATOR
u'\x83' # 0x23 -> WORD UNDERSCORE
u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> SET ATTRIBUTE
u'\x89' # 0x29 -> START FIELD EXTENDED
u'\x8a' # 0x2A -> SET MODE OR SWITCH
u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX
u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> <reserved>
u'\x91' # 0x31 -> <reserved>
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> INDEX RETURN
u'\x94' # 0x34 -> PRESENTATION POSITION
u'\x95' # 0x35 -> TRANSPARENT
u'\x96' # 0x36 -> NUMERIC BACKSPACE
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> SUBSCRIPT
u'\x99' # 0x39 -> INDENT TABULATION
u'\x9a' # 0x3A -> REVERSE FORM FEED
u'\x9b' # 0x3B -> CUSTOMER USE THREE
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> <reserved>
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\u05d0' # 0x41 -> HEBREW LETTER ALEF
u'\u05d1' # 0x42 -> HEBREW LETTER BET
u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL
u'\u05d3' # 0x44 -> HEBREW LETTER DALET
u'\u05d4' # 0x45 -> HEBREW LETTER HE
u'\u05d5' # 0x46 -> HEBREW LETTER VAV
u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN
u'\u05d7' # 0x48 -> HEBREW LETTER HET
u'\u05d8' # 0x49 -> HEBREW LETTER TET
u'\xa2' # 0x4A -> CENT SIGN
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'|' # 0x4F -> VERTICAL LINE
u'&' # 0x50 -> AMPERSAND
u'\u05d9' # 0x51 -> HEBREW LETTER YOD
u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF
u'\u05db' # 0x53 -> HEBREW LETTER KAF
u'\u05dc' # 0x54 -> HEBREW LETTER LAMED
u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM
u'\u05de' # 0x56 -> HEBREW LETTER MEM
u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN
u'\u05e0' # 0x58 -> HEBREW LETTER NUN
u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH
u'!' # 0x5A -> EXCLAMATION MARK
u'$' # 0x5B -> DOLLAR SIGN
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'\xac' # 0x5F -> NOT SIGN
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\u05e2' # 0x62 -> HEBREW LETTER AYIN
u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE
u'\u05e4' # 0x64 -> HEBREW LETTER PE
u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI
u'\u05e6' # 0x66 -> HEBREW LETTER TSADI
u'\u05e7' # 0x67 -> HEBREW LETTER QOF
u'\u05e8' # 0x68 -> HEBREW LETTER RESH
u'\u05e9' # 0x69 -> HEBREW LETTER SHIN
u'\xa6' # 0x6A -> BROKEN BAR
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\ufffe' # 0x70 -> UNDEFINED
u'\u05ea' # 0x71 -> HEBREW LETTER TAV
u'\ufffe' # 0x72 -> UNDEFINED
u'\ufffe' # 0x73 -> UNDEFINED
u'\xa0' # 0x74 -> NO-BREAK SPACE
u'\ufffe' # 0x75 -> UNDEFINED
u'\ufffe' # 0x76 -> UNDEFINED
u'\ufffe' # 0x77 -> UNDEFINED
u'\u2017' # 0x78 -> DOUBLE LOW LINE
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7A -> COLON
u'#' # 0x7B -> NUMBER SIGN
u'@' # 0x7C -> COMMERCIAL AT
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'"' # 0x7F -> QUOTATION MARK
u'\ufffe' # 0x80 -> UNDEFINED
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\ufffe' # 0x8C -> UNDEFINED
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\xb1' # 0x8F -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\ufffe' # 0x9A -> UNDEFINED
u'\ufffe' # 0x9B -> UNDEFINED
u'\ufffe' # 0x9C -> UNDEFINED
u'\xb8' # 0x9D -> CEDILLA
u'\ufffe' # 0x9E -> UNDEFINED
u'\xa4' # 0x9F -> CURRENCY SIGN
u'\xb5' # 0xA0 -> MICRO SIGN
u'~' # 0xA1 -> TILDE
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\ufffe' # 0xAA -> UNDEFINED
u'\ufffe' # 0xAB -> UNDEFINED
u'\ufffe' # 0xAC -> UNDEFINED
u'\ufffe' # 0xAD -> UNDEFINED
u'\ufffe' # 0xAE -> UNDEFINED
u'\xae' # 0xAF -> REGISTERED SIGN
u'^' # 0xB0 -> CIRCUMFLEX ACCENT
u'\xa3' # 0xB1 -> POUND SIGN
u'\xa5' # 0xB2 -> YEN SIGN
u'\xb7' # 0xB3 -> MIDDLE DOT
u'\xa9' # 0xB4 -> COPYRIGHT SIGN
u'\xa7' # 0xB5 -> SECTION SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
u'[' # 0xBA -> LEFT SQUARE BRACKET
u']' # 0xBB -> RIGHT SQUARE BRACKET
u'\xaf' # 0xBC -> MACRON
u'\xa8' # 0xBD -> DIAERESIS
u'\xb4' # 0xBE -> ACUTE ACCENT
u'\xd7' # 0xBF -> MULTIPLICATION SIGN
u'{' # 0xC0 -> LEFT CURLY BRACKET
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\ufffe' # 0xCB -> UNDEFINED
u'\ufffe' # 0xCC -> UNDEFINED
u'\ufffe' # 0xCD -> UNDEFINED
u'\ufffe' # 0xCE -> UNDEFINED
u'\ufffe' # 0xCF -> UNDEFINED
u'}' # 0xD0 -> RIGHT CURLY BRACKET
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xDA -> SUPERSCRIPT ONE
u'\ufffe' # 0xDB -> UNDEFINED
u'\ufffe' # 0xDC -> UNDEFINED
u'\ufffe' # 0xDD -> UNDEFINED
u'\ufffe' # 0xDE -> UNDEFINED
u'\ufffe' # 0xDF -> UNDEFINED
u'\\' # 0xE0 -> REVERSE SOLIDUS
u'\xf7' # 0xE1 -> DIVISION SIGN
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\ufffe' # 0xEB -> UNDEFINED
u'\ufffe' # 0xEC -> UNDEFINED
u'\ufffe' # 0xED -> UNDEFINED
u'\ufffe' # 0xEE -> UNDEFINED
u'\ufffe' # 0xEF -> UNDEFINED
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\ufffe' # 0xFB -> UNDEFINED
u'\ufffe' # 0xFC -> UNDEFINED
u'\ufffe' # 0xFD -> UNDEFINED
u'\ufffe' # 0xFE -> UNDEFINED
u'\x9f' # 0xFF -> EIGHT ONES
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp437',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00a5, # YEN SIGN
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xa2' # 0x009b -> CENT SIGN
u'\xa3' # 0x009c -> POUND SIGN
u'\xa5' # 0x009d -> YEN SIGN
u'\u20a7' # 0x009e -> PESETA SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x009b, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a5: 0x009d, # YEN SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b5: 0x00e6, # MICRO SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x20a7: 0x009e, # PESETA SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2310: 0x00a9, # REVERSED NOT SIGN
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp500',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0A -> CONTROL
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> CONTROL
u'\x8f' # 0x1B -> CONTROL
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2A -> CONTROL
u'\x8b' # 0x2B -> CONTROL
u'\x8c' # 0x2C -> CONTROL
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3A -> CONTROL
u'\x9b' # 0x3B -> CONTROL
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> CONTROL
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\xa0' # 0x41 -> NO-BREAK SPACE
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
u'[' # 0x4A -> LEFT SQUARE BRACKET
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'!' # 0x4F -> EXCLAMATION MARK
u'&' # 0x50 -> AMPERSAND
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
u']' # 0x5A -> RIGHT SQUARE BRACKET
u'$' # 0x5B -> DOLLAR SIGN
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'^' # 0x5F -> CIRCUMFLEX ACCENT
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xa6' # 0x6A -> BROKEN BAR
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7A -> COLON
u'#' # 0x7B -> NUMBER SIGN
u'@' # 0x7C -> COMMERCIAL AT
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'"' # 0x7F -> QUOTATION MARK
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
u'\xb1' # 0x8F -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
u'\xb8' # 0x9D -> CEDILLA
u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
u'\xa4' # 0x9F -> CURRENCY SIGN
u'\xb5' # 0xA0 -> MICRO SIGN
u'~' # 0xA1 -> TILDE
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
u'\xbf' # 0xAB -> INVERTED QUESTION MARK
u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
u'\xae' # 0xAF -> REGISTERED SIGN
u'\xa2' # 0xB0 -> CENT SIGN
u'\xa3' # 0xB1 -> POUND SIGN
u'\xa5' # 0xB2 -> YEN SIGN
u'\xb7' # 0xB3 -> MIDDLE DOT
u'\xa9' # 0xB4 -> COPYRIGHT SIGN
u'\xa7' # 0xB5 -> SECTION SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
u'\xac' # 0xBA -> NOT SIGN
u'|' # 0xBB -> VERTICAL LINE
u'\xaf' # 0xBC -> MACRON
u'\xa8' # 0xBD -> DIAERESIS
u'\xb4' # 0xBE -> ACUTE ACCENT
u'\xd7' # 0xBF -> MULTIPLICATION SIGN
u'{' # 0xC0 -> LEFT CURLY BRACKET
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
u'}' # 0xD0 -> RIGHT CURLY BRACKET
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xDA -> SUPERSCRIPT ONE
u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\\' # 0xE0 -> REVERSE SOLIDUS
u'\xf7' # 0xE1 -> DIVISION SIGN
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
u'\x9f' # 0xFF -> CONTROL
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,309 @@
"""Python Character Mapping Codec cp720 generated on Windows:
Vista 6.0.6002 SP2 Multiprocessor Free with the command:
python Tools/unicode/genwincodec.py 720
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp720',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> CONTROL CHARACTER
u'\x01' # 0x01 -> CONTROL CHARACTER
u'\x02' # 0x02 -> CONTROL CHARACTER
u'\x03' # 0x03 -> CONTROL CHARACTER
u'\x04' # 0x04 -> CONTROL CHARACTER
u'\x05' # 0x05 -> CONTROL CHARACTER
u'\x06' # 0x06 -> CONTROL CHARACTER
u'\x07' # 0x07 -> CONTROL CHARACTER
u'\x08' # 0x08 -> CONTROL CHARACTER
u'\t' # 0x09 -> CONTROL CHARACTER
u'\n' # 0x0A -> CONTROL CHARACTER
u'\x0b' # 0x0B -> CONTROL CHARACTER
u'\x0c' # 0x0C -> CONTROL CHARACTER
u'\r' # 0x0D -> CONTROL CHARACTER
u'\x0e' # 0x0E -> CONTROL CHARACTER
u'\x0f' # 0x0F -> CONTROL CHARACTER
u'\x10' # 0x10 -> CONTROL CHARACTER
u'\x11' # 0x11 -> CONTROL CHARACTER
u'\x12' # 0x12 -> CONTROL CHARACTER
u'\x13' # 0x13 -> CONTROL CHARACTER
u'\x14' # 0x14 -> CONTROL CHARACTER
u'\x15' # 0x15 -> CONTROL CHARACTER
u'\x16' # 0x16 -> CONTROL CHARACTER
u'\x17' # 0x17 -> CONTROL CHARACTER
u'\x18' # 0x18 -> CONTROL CHARACTER
u'\x19' # 0x19 -> CONTROL CHARACTER
u'\x1a' # 0x1A -> CONTROL CHARACTER
u'\x1b' # 0x1B -> CONTROL CHARACTER
u'\x1c' # 0x1C -> CONTROL CHARACTER
u'\x1d' # 0x1D -> CONTROL CHARACTER
u'\x1e' # 0x1E -> CONTROL CHARACTER
u'\x1f' # 0x1F -> CONTROL CHARACTER
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> CONTROL CHARACTER
u'\x80'
u'\x81'
u'\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\x84'
u'\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE
u'\x86'
u'\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\x8d'
u'\x8e'
u'\x8f'
u'\x90'
u'\u0651' # 0x91 -> ARABIC SHADDA
u'\u0652' # 0x92 -> ARABIC SUKUN
u'\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xa4' # 0x94 -> CURRENCY SIGN
u'\u0640' # 0x95 -> ARABIC TATWEEL
u'\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE
u'\u0621' # 0x98 -> ARABIC LETTER HAMZA
u'\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE
u'\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE
u'\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE
u'\xa3' # 0x9C -> POUND SIGN
u'\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW
u'\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE
u'\u0627' # 0x9F -> ARABIC LETTER ALEF
u'\u0628' # 0xA0 -> ARABIC LETTER BEH
u'\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA
u'\u062a' # 0xA2 -> ARABIC LETTER TEH
u'\u062b' # 0xA3 -> ARABIC LETTER THEH
u'\u062c' # 0xA4 -> ARABIC LETTER JEEM
u'\u062d' # 0xA5 -> ARABIC LETTER HAH
u'\u062e' # 0xA6 -> ARABIC LETTER KHAH
u'\u062f' # 0xA7 -> ARABIC LETTER DAL
u'\u0630' # 0xA8 -> ARABIC LETTER THAL
u'\u0631' # 0xA9 -> ARABIC LETTER REH
u'\u0632' # 0xAA -> ARABIC LETTER ZAIN
u'\u0633' # 0xAB -> ARABIC LETTER SEEN
u'\u0634' # 0xAC -> ARABIC LETTER SHEEN
u'\u0635' # 0xAD -> ARABIC LETTER SAD
u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0xB0 -> LIGHT SHADE
u'\u2592' # 0xB1 -> MEDIUM SHADE
u'\u2593' # 0xB2 -> DARK SHADE
u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0xDB -> FULL BLOCK
u'\u2584' # 0xDC -> LOWER HALF BLOCK
u'\u258c' # 0xDD -> LEFT HALF BLOCK
u'\u2590' # 0xDE -> RIGHT HALF BLOCK
u'\u2580' # 0xDF -> UPPER HALF BLOCK
u'\u0636' # 0xE0 -> ARABIC LETTER DAD
u'\u0637' # 0xE1 -> ARABIC LETTER TAH
u'\u0638' # 0xE2 -> ARABIC LETTER ZAH
u'\u0639' # 0xE3 -> ARABIC LETTER AIN
u'\u063a' # 0xE4 -> ARABIC LETTER GHAIN
u'\u0641' # 0xE5 -> ARABIC LETTER FEH
u'\xb5' # 0xE6 -> MICRO SIGN
u'\u0642' # 0xE7 -> ARABIC LETTER QAF
u'\u0643' # 0xE8 -> ARABIC LETTER KAF
u'\u0644' # 0xE9 -> ARABIC LETTER LAM
u'\u0645' # 0xEA -> ARABIC LETTER MEEM
u'\u0646' # 0xEB -> ARABIC LETTER NOON
u'\u0647' # 0xEC -> ARABIC LETTER HEH
u'\u0648' # 0xED -> ARABIC LETTER WAW
u'\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA
u'\u064a' # 0xEF -> ARABIC LETTER YEH
u'\u2261' # 0xF0 -> IDENTICAL TO
u'\u064b' # 0xF1 -> ARABIC FATHATAN
u'\u064c' # 0xF2 -> ARABIC DAMMATAN
u'\u064d' # 0xF3 -> ARABIC KASRATAN
u'\u064e' # 0xF4 -> ARABIC FATHA
u'\u064f' # 0xF5 -> ARABIC DAMMA
u'\u0650' # 0xF6 -> ARABIC KASRA
u'\u2248' # 0xF7 -> ALMOST EQUAL TO
u'\xb0' # 0xF8 -> DEGREE SIGN
u'\u2219' # 0xF9 -> BULLET OPERATOR
u'\xb7' # 0xFA -> MIDDLE DOT
u'\u221a' # 0xFB -> SQUARE ROOT
u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0xFD -> SUPERSCRIPT TWO
u'\u25a0' # 0xFE -> BLACK SQUARE
u'\xa0' # 0xFF -> NO-BREAK SPACE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp737',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA
0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA
0x0086: 0x0397, # GREEK CAPITAL LETTER ETA
0x0087: 0x0398, # GREEK CAPITAL LETTER THETA
0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA
0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x008b: 0x039c, # GREEK CAPITAL LETTER MU
0x008c: 0x039d, # GREEK CAPITAL LETTER NU
0x008d: 0x039e, # GREEK CAPITAL LETTER XI
0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x008f: 0x03a0, # GREEK CAPITAL LETTER PI
0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO
0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU
0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI
0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI
0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI
0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA
0x0099: 0x03b2, # GREEK SMALL LETTER BETA
0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA
0x009b: 0x03b4, # GREEK SMALL LETTER DELTA
0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON
0x009d: 0x03b6, # GREEK SMALL LETTER ZETA
0x009e: 0x03b7, # GREEK SMALL LETTER ETA
0x009f: 0x03b8, # GREEK SMALL LETTER THETA
0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA
0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00a3: 0x03bc, # GREEK SMALL LETTER MU
0x00a4: 0x03bd, # GREEK SMALL LETTER NU
0x00a5: 0x03be, # GREEK SMALL LETTER XI
0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00a7: 0x03c0, # GREEK SMALL LETTER PI
0x00a8: 0x03c1, # GREEK SMALL LETTER RHO
0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00ab: 0x03c4, # GREEK SMALL LETTER TAU
0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00ad: 0x03c6, # GREEK SMALL LETTER PHI
0x00ae: 0x03c7, # GREEK SMALL LETTER CHI
0x00af: 0x03c8, # GREEK SMALL LETTER PSI
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA
u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA
u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA
u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA
u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON
u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA
u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA
u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA
u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA
u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA
u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA
u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU
u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU
u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI
u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON
u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI
u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO
u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA
u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU
u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON
u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI
u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI
u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI
u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA
u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA
u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA
u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA
u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA
u'\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON
u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA
u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA
u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA
u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA
u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA
u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA
u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU
u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU
u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI
u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON
u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI
u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO
u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA
u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA
u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU
u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON
u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI
u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI
u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA
u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS
u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS
u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS
u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS
u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS
u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS
u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS
u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS
u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS
u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS
u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS
u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS
u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS
u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b7: 0x00fa, # MIDDLE DOT
0x00f7: 0x00f6, # DIVISION SIGN
0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS
0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA
0x0392: 0x0081, # GREEK CAPITAL LETTER BETA
0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA
0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA
0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON
0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA
0x0397: 0x0086, # GREEK CAPITAL LETTER ETA
0x0398: 0x0087, # GREEK CAPITAL LETTER THETA
0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA
0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA
0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA
0x039c: 0x008b, # GREEK CAPITAL LETTER MU
0x039d: 0x008c, # GREEK CAPITAL LETTER NU
0x039e: 0x008d, # GREEK CAPITAL LETTER XI
0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON
0x03a0: 0x008f, # GREEK CAPITAL LETTER PI
0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO
0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA
0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU
0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON
0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI
0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI
0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI
0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA
0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS
0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS
0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS
0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS
0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA
0x03b2: 0x0099, # GREEK SMALL LETTER BETA
0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA
0x03b4: 0x009b, # GREEK SMALL LETTER DELTA
0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON
0x03b6: 0x009d, # GREEK SMALL LETTER ZETA
0x03b7: 0x009e, # GREEK SMALL LETTER ETA
0x03b8: 0x009f, # GREEK SMALL LETTER THETA
0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA
0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA
0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA
0x03bc: 0x00a3, # GREEK SMALL LETTER MU
0x03bd: 0x00a4, # GREEK SMALL LETTER NU
0x03be: 0x00a5, # GREEK SMALL LETTER XI
0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON
0x03c0: 0x00a7, # GREEK SMALL LETTER PI
0x03c1: 0x00a8, # GREEK SMALL LETTER RHO
0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA
0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00ab, # GREEK SMALL LETTER TAU
0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON
0x03c6: 0x00ad, # GREEK SMALL LETTER PHI
0x03c7: 0x00ae, # GREEK SMALL LETTER CHI
0x03c8: 0x00af, # GREEK SMALL LETTER PSI
0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA
0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS
0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS
0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,697 @@
""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp775',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON
0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON
0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x0096: 0x00a2, # CENT SIGN
0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x00a4, # CURRENCY SIGN
0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00a7: 0x00a6, # BROKEN BAR
0x00a8: 0x00a9, # COPYRIGHT SIGN
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE
u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON
u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA
u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA
u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON
u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA
u'\xa2' # 0x0096 -> CENT SIGN
u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd7' # 0x009e -> MULTIPLICATION SIGN
u'\xa4' # 0x009f -> CURRENCY SIGN
u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON
u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE
u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK
u'\xa6' # 0x00a7 -> BROKEN BAR
u'\xa9' # 0x00a8 -> COPYRIGHT SIGN
u'\xae' # 0x00a9 -> REGISTERED SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON
u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK
u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK
u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK
u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON
u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK
u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON
u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK
u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE
u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK
u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON
u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK
u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON
u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON
u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA
u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA
u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA
u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA
u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA
u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON
u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA
u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK
u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
u'\xb6' # 0x00f4 -> PILCROW SIGN
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a2: 0x0096, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x009f, # CURRENCY SIGN
0x00a6: 0x00a7, # BROKEN BAR
0x00a7: 0x00f5, # SECTION SIGN
0x00a9: 0x00a8, # COPYRIGHT SIGN
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00ae: 0x00a9, # REGISTERED SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b3: 0x00fc, # SUPERSCRIPT THREE
0x00b5: 0x00e6, # MICRO SIGN
0x00b6: 0x00f4, # PILCROW SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00b9: 0x00fb, # SUPERSCRIPT ONE
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0x009e, # MULTIPLICATION SIGN
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON
0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON
0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK
0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK
0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE
0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE
0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON
0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON
0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON
0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON
0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE
0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE
0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK
0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK
0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA
0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA
0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON
0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON
0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK
0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK
0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA
0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA
0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA
0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA
0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE
0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE
0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA
0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA
0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON
0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON
0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA
0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA
0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON
0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON
0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON
0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON
0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK
0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK
0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE
0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON
0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON
0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK
0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK
0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK
0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK
0x2219: 0x00f9, # BULLET OPERATOR
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp850',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2017, # DOUBLE LOW LINE
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd7' # 0x009e -> MULTIPLICATION SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\xae' # 0x00a9 -> REGISTERED SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\xa2' # 0x00bd -> CENT SIGN
u'\xa5' # 0x00be -> YEN SIGN
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0x00cf -> CURRENCY SIGN
u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I
u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\xa6' # 0x00dd -> BROKEN BAR
u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xaf' # 0x00ee -> MACRON
u'\xb4' # 0x00ef -> ACUTE ACCENT
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2017' # 0x00f2 -> DOUBLE LOW LINE
u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
u'\xb6' # 0x00f4 -> PILCROW SIGN
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\xb8' # 0x00f7 -> CEDILLA
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\xa8' # 0x00f9 -> DIAERESIS
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x00bd, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x00cf, # CURRENCY SIGN
0x00a5: 0x00be, # YEN SIGN
0x00a6: 0x00dd, # BROKEN BAR
0x00a7: 0x00f5, # SECTION SIGN
0x00a8: 0x00f9, # DIAERESIS
0x00a9: 0x00b8, # COPYRIGHT SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00ae: 0x00a9, # REGISTERED SIGN
0x00af: 0x00ee, # MACRON
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b3: 0x00fc, # SUPERSCRIPT THREE
0x00b4: 0x00ef, # ACUTE ACCENT
0x00b5: 0x00e6, # MICRO SIGN
0x00b6: 0x00f4, # PILCROW SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00b8: 0x00f7, # CEDILLA
0x00b9: 0x00fb, # SUPERSCRIPT ONE
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0x009e, # MULTIPLICATION SIGN
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x2017: 0x00f2, # DOUBLE LOW LINE
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp852',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON
0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON
0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON
0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON
0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT
0x00f2: 0x02db, # OGONEK
0x00f3: 0x02c7, # CARON
0x00f4: 0x02d8, # BREVE
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x02d9, # DOT ABOVE
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE
u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE
u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON
u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON
u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON
u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON
u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE
u'\xd7' # 0x009e -> MULTIPLICATION SIGN
u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK
u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON
u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON
u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK
u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK
u'\xac' # 0x00aa -> NOT SIGN
u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE
u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON
u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON
u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE
u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0x00cf -> CURRENCY SIGN
u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE
u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON
u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON
u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON
u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA
u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON
u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON
u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON
u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE
u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE
u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA
u'\xb4' # 0x00ef -> ACUTE ACCENT
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT
u'\u02db' # 0x00f2 -> OGONEK
u'\u02c7' # 0x00f3 -> CARON
u'\u02d8' # 0x00f4 -> BREVE
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\xb8' # 0x00f7 -> CEDILLA
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\xa8' # 0x00f9 -> DIAERESIS
u'\u02d9' # 0x00fa -> DOT ABOVE
u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON
u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a4: 0x00cf, # CURRENCY SIGN
0x00a7: 0x00f5, # SECTION SIGN
0x00a8: 0x00f9, # DIAERESIS
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b4: 0x00ef, # ACUTE ACCENT
0x00b8: 0x00f7, # CEDILLA
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0x009e, # MULTIPLICATION SIGN
0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE
0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE
0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK
0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK
0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE
0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE
0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON
0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON
0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON
0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON
0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE
0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE
0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK
0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK
0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON
0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON
0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE
0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE
0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON
0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON
0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE
0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE
0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON
0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON
0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE
0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE
0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON
0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON
0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA
0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA
0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON
0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON
0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA
0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA
0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON
0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON
0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE
0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE
0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE
0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON
0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON
0x02c7: 0x00f3, # CARON
0x02d8: 0x00f4, # BREVE
0x02d9: 0x00fa, # DOT ABOVE
0x02db: 0x00f2, # OGONEK
0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp855',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x0084: 0x0451, # CYRILLIC SMALL LETTER IO
0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE
0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE
0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x008c: 0x0457, # CYRILLIC SMALL LETTER YI
0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x008e: 0x0458, # CYRILLIC SMALL LETTER JE
0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE
0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE
0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE
0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE
0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE
0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE
0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE
0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE
0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE
0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
0x009c: 0x044e, # CYRILLIC SMALL LETTER YU
0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A
0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE
0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE
0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE
0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF
0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x00b7: 0x0438, # CYRILLIC SMALL LETTER I
0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA
0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL
0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM
0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN
0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x00d6: 0x043e, # CYRILLIC SMALL LETTER O
0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O
0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x00de: 0x044f, # CYRILLIC SMALL LETTER YA
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER
0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES
0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE
0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x00e7: 0x0443, # CYRILLIC SMALL LETTER U
0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U
0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE
0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x00ef: 0x2116, # NUMERO SIGN
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x00f7: 0x044d, # CYRILLIC SMALL LETTER E
0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E
0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE
u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE
u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE
u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE
u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO
u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO
u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE
u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE
u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE
u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI
u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI
u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE
u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE
u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE
u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE
u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE
u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE
u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE
u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE
u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE
u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE
u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U
u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U
u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE
u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE
u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU
u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU
u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN
u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN
u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A
u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE
u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE
u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE
u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE
u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE
u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE
u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE
u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE
u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF
u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF
u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE
u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA
u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA
u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I
u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I
u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA
u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0x00cf -> CURRENCY SIGN
u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL
u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL
u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM
u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM
u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN
u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN
u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O
u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O
u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE
u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA
u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER
u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER
u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES
u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES
u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE
u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE
u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U
u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U
u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE
u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE
u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE
u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE
u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN
u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN
u'\u2116' # 0x00ef -> NUMERO SIGN
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU
u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU
u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE
u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE
u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA
u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA
u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E
u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E
u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA
u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA
u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE
u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE
u'\xa7' # 0x00fd -> SECTION SIGN
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a4: 0x00cf, # CURRENCY SIGN
0x00a7: 0x00fd, # SECTION SIGN
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ad: 0x00f0, # SOFT HYPHEN
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO
0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE
0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE
0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE
0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI
0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE
0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE
0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE
0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE
0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE
0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U
0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE
0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A
0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE
0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE
0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE
0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE
0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE
0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE
0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE
0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I
0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I
0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA
0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL
0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM
0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN
0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O
0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE
0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER
0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES
0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE
0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U
0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF
0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA
0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE
0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE
0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA
0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA
0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN
0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU
0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E
0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU
0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA
0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE
0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE
0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE
0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE
0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE
0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE
0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE
0x0438: 0x00b7, # CYRILLIC SMALL LETTER I
0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I
0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA
0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL
0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM
0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN
0x043e: 0x00d6, # CYRILLIC SMALL LETTER O
0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE
0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER
0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES
0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE
0x0443: 0x00e7, # CYRILLIC SMALL LETTER U
0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF
0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA
0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE
0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE
0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA
0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA
0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN
0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU
0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN
0x044d: 0x00f7, # CYRILLIC SMALL LETTER E
0x044e: 0x009c, # CYRILLIC SMALL LETTER YU
0x044f: 0x00de, # CYRILLIC SMALL LETTER YA
0x0451: 0x0084, # CYRILLIC SMALL LETTER IO
0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE
0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE
0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE
0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x0457: 0x008c, # CYRILLIC SMALL LETTER YI
0x0458: 0x008e, # CYRILLIC SMALL LETTER JE
0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE
0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE
0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE
0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE
0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U
0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE
0x2116: 0x00ef, # NUMERO SIGN
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp856',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u05d0' # 0x80 -> HEBREW LETTER ALEF
u'\u05d1' # 0x81 -> HEBREW LETTER BET
u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL
u'\u05d3' # 0x83 -> HEBREW LETTER DALET
u'\u05d4' # 0x84 -> HEBREW LETTER HE
u'\u05d5' # 0x85 -> HEBREW LETTER VAV
u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN
u'\u05d7' # 0x87 -> HEBREW LETTER HET
u'\u05d8' # 0x88 -> HEBREW LETTER TET
u'\u05d9' # 0x89 -> HEBREW LETTER YOD
u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF
u'\u05db' # 0x8B -> HEBREW LETTER KAF
u'\u05dc' # 0x8C -> HEBREW LETTER LAMED
u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM
u'\u05de' # 0x8E -> HEBREW LETTER MEM
u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN
u'\u05e0' # 0x90 -> HEBREW LETTER NUN
u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH
u'\u05e2' # 0x92 -> HEBREW LETTER AYIN
u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE
u'\u05e4' # 0x94 -> HEBREW LETTER PE
u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI
u'\u05e6' # 0x96 -> HEBREW LETTER TSADI
u'\u05e7' # 0x97 -> HEBREW LETTER QOF
u'\u05e8' # 0x98 -> HEBREW LETTER RESH
u'\u05e9' # 0x99 -> HEBREW LETTER SHIN
u'\u05ea' # 0x9A -> HEBREW LETTER TAV
u'\ufffe' # 0x9B -> UNDEFINED
u'\xa3' # 0x9C -> POUND SIGN
u'\ufffe' # 0x9D -> UNDEFINED
u'\xd7' # 0x9E -> MULTIPLICATION SIGN
u'\ufffe' # 0x9F -> UNDEFINED
u'\ufffe' # 0xA0 -> UNDEFINED
u'\ufffe' # 0xA1 -> UNDEFINED
u'\ufffe' # 0xA2 -> UNDEFINED
u'\ufffe' # 0xA3 -> UNDEFINED
u'\ufffe' # 0xA4 -> UNDEFINED
u'\ufffe' # 0xA5 -> UNDEFINED
u'\ufffe' # 0xA6 -> UNDEFINED
u'\ufffe' # 0xA7 -> UNDEFINED
u'\ufffe' # 0xA8 -> UNDEFINED
u'\xae' # 0xA9 -> REGISTERED SIGN
u'\xac' # 0xAA -> NOT SIGN
u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF
u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER
u'\ufffe' # 0xAD -> UNDEFINED
u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0xB0 -> LIGHT SHADE
u'\u2592' # 0xB1 -> MEDIUM SHADE
u'\u2593' # 0xB2 -> DARK SHADE
u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\ufffe' # 0xB5 -> UNDEFINED
u'\ufffe' # 0xB6 -> UNDEFINED
u'\ufffe' # 0xB7 -> UNDEFINED
u'\xa9' # 0xB8 -> COPYRIGHT SIGN
u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\xa2' # 0xBD -> CENT SIGN
u'\xa5' # 0xBE -> YEN SIGN
u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\ufffe' # 0xC6 -> UNDEFINED
u'\ufffe' # 0xC7 -> UNDEFINED
u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0xCF -> CURRENCY SIGN
u'\ufffe' # 0xD0 -> UNDEFINED
u'\ufffe' # 0xD1 -> UNDEFINED
u'\ufffe' # 0xD2 -> UNDEFINED
u'\ufffe' # 0xD3 -> UNDEFINEDS
u'\ufffe' # 0xD4 -> UNDEFINED
u'\ufffe' # 0xD5 -> UNDEFINED
u'\ufffe' # 0xD6 -> UNDEFINEDE
u'\ufffe' # 0xD7 -> UNDEFINED
u'\ufffe' # 0xD8 -> UNDEFINED
u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0xDB -> FULL BLOCK
u'\u2584' # 0xDC -> LOWER HALF BLOCK
u'\xa6' # 0xDD -> BROKEN BAR
u'\ufffe' # 0xDE -> UNDEFINED
u'\u2580' # 0xDF -> UPPER HALF BLOCK
u'\ufffe' # 0xE0 -> UNDEFINED
u'\ufffe' # 0xE1 -> UNDEFINED
u'\ufffe' # 0xE2 -> UNDEFINED
u'\ufffe' # 0xE3 -> UNDEFINED
u'\ufffe' # 0xE4 -> UNDEFINED
u'\ufffe' # 0xE5 -> UNDEFINED
u'\xb5' # 0xE6 -> MICRO SIGN
u'\ufffe' # 0xE7 -> UNDEFINED
u'\ufffe' # 0xE8 -> UNDEFINED
u'\ufffe' # 0xE9 -> UNDEFINED
u'\ufffe' # 0xEA -> UNDEFINED
u'\ufffe' # 0xEB -> UNDEFINED
u'\ufffe' # 0xEC -> UNDEFINED
u'\ufffe' # 0xED -> UNDEFINED
u'\xaf' # 0xEE -> MACRON
u'\xb4' # 0xEF -> ACUTE ACCENT
u'\xad' # 0xF0 -> SOFT HYPHEN
u'\xb1' # 0xF1 -> PLUS-MINUS SIGN
u'\u2017' # 0xF2 -> DOUBLE LOW LINE
u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS
u'\xb6' # 0xF4 -> PILCROW SIGN
u'\xa7' # 0xF5 -> SECTION SIGN
u'\xf7' # 0xF6 -> DIVISION SIGN
u'\xb8' # 0xF7 -> CEDILLA
u'\xb0' # 0xF8 -> DEGREE SIGN
u'\xa8' # 0xF9 -> DIAERESIS
u'\xb7' # 0xFA -> MIDDLE DOT
u'\xb9' # 0xFB -> SUPERSCRIPT ONE
u'\xb3' # 0xFC -> SUPERSCRIPT THREE
u'\xb2' # 0xFD -> SUPERSCRIPT TWO
u'\u25a0' # 0xFE -> BLACK SQUARE
u'\xa0' # 0xFF -> NO-BREAK SPACE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,694 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp857',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00d5: None, # UNDEFINED
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: None, # UNDEFINED
0x00e8: 0x00d7, # MULTIPLICATION SIGN
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: None, # UNDEFINED
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE
u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\xae' # 0x00a9 -> REGISTERED SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\xa2' # 0x00bd -> CENT SIGN
u'\xa5' # 0x00be -> YEN SIGN
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0x00cf -> CURRENCY SIGN
u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR
u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR
u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\ufffe' # 0x00d5 -> UNDEFINED
u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\xa6' # 0x00dd -> BROKEN BAR
u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\ufffe' # 0x00e7 -> UNDEFINED
u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN
u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE
u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xaf' # 0x00ee -> MACRON
u'\xb4' # 0x00ef -> ACUTE ACCENT
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\ufffe' # 0x00f2 -> UNDEFINED
u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
u'\xb6' # 0x00f4 -> PILCROW SIGN
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\xb8' # 0x00f7 -> CEDILLA
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\xa8' # 0x00f9 -> DIAERESIS
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x00bd, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x00cf, # CURRENCY SIGN
0x00a5: 0x00be, # YEN SIGN
0x00a6: 0x00dd, # BROKEN BAR
0x00a7: 0x00f5, # SECTION SIGN
0x00a8: 0x00f9, # DIAERESIS
0x00a9: 0x00b8, # COPYRIGHT SIGN
0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00ae: 0x00a9, # REGISTERED SIGN
0x00af: 0x00ee, # MACRON
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b3: 0x00fc, # SUPERSCRIPT THREE
0x00b4: 0x00ef, # ACUTE ACCENT
0x00b5: 0x00e6, # MICRO SIGN
0x00b6: 0x00f4, # PILCROW SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00b8: 0x00f7, # CEDILLA
0x00b9: 0x00fb, # SUPERSCRIPT ONE
0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0x00e8, # MULTIPLICATION SIGN
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS
0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE
0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE
0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I
0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec for CP858, modified from cp850.
"""
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp858',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x00d7, # MULTIPLICATION SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00ae, # REGISTERED SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00b8: 0x00a9, # COPYRIGHT SIGN
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x00a2, # CENT SIGN
0x00be: 0x00a5, # YEN SIGN
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x00a4, # CURRENCY SIGN
0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00d5: 0x20ac, # EURO SIGN
0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x00a6, # BROKEN BAR
0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00ee: 0x00af, # MACRON
0x00ef: 0x00b4, # ACUTE ACCENT
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2017, # DOUBLE LOW LINE
0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x00b8, # CEDILLA
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x00b9, # SUPERSCRIPT ONE
0x00fc: 0x00b3, # SUPERSCRIPT THREE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd7' # 0x009e -> MULTIPLICATION SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\xae' # 0x00a9 -> REGISTERED SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xa9' # 0x00b8 -> COPYRIGHT SIGN
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\xa2' # 0x00bd -> CENT SIGN
u'\xa5' # 0x00be -> YEN SIGN
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa4' # 0x00cf -> CURRENCY SIGN
u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\u20ac' # 0x00d5 -> EURO SIGN
u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\xa6' # 0x00dd -> BROKEN BAR
u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xaf' # 0x00ee -> MACRON
u'\xb4' # 0x00ef -> ACUTE ACCENT
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2017' # 0x00f2 -> DOUBLE LOW LINE
u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
u'\xb6' # 0x00f4 -> PILCROW SIGN
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\xb8' # 0x00f7 -> CEDILLA
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\xa8' # 0x00f9 -> DIAERESIS
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\xb9' # 0x00fb -> SUPERSCRIPT ONE
u'\xb3' # 0x00fc -> SUPERSCRIPT THREE
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x00bd, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x00cf, # CURRENCY SIGN
0x00a5: 0x00be, # YEN SIGN
0x00a6: 0x00dd, # BROKEN BAR
0x00a7: 0x00f5, # SECTION SIGN
0x00a8: 0x00f9, # DIAERESIS
0x00a9: 0x00b8, # COPYRIGHT SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00ae: 0x00a9, # REGISTERED SIGN
0x00af: 0x00ee, # MACRON
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b3: 0x00fc, # SUPERSCRIPT THREE
0x00b4: 0x00ef, # ACUTE ACCENT
0x00b5: 0x00e6, # MICRO SIGN
0x00b6: 0x00f4, # PILCROW SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00b8: 0x00f7, # CEDILLA
0x00b9: 0x00fb, # SUPERSCRIPT ONE
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0x009e, # MULTIPLICATION SIGN
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
0x20ac: 0x00d5, # EURO SIGN
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x2017: 0x00f2, # DOUBLE LOW LINE
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp860',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xa2' # 0x009b -> CENT SIGN
u'\xa3' # 0x009c -> POUND SIGN
u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
u'\u20a7' # 0x009e -> PESETA SIGN
u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x009b, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b5: 0x00e6, # MICRO SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE
0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE
0x00f7: 0x00f6, # DIVISION SIGN
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x20a7: 0x009e, # PESETA SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp861',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH
0x008c: 0x00f0, # LATIN SMALL LETTER ETH
0x008d: 0x00de, # LATIN CAPITAL LETTER THORN
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00fe, # LATIN SMALL LETTER THORN
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH
u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH
u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\u20a7' # 0x009e -> PESETA SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a3: 0x009c, # POUND SIGN
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b5: 0x00e6, # MICRO SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE
0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH
0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00de: 0x008d, # LATIN CAPITAL LETTER THORN
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00f0: 0x008c, # LATIN SMALL LETTER ETH
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE
0x00fe: 0x0095, # LATIN SMALL LETTER THORN
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x20a7: 0x009e, # PESETA SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2310: 0x00a9, # REVERSED NOT SIGN
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp862',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL
0x0083: 0x05d3, # HEBREW LETTER DALET
0x0084: 0x05d4, # HEBREW LETTER HE
0x0085: 0x05d5, # HEBREW LETTER VAV
0x0086: 0x05d6, # HEBREW LETTER ZAYIN
0x0087: 0x05d7, # HEBREW LETTER HET
0x0088: 0x05d8, # HEBREW LETTER TET
0x0089: 0x05d9, # HEBREW LETTER YOD
0x008a: 0x05da, # HEBREW LETTER FINAL KAF
0x008b: 0x05db, # HEBREW LETTER KAF
0x008c: 0x05dc, # HEBREW LETTER LAMED
0x008d: 0x05dd, # HEBREW LETTER FINAL MEM
0x008e: 0x05de, # HEBREW LETTER MEM
0x008f: 0x05df, # HEBREW LETTER FINAL NUN
0x0090: 0x05e0, # HEBREW LETTER NUN
0x0091: 0x05e1, # HEBREW LETTER SAMEKH
0x0092: 0x05e2, # HEBREW LETTER AYIN
0x0093: 0x05e3, # HEBREW LETTER FINAL PE
0x0094: 0x05e4, # HEBREW LETTER PE
0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI
0x0096: 0x05e6, # HEBREW LETTER TSADI
0x0097: 0x05e7, # HEBREW LETTER QOF
0x0098: 0x05e8, # HEBREW LETTER RESH
0x0099: 0x05e9, # HEBREW LETTER SHIN
0x009a: 0x05ea, # HEBREW LETTER TAV
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00a5, # YEN SIGN
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF
u'\u05d1' # 0x0081 -> HEBREW LETTER BET
u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL
u'\u05d3' # 0x0083 -> HEBREW LETTER DALET
u'\u05d4' # 0x0084 -> HEBREW LETTER HE
u'\u05d5' # 0x0085 -> HEBREW LETTER VAV
u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN
u'\u05d7' # 0x0087 -> HEBREW LETTER HET
u'\u05d8' # 0x0088 -> HEBREW LETTER TET
u'\u05d9' # 0x0089 -> HEBREW LETTER YOD
u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF
u'\u05db' # 0x008b -> HEBREW LETTER KAF
u'\u05dc' # 0x008c -> HEBREW LETTER LAMED
u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM
u'\u05de' # 0x008e -> HEBREW LETTER MEM
u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN
u'\u05e0' # 0x0090 -> HEBREW LETTER NUN
u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH
u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN
u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE
u'\u05e4' # 0x0094 -> HEBREW LETTER PE
u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI
u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI
u'\u05e7' # 0x0097 -> HEBREW LETTER QOF
u'\u05e8' # 0x0098 -> HEBREW LETTER RESH
u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN
u'\u05ea' # 0x009a -> HEBREW LETTER TAV
u'\xa2' # 0x009b -> CENT SIGN
u'\xa3' # 0x009c -> POUND SIGN
u'\xa5' # 0x009d -> YEN SIGN
u'\u20a7' # 0x009e -> PESETA SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a2: 0x009b, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a5: 0x009d, # YEN SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b5: 0x00e6, # MICRO SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f7: 0x00f6, # DIVISION SIGN
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x05d0: 0x0080, # HEBREW LETTER ALEF
0x05d1: 0x0081, # HEBREW LETTER BET
0x05d2: 0x0082, # HEBREW LETTER GIMEL
0x05d3: 0x0083, # HEBREW LETTER DALET
0x05d4: 0x0084, # HEBREW LETTER HE
0x05d5: 0x0085, # HEBREW LETTER VAV
0x05d6: 0x0086, # HEBREW LETTER ZAYIN
0x05d7: 0x0087, # HEBREW LETTER HET
0x05d8: 0x0088, # HEBREW LETTER TET
0x05d9: 0x0089, # HEBREW LETTER YOD
0x05da: 0x008a, # HEBREW LETTER FINAL KAF
0x05db: 0x008b, # HEBREW LETTER KAF
0x05dc: 0x008c, # HEBREW LETTER LAMED
0x05dd: 0x008d, # HEBREW LETTER FINAL MEM
0x05de: 0x008e, # HEBREW LETTER MEM
0x05df: 0x008f, # HEBREW LETTER FINAL NUN
0x05e0: 0x0090, # HEBREW LETTER NUN
0x05e1: 0x0091, # HEBREW LETTER SAMEKH
0x05e2: 0x0092, # HEBREW LETTER AYIN
0x05e3: 0x0093, # HEBREW LETTER FINAL PE
0x05e4: 0x0094, # HEBREW LETTER PE
0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI
0x05e6: 0x0096, # HEBREW LETTER TSADI
0x05e7: 0x0097, # HEBREW LETTER QOF
0x05e8: 0x0098, # HEBREW LETTER RESH
0x05e9: 0x0099, # HEBREW LETTER SHIN
0x05ea: 0x009a, # HEBREW LETTER TAV
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x20a7: 0x009e, # PESETA SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2310: 0x00a9, # REVERSED NOT SIGN
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp863',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00b6, # PILCROW SIGN
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x2017, # DOUBLE LOW LINE
0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x008f: 0x00a7, # SECTION SIGN
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00a4, # CURRENCY SIGN
0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00a2, # CENT SIGN
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00a6, # BROKEN BAR
0x00a1: 0x00b4, # ACUTE ACCENT
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00a8, # DIAERESIS
0x00a5: 0x00b8, # CEDILLA
0x00a6: 0x00b3, # SUPERSCRIPT THREE
0x00a7: 0x00af, # MACRON
0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xb6' # 0x0086 -> PILCROW SIGN
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u2017' # 0x008d -> DOUBLE LOW LINE
u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xa7' # 0x008f -> SECTION SIGN
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xa4' # 0x0098 -> CURRENCY SIGN
u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xa2' # 0x009b -> CENT SIGN
u'\xa3' # 0x009c -> POUND SIGN
u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xa6' # 0x00a0 -> BROKEN BAR
u'\xb4' # 0x00a1 -> ACUTE ACCENT
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xa8' # 0x00a4 -> DIAERESIS
u'\xb8' # 0x00a5 -> CEDILLA
u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE
u'\xaf' # 0x00a7 -> MACRON
u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a2: 0x009b, # CENT SIGN
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x0098, # CURRENCY SIGN
0x00a6: 0x00a0, # BROKEN BAR
0x00a7: 0x008f, # SECTION SIGN
0x00a8: 0x00a4, # DIAERESIS
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00af: 0x00a7, # MACRON
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b3: 0x00a6, # SUPERSCRIPT THREE
0x00b4: 0x00a1, # ACUTE ACCENT
0x00b5: 0x00e6, # MICRO SIGN
0x00b6: 0x0086, # PILCROW SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00b8: 0x00a5, # CEDILLA
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS
0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f7: 0x00f6, # DIVISION SIGN
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x2017: 0x008d, # DOUBLE LOW LINE
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2310: 0x00a9, # REVERSED NOT SIGN
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,690 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp864',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0025: 0x066a, # ARABIC PERCENT SIGN
0x0080: 0x00b0, # DEGREE SIGN
0x0081: 0x00b7, # MIDDLE DOT
0x0082: 0x2219, # BULLET OPERATOR
0x0083: 0x221a, # SQUARE ROOT
0x0084: 0x2592, # MEDIUM SHADE
0x0085: 0x2500, # FORMS LIGHT HORIZONTAL
0x0086: 0x2502, # FORMS LIGHT VERTICAL
0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL
0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT
0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL
0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT
0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL
0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT
0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT
0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT
0x008f: 0x2518, # FORMS LIGHT UP AND LEFT
0x0090: 0x03b2, # GREEK SMALL BETA
0x0091: 0x221e, # INFINITY
0x0092: 0x03c6, # GREEK SMALL PHI
0x0093: 0x00b1, # PLUS-OR-MINUS SIGN
0x0094: 0x00bd, # FRACTION 1/2
0x0095: 0x00bc, # FRACTION 1/4
0x0096: 0x2248, # ALMOST EQUAL TO
0x0097: 0x00ab, # LEFT POINTING GUILLEMET
0x0098: 0x00bb, # RIGHT POINTING GUILLEMET
0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
0x009b: None, # UNDEFINED
0x009c: None, # UNDEFINED
0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
0x009f: None, # UNDEFINED
0x00a1: 0x00ad, # SOFT HYPHEN
0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
0x00a6: None, # UNDEFINED
0x00a7: None, # UNDEFINED
0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
0x00ac: 0x060c, # ARABIC COMMA
0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO
0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE
0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO
0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE
0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR
0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE
0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX
0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN
0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT
0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE
0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
0x00bb: 0x061b, # ARABIC SEMICOLON
0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c0: 0x00a2, # CENT SIGN
0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM
0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM
0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
0x00db: 0x00a6, # BROKEN VERTICAL BAR
0x00dc: 0x00ac, # NOT SIGN
0x00dd: 0x00f7, # DIVISION SIGN
0x00de: 0x00d7, # MULTIPLICATION SIGN
0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM
0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
0x00f1: 0x0651, # ARABIC SHADDAH
0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM
0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM
0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM
0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM
0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: None, # UNDEFINED
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xb0' # 0x0080 -> DEGREE SIGN
u'\xb7' # 0x0081 -> MIDDLE DOT
u'\u2219' # 0x0082 -> BULLET OPERATOR
u'\u221a' # 0x0083 -> SQUARE ROOT
u'\u2592' # 0x0084 -> MEDIUM SHADE
u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL
u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL
u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL
u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT
u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT
u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL
u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT
u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT
u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT
u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT
u'\u03b2' # 0x0090 -> GREEK SMALL BETA
u'\u221e' # 0x0091 -> INFINITY
u'\u03c6' # 0x0092 -> GREEK SMALL PHI
u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN
u'\xbd' # 0x0094 -> FRACTION 1/2
u'\xbc' # 0x0095 -> FRACTION 1/4
u'\u2248' # 0x0096 -> ALMOST EQUAL TO
u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET
u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET
u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
u'\ufffe' # 0x009b -> UNDEFINED
u'\ufffe' # 0x009c -> UNDEFINED
u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM
u'\ufffe' # 0x009f -> UNDEFINED
u'\xa0' # 0x00a0 -> NON-BREAKING SPACE
u'\xad' # 0x00a1 -> SOFT HYPHEN
u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
u'\xa3' # 0x00a3 -> POUND SIGN
u'\xa4' # 0x00a4 -> CURRENCY SIGN
u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
u'\ufffe' # 0x00a6 -> UNDEFINED
u'\ufffe' # 0x00a7 -> UNDEFINED
u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM
u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM
u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM
u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM
u'\u060c' # 0x00ac -> ARABIC COMMA
u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM
u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM
u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM
u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO
u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE
u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO
u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE
u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR
u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE
u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX
u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN
u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT
u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE
u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM
u'\u061b' # 0x00bb -> ARABIC SEMICOLON
u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM
u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM
u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM
u'\u061f' # 0x00bf -> ARABIC QUESTION MARK
u'\xa2' # 0x00c0 -> CENT SIGN
u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM
u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM
u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM
u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM
u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM
u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM
u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM
u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM
u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM
u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM
u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM
u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM
u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM
u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM
u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM
u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM
u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM
u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM
u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM
u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM
u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM
u'\xa6' # 0x00db -> BROKEN VERTICAL BAR
u'\xac' # 0x00dc -> NOT SIGN
u'\xf7' # 0x00dd -> DIVISION SIGN
u'\xd7' # 0x00de -> MULTIPLICATION SIGN
u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM
u'\u0640' # 0x00e0 -> ARABIC TATWEEL
u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM
u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM
u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM
u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM
u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM
u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM
u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM
u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM
u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM
u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM
u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM
u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM
u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM
u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM
u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM
u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM
u'\u0651' # 0x00f1 -> ARABIC SHADDAH
u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM
u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM
u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM
u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM
u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM
u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM
u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM
u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM
u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM
u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\ufffe' # 0x00ff -> UNDEFINED
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00a0, # NON-BREAKING SPACE
0x00a2: 0x00c0, # CENT SIGN
0x00a3: 0x00a3, # POUND SIGN
0x00a4: 0x00a4, # CURRENCY SIGN
0x00a6: 0x00db, # BROKEN VERTICAL BAR
0x00ab: 0x0097, # LEFT POINTING GUILLEMET
0x00ac: 0x00dc, # NOT SIGN
0x00ad: 0x00a1, # SOFT HYPHEN
0x00b0: 0x0080, # DEGREE SIGN
0x00b1: 0x0093, # PLUS-OR-MINUS SIGN
0x00b7: 0x0081, # MIDDLE DOT
0x00bb: 0x0098, # RIGHT POINTING GUILLEMET
0x00bc: 0x0095, # FRACTION 1/4
0x00bd: 0x0094, # FRACTION 1/2
0x00d7: 0x00de, # MULTIPLICATION SIGN
0x00f7: 0x00dd, # DIVISION SIGN
0x03b2: 0x0090, # GREEK SMALL BETA
0x03c6: 0x0092, # GREEK SMALL PHI
0x060c: 0x00ac, # ARABIC COMMA
0x061b: 0x00bb, # ARABIC SEMICOLON
0x061f: 0x00bf, # ARABIC QUESTION MARK
0x0640: 0x00e0, # ARABIC TATWEEL
0x0651: 0x00f1, # ARABIC SHADDAH
0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO
0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE
0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO
0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE
0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR
0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE
0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX
0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN
0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT
0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE
0x066a: 0x0025, # ARABIC PERCENT SIGN
0x2219: 0x0082, # BULLET OPERATOR
0x221a: 0x0083, # SQUARE ROOT
0x221e: 0x0091, # INFINITY
0x2248: 0x0096, # ALMOST EQUAL TO
0x2500: 0x0085, # FORMS LIGHT HORIZONTAL
0x2502: 0x0086, # FORMS LIGHT VERTICAL
0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT
0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT
0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT
0x2518: 0x008f, # FORMS LIGHT UP AND LEFT
0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT
0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT
0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL
0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL
0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL
0x2592: 0x0084, # MEDIUM SHADE
0x25a0: 0x00fe, # BLACK SQUARE
0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM
0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM
0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM
0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM
0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM
0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM
0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM
0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM
0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM
0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM
0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM
0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM
0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM
0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM
0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM
0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM
0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM
0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM
0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM
0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM
0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM
0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM
0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM
0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM
0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM
0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM
0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM
0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM
0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM
0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM
0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM
0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM
0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM
0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM
0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM
0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM
0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM
0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM
0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM
0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM
0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM
0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM
0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM
0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM
0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM
0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM
0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM
0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM
0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM
0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM
0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM
0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM
0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM
0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM
0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM
0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM
0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM
0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM
0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp865',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x009e: 0x20a7, # PESETA SIGN
0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00a8: 0x00bf, # INVERTED QUESTION MARK
0x00a9: 0x2310, # REVERSED NOT SIGN
0x00aa: 0x00ac, # NOT SIGN
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00a4, # CURRENCY SIGN
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00e3: 0x03c0, # GREEK SMALL LETTER PI
0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00e6: 0x00b5, # MICRO SIGN
0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
0x00ec: 0x221e, # INFINITY
0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00ef: 0x2229, # INTERSECTION
0x00f0: 0x2261, # IDENTICAL TO
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
0x00f4: 0x2320, # TOP HALF INTEGRAL
0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
0x00f6: 0x00f7, # DIVISION SIGN
0x00f7: 0x2248, # ALMOST EQUAL TO
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
u'\xa3' # 0x009c -> POUND SIGN
u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
u'\u20a7' # 0x009e -> PESETA SIGN
u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK
u'\u2310' # 0x00a9 -> REVERSED NOT SIGN
u'\xac' # 0x00aa -> NOT SIGN
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xa4' # 0x00af -> CURRENCY SIGN
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
u'\xb5' # 0x00e6 -> MICRO SIGN
u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
u'\u221e' # 0x00ec -> INFINITY
u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
u'\u2229' # 0x00ef -> INTERSECTION
u'\u2261' # 0x00f0 -> IDENTICAL TO
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL
u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
u'\xf7' # 0x00f6 -> DIVISION SIGN
u'\u2248' # 0x00f7 -> ALMOST EQUAL TO
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
u'\xb2' # 0x00fd -> SUPERSCRIPT TWO
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
0x00a3: 0x009c, # POUND SIGN
0x00a4: 0x00af, # CURRENCY SIGN
0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00aa, # NOT SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x00fd, # SUPERSCRIPT TWO
0x00b5: 0x00e6, # MICRO SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x00bf: 0x00a8, # INVERTED QUESTION MARK
0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0x00f6, # DIVISION SIGN
0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
0x03c0: 0x00e3, # GREEK SMALL LETTER PI
0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
0x20a7: 0x009e, # PESETA SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x221e: 0x00ec, # INFINITY
0x2229: 0x00ef, # INTERSECTION
0x2248: 0x00f7, # ALMOST EQUAL TO
0x2261: 0x00f0, # IDENTICAL TO
0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
0x2310: 0x00a9, # REVERSED NOT SIGN
0x2320: 0x00f4, # TOP HALF INTEGRAL
0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,698 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp866',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x258c, # LEFT HALF BLOCK
0x00de: 0x2590, # RIGHT HALF BLOCK
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x2219, # BULLET OPERATOR
0x00fa: 0x00b7, # MIDDLE DOT
0x00fb: 0x221a, # SQUARE ROOT
0x00fc: 0x2116, # NUMERO SIGN
0x00fd: 0x00a4, # CURRENCY SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A
u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE
u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE
u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE
u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE
u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE
u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE
u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE
u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I
u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA
u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL
u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM
u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN
u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O
u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE
u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER
u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES
u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE
u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U
u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF
u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA
u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE
u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE
u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA
u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU
u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E
u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU
u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA
u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE
u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE
u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE
u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE
u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE
u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE
u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE
u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I
u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I
u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA
u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL
u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM
u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN
u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O
u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u258c' # 0x00dd -> LEFT HALF BLOCK
u'\u2590' # 0x00de -> RIGHT HALF BLOCK
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER
u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES
u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE
u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U
u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF
u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA
u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE
u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE
u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA
u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA
u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU
u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E
u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU
u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI
u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI
u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\u2219' # 0x00f9 -> BULLET OPERATOR
u'\xb7' # 0x00fa -> MIDDLE DOT
u'\u221a' # 0x00fb -> SQUARE ROOT
u'\u2116' # 0x00fc -> NUMERO SIGN
u'\xa4' # 0x00fd -> CURRENCY SIGN
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a4: 0x00fd, # CURRENCY SIGN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b7: 0x00fa, # MIDDLE DOT
0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI
0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U
0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE
0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE
0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE
0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE
0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE
0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I
0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I
0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA
0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL
0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM
0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN
0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O
0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE
0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER
0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES
0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE
0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U
0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF
0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA
0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE
0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE
0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA
0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA
0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN
0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU
0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E
0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU
0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA
0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE
0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE
0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE
0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE
0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE
0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE
0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE
0x0438: 0x00a8, # CYRILLIC SMALL LETTER I
0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I
0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA
0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL
0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM
0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN
0x043e: 0x00ae, # CYRILLIC SMALL LETTER O
0x043f: 0x00af, # CYRILLIC SMALL LETTER PE
0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER
0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES
0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE
0x0443: 0x00e3, # CYRILLIC SMALL LETTER U
0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF
0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA
0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE
0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE
0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA
0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA
0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN
0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU
0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN
0x044d: 0x00ed, # CYRILLIC SMALL LETTER E
0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI
0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U
0x2116: 0x00fc, # NUMERO SIGN
0x2219: 0x00f9, # BULLET OPERATOR
0x221a: 0x00fb, # SQUARE ROOT
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x258c: 0x00dd, # LEFT HALF BLOCK
0x2590: 0x00de, # RIGHT HALF BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,689 @@
""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp869',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x0080: None, # UNDEFINED
0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED
0x0083: None, # UNDEFINED
0x0084: None, # UNDEFINED
0x0085: None, # UNDEFINED
0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x0087: None, # UNDEFINED
0x0088: 0x00b7, # MIDDLE DOT
0x0089: 0x00ac, # NOT SIGN
0x008a: 0x00a6, # BROKEN BAR
0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK
0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK
0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x008e: 0x2015, # HORIZONTAL BAR
0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x0093: None, # UNDEFINED
0x0094: None, # UNDEFINED
0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x0097: 0x00a9, # COPYRIGHT SIGN
0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x0099: 0x00b2, # SUPERSCRIPT TWO
0x009a: 0x00b3, # SUPERSCRIPT THREE
0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
0x009c: 0x00a3, # POUND SIGN
0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA
0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA
0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON
0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA
0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA
0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA
0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA
0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00b0: 0x2591, # LIGHT SHADE
0x00b1: 0x2592, # MEDIUM SHADE
0x00b2: 0x2593, # DARK SHADE
0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA
0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA
0x00b7: 0x039c, # GREEK CAPITAL LETTER MU
0x00b8: 0x039d, # GREEK CAPITAL LETTER NU
0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
0x00bd: 0x039e, # GREEK CAPITAL LETTER XI
0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI
0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO
0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI
0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI
0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI
0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA
0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA
0x00d7: 0x03b2, # GREEK SMALL LETTER BETA
0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00db: 0x2588, # FULL BLOCK
0x00dc: 0x2584, # LOWER HALF BLOCK
0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA
0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON
0x00df: 0x2580, # UPPER HALF BLOCK
0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA
0x00e1: 0x03b7, # GREEK SMALL LETTER ETA
0x00e2: 0x03b8, # GREEK SMALL LETTER THETA
0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA
0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA
0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA
0x00e6: 0x03bc, # GREEK SMALL LETTER MU
0x00e7: 0x03bd, # GREEK SMALL LETTER NU
0x00e8: 0x03be, # GREEK SMALL LETTER XI
0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON
0x00ea: 0x03c0, # GREEK SMALL LETTER PI
0x00eb: 0x03c1, # GREEK SMALL LETTER RHO
0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA
0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
0x00ee: 0x03c4, # GREEK SMALL LETTER TAU
0x00ef: 0x0384, # GREEK TONOS
0x00f0: 0x00ad, # SOFT HYPHEN
0x00f1: 0x00b1, # PLUS-MINUS SIGN
0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON
0x00f3: 0x03c6, # GREEK SMALL LETTER PHI
0x00f4: 0x03c7, # GREEK SMALL LETTER CHI
0x00f5: 0x00a7, # SECTION SIGN
0x00f6: 0x03c8, # GREEK SMALL LETTER PSI
0x00f7: 0x0385, # GREEK DIALYTIKA TONOS
0x00f8: 0x00b0, # DEGREE SIGN
0x00f9: 0x00a8, # DIAERESIS
0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA
0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
})
### Decoding Table
decoding_table = (
u'\x00' # 0x0000 -> NULL
u'\x01' # 0x0001 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL
u'\x08' # 0x0008 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN
u',' # 0x002c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP
u'/' # 0x002f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE
u':' # 0x003a -> COLON
u';' # 0x003b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE
u'\x7f' # 0x007f -> DELETE
u'\ufffe' # 0x0080 -> UNDEFINED
u'\ufffe' # 0x0081 -> UNDEFINED
u'\ufffe' # 0x0082 -> UNDEFINED
u'\ufffe' # 0x0083 -> UNDEFINED
u'\ufffe' # 0x0084 -> UNDEFINED
u'\ufffe' # 0x0085 -> UNDEFINED
u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
u'\ufffe' # 0x0087 -> UNDEFINED
u'\xb7' # 0x0088 -> MIDDLE DOT
u'\xac' # 0x0089 -> NOT SIGN
u'\xa6' # 0x008a -> BROKEN BAR
u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK
u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS
u'\u2015' # 0x008e -> HORIZONTAL BAR
u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS
u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS
u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
u'\ufffe' # 0x0093 -> UNDEFINED
u'\ufffe' # 0x0094 -> UNDEFINED
u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
u'\xa9' # 0x0097 -> COPYRIGHT SIGN
u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
u'\xb2' # 0x0099 -> SUPERSCRIPT TWO
u'\xb3' # 0x009a -> SUPERSCRIPT THREE
u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS
u'\xa3' # 0x009c -> POUND SIGN
u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS
u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS
u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS
u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS
u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS
u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA
u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA
u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA
u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA
u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON
u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA
u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA
u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA
u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA
u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u2591' # 0x00b0 -> LIGHT SHADE
u'\u2592' # 0x00b1 -> MEDIUM SHADE
u'\u2593' # 0x00b2 -> DARK SHADE
u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA
u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA
u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU
u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU
u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI
u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON
u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI
u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO
u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA
u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU
u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON
u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI
u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI
u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI
u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA
u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA
u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA
u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA
u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2588' # 0x00db -> FULL BLOCK
u'\u2584' # 0x00dc -> LOWER HALF BLOCK
u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA
u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON
u'\u2580' # 0x00df -> UPPER HALF BLOCK
u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA
u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA
u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA
u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA
u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA
u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA
u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU
u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU
u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI
u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON
u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI
u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO
u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA
u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA
u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU
u'\u0384' # 0x00ef -> GREEK TONOS
u'\xad' # 0x00f0 -> SOFT HYPHEN
u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN
u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON
u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI
u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI
u'\xa7' # 0x00f5 -> SECTION SIGN
u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI
u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS
u'\xb0' # 0x00f8 -> DEGREE SIGN
u'\xa8' # 0x00f9 -> DIAERESIS
u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA
u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS
u'\u25a0' # 0x00fe -> BLACK SQUARE
u'\xa0' # 0x00ff -> NO-BREAK SPACE
)
### Encoding Map
encoding_map = {
0x0000: 0x0000, # NULL
0x0001: 0x0001, # START OF HEADING
0x0002: 0x0002, # START OF TEXT
0x0003: 0x0003, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE
0x0007: 0x0007, # BELL
0x0008: 0x0008, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL
0x0019: 0x0019, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN
0x002c: 0x002c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP
0x002f: 0x002f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE
0x003a: 0x003a, # COLON
0x003b: 0x003b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE
0x007f: 0x007f, # DELETE
0x00a0: 0x00ff, # NO-BREAK SPACE
0x00a3: 0x009c, # POUND SIGN
0x00a6: 0x008a, # BROKEN BAR
0x00a7: 0x00f5, # SECTION SIGN
0x00a8: 0x00f9, # DIAERESIS
0x00a9: 0x0097, # COPYRIGHT SIGN
0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x0089, # NOT SIGN
0x00ad: 0x00f0, # SOFT HYPHEN
0x00b0: 0x00f8, # DEGREE SIGN
0x00b1: 0x00f1, # PLUS-MINUS SIGN
0x00b2: 0x0099, # SUPERSCRIPT TWO
0x00b3: 0x009a, # SUPERSCRIPT THREE
0x00b7: 0x0088, # MIDDLE DOT
0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
0x0384: 0x00ef, # GREEK TONOS
0x0385: 0x00f7, # GREEK DIALYTIKA TONOS
0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS
0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS
0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS
0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS
0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS
0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS
0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS
0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA
0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA
0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA
0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA
0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON
0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA
0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA
0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA
0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA
0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA
0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA
0x039c: 0x00b7, # GREEK CAPITAL LETTER MU
0x039d: 0x00b8, # GREEK CAPITAL LETTER NU
0x039e: 0x00bd, # GREEK CAPITAL LETTER XI
0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON
0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI
0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO
0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA
0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU
0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON
0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI
0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI
0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI
0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA
0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS
0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS
0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS
0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS
0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA
0x03b2: 0x00d7, # GREEK SMALL LETTER BETA
0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA
0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA
0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON
0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA
0x03b7: 0x00e1, # GREEK SMALL LETTER ETA
0x03b8: 0x00e2, # GREEK SMALL LETTER THETA
0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA
0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA
0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA
0x03bc: 0x00e6, # GREEK SMALL LETTER MU
0x03bd: 0x00e7, # GREEK SMALL LETTER NU
0x03be: 0x00e8, # GREEK SMALL LETTER XI
0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON
0x03c0: 0x00ea, # GREEK SMALL LETTER PI
0x03c1: 0x00eb, # GREEK SMALL LETTER RHO
0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA
0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA
0x03c4: 0x00ee, # GREEK SMALL LETTER TAU
0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON
0x03c6: 0x00f3, # GREEK SMALL LETTER PHI
0x03c7: 0x00f4, # GREEK SMALL LETTER CHI
0x03c8: 0x00f6, # GREEK SMALL LETTER PSI
0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA
0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS
0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS
0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS
0x2015: 0x008e, # HORIZONTAL BAR
0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK
0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK
0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x00df, # UPPER HALF BLOCK
0x2584: 0x00dc, # LOWER HALF BLOCK
0x2588: 0x00db, # FULL BLOCK
0x2591: 0x00b0, # LIGHT SHADE
0x2592: 0x00b1, # MEDIUM SHADE
0x2593: 0x00b2, # DARK SHADE
0x25a0: 0x00fe, # BLACK SQUARE
}

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp874',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\u20ac' # 0x80 -> EURO SIGN
u'\ufffe' # 0x81 -> UNDEFINED
u'\ufffe' # 0x82 -> UNDEFINED
u'\ufffe' # 0x83 -> UNDEFINED
u'\ufffe' # 0x84 -> UNDEFINED
u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
u'\ufffe' # 0x86 -> UNDEFINED
u'\ufffe' # 0x87 -> UNDEFINED
u'\ufffe' # 0x88 -> UNDEFINED
u'\ufffe' # 0x89 -> UNDEFINED
u'\ufffe' # 0x8A -> UNDEFINED
u'\ufffe' # 0x8B -> UNDEFINED
u'\ufffe' # 0x8C -> UNDEFINED
u'\ufffe' # 0x8D -> UNDEFINED
u'\ufffe' # 0x8E -> UNDEFINED
u'\ufffe' # 0x8F -> UNDEFINED
u'\ufffe' # 0x90 -> UNDEFINED
u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
u'\u2022' # 0x95 -> BULLET
u'\u2013' # 0x96 -> EN DASH
u'\u2014' # 0x97 -> EM DASH
u'\ufffe' # 0x98 -> UNDEFINED
u'\ufffe' # 0x99 -> UNDEFINED
u'\ufffe' # 0x9A -> UNDEFINED
u'\ufffe' # 0x9B -> UNDEFINED
u'\ufffe' # 0x9C -> UNDEFINED
u'\ufffe' # 0x9D -> UNDEFINED
u'\ufffe' # 0x9E -> UNDEFINED
u'\ufffe' # 0x9F -> UNDEFINED
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI
u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI
u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT
u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI
u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON
u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG
u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU
u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN
u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING
u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG
u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO
u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE
u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING
u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA
u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK
u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN
u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO
u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO
u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN
u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK
u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO
u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG
u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN
u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG
u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU
u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI
u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA
u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG
u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA
u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN
u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN
u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO
u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA
u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK
u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA
u'\u0e24' # 0xC4 -> THAI CHARACTER RU
u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING
u'\u0e26' # 0xC6 -> THAI CHARACTER LU
u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN
u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA
u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI
u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA
u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP
u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA
u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG
u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK
u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI
u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A
u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT
u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA
u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM
u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I
u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II
u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE
u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE
u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U
u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU
u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU
u'\ufffe' # 0xDB -> UNDEFINED
u'\ufffe' # 0xDC -> UNDEFINED
u'\ufffe' # 0xDD -> UNDEFINED
u'\ufffe' # 0xDE -> UNDEFINED
u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT
u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E
u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE
u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O
u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN
u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI
u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO
u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK
u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU
u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK
u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO
u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI
u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA
u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT
u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT
u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN
u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN
u'\u0e50' # 0xF0 -> THAI DIGIT ZERO
u'\u0e51' # 0xF1 -> THAI DIGIT ONE
u'\u0e52' # 0xF2 -> THAI DIGIT TWO
u'\u0e53' # 0xF3 -> THAI DIGIT THREE
u'\u0e54' # 0xF4 -> THAI DIGIT FOUR
u'\u0e55' # 0xF5 -> THAI DIGIT FIVE
u'\u0e56' # 0xF6 -> THAI DIGIT SIX
u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN
u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT
u'\u0e59' # 0xF9 -> THAI DIGIT NINE
u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU
u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT
u'\ufffe' # 0xFC -> UNDEFINED
u'\ufffe' # 0xFD -> UNDEFINED
u'\ufffe' # 0xFE -> UNDEFINED
u'\ufffe' # 0xFF -> UNDEFINED
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='cp875',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0A -> CONTROL
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1A -> CONTROL
u'\x8f' # 0x1B -> CONTROL
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2A -> CONTROL
u'\x8b' # 0x2B -> CONTROL
u'\x8c' # 0x2C -> CONTROL
u'\x05' # 0x2D -> ENQUIRY
u'\x06' # 0x2E -> ACKNOWLEDGE
u'\x07' # 0x2F -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3A -> CONTROL
u'\x9b' # 0x3B -> CONTROL
u'\x14' # 0x3C -> DEVICE CONTROL FOUR
u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3E -> CONTROL
u'\x1a' # 0x3F -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA
u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA
u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA
u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA
u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON
u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA
u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA
u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA
u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA
u'[' # 0x4A -> LEFT SQUARE BRACKET
u'.' # 0x4B -> FULL STOP
u'<' # 0x4C -> LESS-THAN SIGN
u'(' # 0x4D -> LEFT PARENTHESIS
u'+' # 0x4E -> PLUS SIGN
u'!' # 0x4F -> EXCLAMATION MARK
u'&' # 0x50 -> AMPERSAND
u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA
u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA
u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU
u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU
u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI
u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON
u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI
u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO
u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA
u']' # 0x5A -> RIGHT SQUARE BRACKET
u'$' # 0x5B -> DOLLAR SIGN
u'*' # 0x5C -> ASTERISK
u')' # 0x5D -> RIGHT PARENTHESIS
u';' # 0x5E -> SEMICOLON
u'^' # 0x5F -> CIRCUMFLEX ACCENT
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU
u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON
u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI
u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI
u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI
u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA
u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
u'|' # 0x6A -> VERTICAL LINE
u',' # 0x6B -> COMMA
u'%' # 0x6C -> PERCENT SIGN
u'_' # 0x6D -> LOW LINE
u'>' # 0x6E -> GREATER-THAN SIGN
u'?' # 0x6F -> QUESTION MARK
u'\xa8' # 0x70 -> DIAERESIS
u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS
u'\xa0' # 0x74 -> NO-BREAK SPACE
u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS
u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7A -> COLON
u'#' # 0x7B -> NUMBER SIGN
u'@' # 0x7C -> COMMERCIAL AT
u"'" # 0x7D -> APOSTROPHE
u'=' # 0x7E -> EQUALS SIGN
u'"' # 0x7F -> QUOTATION MARK
u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA
u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA
u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA
u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA
u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON
u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA
u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA
u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA
u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA
u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA
u'\u03bc' # 0x9F -> GREEK SMALL LETTER MU
u'\xb4' # 0xA0 -> ACUTE ACCENT
u'~' # 0xA1 -> TILDE
u's' # 0xA2 -> LATIN SMALL LETTER S
u't' # 0xA3 -> LATIN SMALL LETTER T
u'u' # 0xA4 -> LATIN SMALL LETTER U
u'v' # 0xA5 -> LATIN SMALL LETTER V
u'w' # 0xA6 -> LATIN SMALL LETTER W
u'x' # 0xA7 -> LATIN SMALL LETTER X
u'y' # 0xA8 -> LATIN SMALL LETTER Y
u'z' # 0xA9 -> LATIN SMALL LETTER Z
u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU
u'\u03be' # 0xAB -> GREEK SMALL LETTER XI
u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON
u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI
u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO
u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA
u'\xa3' # 0xB0 -> POUND SIGN
u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS
u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS
u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS
u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS
u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS
u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS
u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS
u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA
u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU
u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON
u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI
u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI
u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI
u'{' # 0xC0 -> LEFT CURLY BRACKET
u'A' # 0xC1 -> LATIN CAPITAL LETTER A
u'B' # 0xC2 -> LATIN CAPITAL LETTER B
u'C' # 0xC3 -> LATIN CAPITAL LETTER C
u'D' # 0xC4 -> LATIN CAPITAL LETTER D
u'E' # 0xC5 -> LATIN CAPITAL LETTER E
u'F' # 0xC6 -> LATIN CAPITAL LETTER F
u'G' # 0xC7 -> LATIN CAPITAL LETTER G
u'H' # 0xC8 -> LATIN CAPITAL LETTER H
u'I' # 0xC9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xCA -> SOFT HYPHEN
u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA
u'\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK
u'\u2015' # 0xCF -> HORIZONTAL BAR
u'}' # 0xD0 -> RIGHT CURLY BRACKET
u'J' # 0xD1 -> LATIN CAPITAL LETTER J
u'K' # 0xD2 -> LATIN CAPITAL LETTER K
u'L' # 0xD3 -> LATIN CAPITAL LETTER L
u'M' # 0xD4 -> LATIN CAPITAL LETTER M
u'N' # 0xD5 -> LATIN CAPITAL LETTER N
u'O' # 0xD6 -> LATIN CAPITAL LETTER O
u'P' # 0xD7 -> LATIN CAPITAL LETTER P
u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
u'R' # 0xD9 -> LATIN CAPITAL LETTER R
u'\xb1' # 0xDA -> PLUS-MINUS SIGN
u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF
u'\x1a' # 0xDC -> SUBSTITUTE
u'\u0387' # 0xDD -> GREEK ANO TELEIA
u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK
u'\xa6' # 0xDF -> BROKEN BAR
u'\\' # 0xE0 -> REVERSE SOLIDUS
u'\x1a' # 0xE1 -> SUBSTITUTE
u'S' # 0xE2 -> LATIN CAPITAL LETTER S
u'T' # 0xE3 -> LATIN CAPITAL LETTER T
u'U' # 0xE4 -> LATIN CAPITAL LETTER U
u'V' # 0xE5 -> LATIN CAPITAL LETTER V
u'W' # 0xE6 -> LATIN CAPITAL LETTER W
u'X' # 0xE7 -> LATIN CAPITAL LETTER X
u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xEA -> SUPERSCRIPT TWO
u'\xa7' # 0xEB -> SECTION SIGN
u'\x1a' # 0xEC -> SUBSTITUTE
u'\x1a' # 0xED -> SUBSTITUTE
u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xEF -> NOT SIGN
u'0' # 0xF0 -> DIGIT ZERO
u'1' # 0xF1 -> DIGIT ONE
u'2' # 0xF2 -> DIGIT TWO
u'3' # 0xF3 -> DIGIT THREE
u'4' # 0xF4 -> DIGIT FOUR
u'5' # 0xF5 -> DIGIT FIVE
u'6' # 0xF6 -> DIGIT SIX
u'7' # 0xF7 -> DIGIT SEVEN
u'8' # 0xF8 -> DIGIT EIGHT
u'9' # 0xF9 -> DIGIT NINE
u'\xb3' # 0xFA -> SUPERSCRIPT THREE
u'\xa9' # 0xFB -> COPYRIGHT SIGN
u'\x1a' # 0xFC -> SUBSTITUTE
u'\x1a' # 0xFD -> SUBSTITUTE
u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\x9f' # 0xFF -> CONTROL
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,39 @@
#
# cp932.py: Python Unicode Codec for CP932
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_jp, codecs
import _multibytecodec as mbc
codec = _codecs_jp.getcodec('cp932')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='cp932',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# cp949.py: Python Unicode Codec for CP949
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_kr, codecs
import _multibytecodec as mbc
codec = _codecs_kr.getcodec('cp949')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='cp949',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# cp950.py: Python Unicode Codec for CP950
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_tw, codecs
import _multibytecodec as mbc
codec = _codecs_tw.getcodec('cp950')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='cp950',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_jp, codecs
import _multibytecodec as mbc
codec = _codecs_jp.getcodec('euc_jis_2004')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='euc_jis_2004',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_jp, codecs
import _multibytecodec as mbc
codec = _codecs_jp.getcodec('euc_jisx0213')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='euc_jisx0213',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# euc_jp.py: Python Unicode Codec for EUC_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_jp, codecs
import _multibytecodec as mbc
codec = _codecs_jp.getcodec('euc_jp')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='euc_jp',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# euc_kr.py: Python Unicode Codec for EUC_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_kr, codecs
import _multibytecodec as mbc
codec = _codecs_kr.getcodec('euc_kr')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='euc_kr',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# gb18030.py: Python Unicode Codec for GB18030
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_cn, codecs
import _multibytecodec as mbc
codec = _codecs_cn.getcodec('gb18030')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='gb18030',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# gb2312.py: Python Unicode Codec for GB2312
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_cn, codecs
import _multibytecodec as mbc
codec = _codecs_cn.getcodec('gb2312')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='gb2312',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# gbk.py: Python Unicode Codec for GBK
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_cn, codecs
import _multibytecodec as mbc
codec = _codecs_cn.getcodec('gbk')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='gbk',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,79 @@
""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding
Unlike most of the other codecs which target Unicode, this codec
will return Python string objects for both encode and decode.
Written by Marc-Andre Lemburg (mal@lemburg.com).
"""
import codecs, binascii
### Codec APIs
def hex_encode(input,errors='strict'):
""" Encodes the object input and returns a tuple (output
object, length consumed).
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = binascii.b2a_hex(input)
return (output, len(input))
def hex_decode(input,errors='strict'):
""" Decodes the object input and returns a tuple (output
object, length consumed).
input must be an object which provides the bf_getreadbuf
buffer slot. Python strings, buffer objects and memory
mapped files are examples of objects providing this slot.
errors defines the error handling to apply. It defaults to
'strict' handling which is the only currently supported
error handling for this codec.
"""
assert errors == 'strict'
output = binascii.a2b_hex(input)
return (output, len(input))
class Codec(codecs.Codec):
def encode(self, input,errors='strict'):
return hex_encode(input,errors)
def decode(self, input,errors='strict'):
return hex_decode(input,errors)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
assert self.errors == 'strict'
return binascii.b2a_hex(input)
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
assert self.errors == 'strict'
return binascii.a2b_hex(input)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='hex',
encode=hex_encode,
decode=hex_decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,152 @@
""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py.
Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen)
Original source: LaserJet IIP Printer User's Manual HP part no
33471-90901, Hewlet-Packard, June 1989.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_map)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='hp-roman8',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00a8: 0x00b4, # ACUTE ACCENT
0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone)
0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
0x00ab: 0x00a8, # DIAERESIS
0x00ac: 0x02dc, # SMALL TILDE
0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00af: 0x20a4, # LIRA SIGN
0x00b0: 0x00af, # MACRON
0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
0x00b3: 0x00b0, # DEGREE SIGN
0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK
0x00b9: 0x00bf, # INVERTED QUESTION MARK
0x00ba: 0x00a4, # CURRENCY SIGN
0x00bb: 0x00a3, # POUND SIGN
0x00bc: 0x00a5, # YEN SIGN
0x00bd: 0x00a7, # SECTION SIGN
0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK
0x00bf: 0x00a2, # CENT SIGN
0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE
0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
0x00d7: 0x00e6, # LATIN SMALL LETTER AE
0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German)
0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic)
0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic)
0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic)
0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic)
0x00f2: 0x00b7, # MIDDLE DOT
0x00f3: 0x00b5, # MICRO SIGN
0x00f4: 0x00b6, # PILCROW SIGN
0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS
0x00f6: 0x2014, # EM DASH
0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER
0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF
0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR
0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR
0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00fc: 0x25a0, # BLACK SQUARE
0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00fe: 0x00b1, # PLUS-MINUS SIGN
0x00ff: None,
})
### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map)

View File

@ -0,0 +1,39 @@
#
# hz.py: Python Unicode Codec for HZ
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_cn, codecs
import _multibytecodec as mbc
codec = _codecs_cn.getcodec('hz')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='hz',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,288 @@
# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata
# IDNA section 3.1
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
# IDNA section 5
ace_prefix = "xn--"
uace_prefix = unicode(ace_prefix, "ascii")
# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
# Map
newlabel = []
for c in label:
if stringprep.in_table_b1(c):
# Map to nothing
continue
newlabel.append(stringprep.map_table_b2(c))
label = u"".join(newlabel)
# Normalize
label = unicodedata.normalize("NFKC", label)
# Prohibit
for c in label:
if stringprep.in_table_c12(c) or \
stringprep.in_table_c22(c) or \
stringprep.in_table_c3(c) or \
stringprep.in_table_c4(c) or \
stringprep.in_table_c5(c) or \
stringprep.in_table_c6(c) or \
stringprep.in_table_c7(c) or \
stringprep.in_table_c8(c) or \
stringprep.in_table_c9(c):
raise UnicodeError("Invalid character %r" % c)
# Check bidi
RandAL = map(stringprep.in_table_d1, label)
for c in RandAL:
if c:
# There is a RandAL char in the string. Must perform further
# tests:
# 1) The characters in section 5.8 MUST be prohibited.
# This is table C.8, which was already checked
# 2) If a string contains any RandALCat character, the string
# MUST NOT contain any LCat character.
if filter(stringprep.in_table_d2, label):
raise UnicodeError("Violation of BIDI requirement 2")
# 3) If a string contains any RandALCat character, a
# RandALCat character MUST be the first character of the
# string, and a RandALCat character MUST be the last
# character of the string.
if not RandAL[0] or not RandAL[-1]:
raise UnicodeError("Violation of BIDI requirement 3")
return label
def ToASCII(label):
try:
# Step 1: try ASCII
label = label.encode("ascii")
except UnicodeError:
pass
else:
# Skip to step 3: UseSTD3ASCIIRules is false, so
# Skip to step 8.
if 0 < len(label) < 64:
return label
raise UnicodeError("label empty or too long")
# Step 2: nameprep
label = nameprep(label)
# Step 3: UseSTD3ASCIIRules is false
# Step 4: try ASCII
try:
label = label.encode("ascii")
except UnicodeError:
pass
else:
# Skip to step 8.
if 0 < len(label) < 64:
return label
raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix
if label.startswith(uace_prefix):
raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE
label = label.encode("punycode")
# Step 7: Prepend ACE prefix
label = ace_prefix + label
# Step 8: Check size
if 0 < len(label) < 64:
return label
raise UnicodeError("label empty or too long")
def ToUnicode(label):
# Step 1: Check for ASCII
if isinstance(label, str):
pure_ascii = True
else:
try:
label = label.encode("ascii")
pure_ascii = True
except UnicodeError:
pure_ascii = False
if not pure_ascii:
# Step 2: Perform nameprep
label = nameprep(label)
# It doesn't say this, but apparently, it should be ASCII now
try:
label = label.encode("ascii")
except UnicodeError:
raise UnicodeError("Invalid character in IDN label")
# Step 3: Check for ACE prefix
if not label.startswith(ace_prefix):
return unicode(label, "ascii")
# Step 4: Remove ACE prefix
label1 = label[len(ace_prefix):]
# Step 5: Decode using PUNYCODE
result = label1.decode("punycode")
# Step 6: Apply ToASCII
label2 = ToASCII(result)
# Step 7: Compare the result of step 6 with the one of step 3
# label2 will already be in lower case.
if label.lower() != label2:
raise UnicodeError("IDNA does not round-trip", label, label2)
# Step 8: return the result of step 5
return result
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
if errors != 'strict':
# IDNA is quite clear that implementations must be strict
raise UnicodeError("unsupported error handling "+errors)
if not input:
return "", 0
result = []
labels = dots.split(input)
if labels and len(labels[-1])==0:
trailing_dot = '.'
del labels[-1]
else:
trailing_dot = ''
for label in labels:
result.append(ToASCII(label))
# Join with U+002E
return ".".join(result)+trailing_dot, len(input)
def decode(self,input,errors='strict'):
if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors)
if not input:
return u"", 0
# IDNA allows decoding to operate on Unicode strings, too.
if isinstance(input, unicode):
labels = dots.split(input)
else:
# Must be ASCII string
input = str(input)
unicode(input, "ascii")
labels = input.split(".")
if labels and len(labels[-1]) == 0:
trailing_dot = u'.'
del labels[-1]
else:
trailing_dot = u''
result = []
for label in labels:
result.append(ToUnicode(label))
return u".".join(result)+trailing_dot, len(input)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, input, errors, final):
if errors != 'strict':
# IDNA is quite clear that implementations must be strict
raise UnicodeError("unsupported error handling "+errors)
if not input:
return ("", 0)
labels = dots.split(input)
trailing_dot = u''
if labels:
if not labels[-1]:
trailing_dot = '.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = '.'
result = []
size = 0
for label in labels:
result.append(ToASCII(label))
if size:
size += 1
size += len(label)
# Join with U+002E
result = ".".join(result) + trailing_dot
size += len(trailing_dot)
return (result, size)
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, input, errors, final):
if errors != 'strict':
raise UnicodeError("Unsupported error handling "+errors)
if not input:
return (u"", 0)
# IDNA allows decoding to operate on Unicode strings, too.
if isinstance(input, unicode):
labels = dots.split(input)
else:
# Must be ASCII string
input = str(input)
unicode(input, "ascii")
labels = input.split(".")
trailing_dot = u''
if labels:
if not labels[-1]:
trailing_dot = u'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = u'.'
result = []
size = 0
for label in labels:
result.append(ToUnicode(label))
if size:
size += 1
size += len(label)
result = u".".join(result) + trailing_dot
size += len(trailing_dot)
return (result, size)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='idna',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp.py: Python Unicode Codec for ISO2022_JP
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp_1')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp_1',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp_2')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp_2',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp_2004',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp_3')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp_3',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_jp_ext',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,39 @@
#
# iso2022_kr.py: Python Unicode Codec for ISO2022_KR
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#
import _codecs_iso2022, codecs
import _multibytecodec as mbc
codec = _codecs_iso2022.getcodec('iso2022_kr')
class Codec(codecs.Codec):
encode = codec.encode
decode = codec.decode
class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
codecs.IncrementalEncoder):
codec = codec
class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
codecs.IncrementalDecoder):
codec = codec
class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
codec = codec
class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
codec = codec
def getregentry():
return codecs.CodecInfo(
name='iso2022_kr',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-1',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German)
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_10 generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-10',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON
u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA
u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON
u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA
u'\xa7' # 0xA7 -> SECTION SIGN
u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA
u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON
u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE
u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON
u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK
u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON
u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA
u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON
u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE
u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA
u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE
u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON
u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE
u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON
u'\u2015' # 0xBD -> HORIZONTAL BAR
u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON
u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG
u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German)
u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_11 generated from 'MAPPINGS/ISO8859/8859-11.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-11',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI
u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI
u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT
u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI
u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON
u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG
u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU
u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN
u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING
u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG
u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO
u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE
u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING
u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA
u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK
u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN
u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO
u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO
u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN
u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK
u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO
u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG
u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN
u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG
u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU
u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI
u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA
u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG
u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA
u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN
u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN
u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO
u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA
u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK
u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA
u'\u0e24' # 0xC4 -> THAI CHARACTER RU
u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING
u'\u0e26' # 0xC6 -> THAI CHARACTER LU
u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN
u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA
u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI
u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA
u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP
u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA
u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG
u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK
u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI
u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A
u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT
u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA
u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM
u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I
u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II
u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE
u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE
u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U
u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU
u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT
u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E
u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE
u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O
u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN
u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI
u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO
u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK
u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU
u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK
u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO
u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI
u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA
u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT
u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT
u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN
u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN
u'\u0e50' # 0xF0 -> THAI DIGIT ZERO
u'\u0e51' # 0xF1 -> THAI DIGIT ONE
u'\u0e52' # 0xF2 -> THAI DIGIT TWO
u'\u0e53' # 0xF3 -> THAI DIGIT THREE
u'\u0e54' # 0xF4 -> THAI DIGIT FOUR
u'\u0e55' # 0xF5 -> THAI DIGIT FIVE
u'\u0e56' # 0xF6 -> THAI DIGIT SIX
u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN
u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT
u'\u0e59' # 0xF9 -> THAI DIGIT NINE
u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU
u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_13 generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-13',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK
u'\xa6' # 0xA6 -> BROKEN BAR
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
u'\xe6' # 0xBF -> LATIN SMALL LETTER AE
u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON
u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON
u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German)
u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK
u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK
u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON
u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK
u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE
u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA
u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA
u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON
u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA
u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON
u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK
u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE
u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE
u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON
u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_14 generated from 'MAPPINGS/ISO8859/8859-14.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-14',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE
u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE
u'\xa3' # 0xA3 -> POUND SIGN
u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE
u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE
u'\xa7' # 0xA7 -> SECTION SIGN
u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE
u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE
u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE
u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE
u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE
u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE
u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE
u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE
u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE
u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE
u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE
u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE
u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS
u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS
u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_15 generated from 'MAPPINGS/ISO8859/8859-15.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-15',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
u'\xa2' # 0xA2 -> CENT SIGN
u'\xa3' # 0xA3 -> POUND SIGN
u'\u20ac' # 0xA4 -> EURO SIGN
u'\xa5' # 0xA5 -> YEN SIGN
u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON
u'\xa7' # 0xA7 -> SECTION SIGN
u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0xAC -> NOT SIGN
u'\xad' # 0xAD -> SOFT HYPHEN
u'\xae' # 0xAE -> REGISTERED SIGN
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
u'\xb5' # 0xB5 -> MICRO SIGN
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON
u'\xb9' # 0xB9 -> SUPERSCRIPT ONE
u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE
u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE
u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\xbf' # 0xBF -> INVERTED QUESTION MARK
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_16 generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-16',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK
u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
u'\u20ac' # 0xA4 -> EURO SIGN
u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK
u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON
u'\xa7' # 0xA7 -> SECTION SIGN
u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON
u'\xa9' # 0xA9 -> COPYRIGHT SIGN
u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW
u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE
u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\xb1' # 0xB1 -> PLUS-MINUS SIGN
u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON
u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE
u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK
u'\xb6' # 0xB6 -> PILCROW SIGN
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON
u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON
u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW
u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE
u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE
u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE
u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK
u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE
u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK
u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW
u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_2 generated from 'MAPPINGS/ISO8859/8859-2.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-2',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u02d8' # 0xA2 -> BREVE
u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON
u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON
u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON
u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON
u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK
u'\u02db' # 0xB2 -> OGONEK
u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON
u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE
u'\u02c7' # 0xB7 -> CARON
u'\xb8' # 0xB8 -> CEDILLA
u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON
u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA
u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON
u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE
u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT
u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON
u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON
u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON
u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE
u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON
u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON
u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA
u'\u02d9' # 0xFF -> DOT ABOVE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_3 generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-3',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE
u'\u02d8' # 0xA2 -> BREVE
u'\xa3' # 0xA3 -> POUND SIGN
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\ufffe'
u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE
u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX
u'\xad' # 0xAD -> SOFT HYPHEN
u'\ufffe'
u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE
u'\xb2' # 0xB2 -> SUPERSCRIPT TWO
u'\xb3' # 0xB3 -> SUPERSCRIPT THREE
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\xb5' # 0xB5 -> MICRO SIGN
u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX
u'\xb7' # 0xB7 -> MIDDLE DOT
u'\xb8' # 0xB8 -> CEDILLA
u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I
u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA
u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE
u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX
u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
u'\ufffe'
u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\ufffe'
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX
u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\ufffe'
u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX
u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE
u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\ufffe'
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE
u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX
u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
u'\ufffe'
u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX
u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE
u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX
u'\u02d9' # 0xFF -> DOT ABOVE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_4 generated from 'MAPPINGS/ISO8859/8859-4.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-4',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA
u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA
u'\xa7' # 0xA7 -> SECTION SIGN
u'\xa8' # 0xA8 -> DIAERESIS
u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON
u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON
u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA
u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON
u'\xaf' # 0xAF -> MACRON
u'\xb0' # 0xB0 -> DEGREE SIGN
u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK
u'\u02db' # 0xB2 -> OGONEK
u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH CEDILLA
u'\xb4' # 0xB4 -> ACUTE ACCENT
u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE
u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA
u'\u02c7' # 0xB7 -> CARON
u'\xb8' # 0xB8 -> CEDILLA
u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON
u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON
u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA
u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE
u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG
u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON
u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG
u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON
u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA
u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd7' # 0xD7 -> MULTIPLICATION SIGN
u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE
u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON
u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON
u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE
u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK
u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON
u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON
u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA
u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf7' # 0xF7 -> DIVISION SIGN
u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK
u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE
u'\u016b' # 0xFE -> LATIN SMALL LETTER U WITH MACRON
u'\u02d9' # 0xFF -> DOT ABOVE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_5 generated from 'MAPPINGS/ISO8859/8859-5.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-5',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\u0401' # 0xA1 -> CYRILLIC CAPITAL LETTER IO
u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE
u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE
u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE
u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI
u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE
u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE
u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE
u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE
u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE
u'\xad' # 0xAD -> SOFT HYPHEN
u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U
u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE
u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A
u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE
u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE
u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE
u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE
u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE
u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE
u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE
u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I
u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I
u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA
u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL
u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM
u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN
u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O
u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE
u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER
u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES
u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE
u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U
u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF
u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA
u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE
u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE
u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA
u'\u0429' # 0xC9 -> CYRILLIC CAPITAL LETTER SHCHA
u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN
u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU
u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN
u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E
u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU
u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA
u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A
u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE
u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE
u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE
u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE
u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE
u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE
u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE
u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I
u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I
u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA
u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL
u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM
u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN
u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O
u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE
u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER
u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES
u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE
u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U
u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF
u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA
u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE
u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE
u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA
u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA
u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN
u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU
u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN
u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E
u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU
u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA
u'\u2116' # 0xF0 -> NUMERO SIGN
u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO
u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE
u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE
u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE
u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI
u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE
u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE
u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE
u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE
u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE
u'\xa7' # 0xFD -> SECTION SIGN
u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U
u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

View File

@ -0,0 +1,307 @@
""" Python Character Mapping Codec iso8859_6 generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return codecs.CodecInfo(
name='iso8859-6',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
### Decoding Table
decoding_table = (
u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0A -> LINE FEED
u'\x0b' # 0x0B -> VERTICAL TABULATION
u'\x0c' # 0x0C -> FORM FEED
u'\r' # 0x0D -> CARRIAGE RETURN
u'\x0e' # 0x0E -> SHIFT OUT
u'\x0f' # 0x0F -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1A -> SUBSTITUTE
u'\x1b' # 0x1B -> ESCAPE
u'\x1c' # 0x1C -> FILE SEPARATOR
u'\x1d' # 0x1D -> GROUP SEPARATOR
u'\x1e' # 0x1E -> RECORD SEPARATOR
u'\x1f' # 0x1F -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2A -> ASTERISK
u'+' # 0x2B -> PLUS SIGN
u',' # 0x2C -> COMMA
u'-' # 0x2D -> HYPHEN-MINUS
u'.' # 0x2E -> FULL STOP
u'/' # 0x2F -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3A -> COLON
u';' # 0x3B -> SEMICOLON
u'<' # 0x3C -> LESS-THAN SIGN
u'=' # 0x3D -> EQUALS SIGN
u'>' # 0x3E -> GREATER-THAN SIGN
u'?' # 0x3F -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4A -> LATIN CAPITAL LETTER J
u'K' # 0x4B -> LATIN CAPITAL LETTER K
u'L' # 0x4C -> LATIN CAPITAL LETTER L
u'M' # 0x4D -> LATIN CAPITAL LETTER M
u'N' # 0x4E -> LATIN CAPITAL LETTER N
u'O' # 0x4F -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5A -> LATIN CAPITAL LETTER Z
u'[' # 0x5B -> LEFT SQUARE BRACKET
u'\\' # 0x5C -> REVERSE SOLIDUS
u']' # 0x5D -> RIGHT SQUARE BRACKET
u'^' # 0x5E -> CIRCUMFLEX ACCENT
u'_' # 0x5F -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6A -> LATIN SMALL LETTER J
u'k' # 0x6B -> LATIN SMALL LETTER K
u'l' # 0x6C -> LATIN SMALL LETTER L
u'm' # 0x6D -> LATIN SMALL LETTER M
u'n' # 0x6E -> LATIN SMALL LETTER N
u'o' # 0x6F -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7A -> LATIN SMALL LETTER Z
u'{' # 0x7B -> LEFT CURLY BRACKET
u'|' # 0x7C -> VERTICAL LINE
u'}' # 0x7D -> RIGHT CURLY BRACKET
u'~' # 0x7E -> TILDE
u'\x7f' # 0x7F -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8A -> <control>
u'\x8b' # 0x8B -> <control>
u'\x8c' # 0x8C -> <control>
u'\x8d' # 0x8D -> <control>
u'\x8e' # 0x8E -> <control>
u'\x8f' # 0x8F -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9A -> <control>
u'\x9b' # 0x9B -> <control>
u'\x9c' # 0x9C -> <control>
u'\x9d' # 0x9D -> <control>
u'\x9e' # 0x9E -> <control>
u'\x9f' # 0x9F -> <control>
u'\xa0' # 0xA0 -> NO-BREAK SPACE
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\xa4' # 0xA4 -> CURRENCY SIGN
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u060c' # 0xAC -> ARABIC COMMA
u'\xad' # 0xAD -> SOFT HYPHEN
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u061b' # 0xBB -> ARABIC SEMICOLON
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u061f' # 0xBF -> ARABIC QUESTION MARK
u'\ufffe'
u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA
u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
u'\u0627' # 0xC7 -> ARABIC LETTER ALEF
u'\u0628' # 0xC8 -> ARABIC LETTER BEH
u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA
u'\u062a' # 0xCA -> ARABIC LETTER TEH
u'\u062b' # 0xCB -> ARABIC LETTER THEH
u'\u062c' # 0xCC -> ARABIC LETTER JEEM
u'\u062d' # 0xCD -> ARABIC LETTER HAH
u'\u062e' # 0xCE -> ARABIC LETTER KHAH
u'\u062f' # 0xCF -> ARABIC LETTER DAL
u'\u0630' # 0xD0 -> ARABIC LETTER THAL
u'\u0631' # 0xD1 -> ARABIC LETTER REH
u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN
u'\u0633' # 0xD3 -> ARABIC LETTER SEEN
u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN
u'\u0635' # 0xD5 -> ARABIC LETTER SAD
u'\u0636' # 0xD6 -> ARABIC LETTER DAD
u'\u0637' # 0xD7 -> ARABIC LETTER TAH
u'\u0638' # 0xD8 -> ARABIC LETTER ZAH
u'\u0639' # 0xD9 -> ARABIC LETTER AIN
u'\u063a' # 0xDA -> ARABIC LETTER GHAIN
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u0640' # 0xE0 -> ARABIC TATWEEL
u'\u0641' # 0xE1 -> ARABIC LETTER FEH
u'\u0642' # 0xE2 -> ARABIC LETTER QAF
u'\u0643' # 0xE3 -> ARABIC LETTER KAF
u'\u0644' # 0xE4 -> ARABIC LETTER LAM
u'\u0645' # 0xE5 -> ARABIC LETTER MEEM
u'\u0646' # 0xE6 -> ARABIC LETTER NOON
u'\u0647' # 0xE7 -> ARABIC LETTER HEH
u'\u0648' # 0xE8 -> ARABIC LETTER WAW
u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA
u'\u064a' # 0xEA -> ARABIC LETTER YEH
u'\u064b' # 0xEB -> ARABIC FATHATAN
u'\u064c' # 0xEC -> ARABIC DAMMATAN
u'\u064d' # 0xED -> ARABIC KASRATAN
u'\u064e' # 0xEE -> ARABIC FATHA
u'\u064f' # 0xEF -> ARABIC DAMMA
u'\u0650' # 0xF0 -> ARABIC KASRA
u'\u0651' # 0xF1 -> ARABIC SHADDA
u'\u0652' # 0xF2 -> ARABIC SUKUN
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
)
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)

Some files were not shown because too many files have changed in this diff Show More