AppPkg/Applications/Python: Add Python 2.7.2 sources since the release of Python 2.7.3 made them unavailable from the python.org web site.
These files are a subset of the python-2.7.2.tgz distribution from python.org. Changed files from PyMod-2.7.2 have been copied into the corresponding directories of this tree, replacing the original files in the distribution. Signed-off-by: daryl.mcdaniel@intel.com git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
This commit is contained in:
@@ -0,0 +1,79 @@
|
||||
To generate or modify mapping headers
|
||||
-------------------------------------
|
||||
Mapping headers are imported from CJKCodecs in pre-generated form.
|
||||
If you need to tweak or add something to them, please look at the tools/
|
||||
subdirectory of CJKCodecs' distribution.
|
||||
|
||||
|
||||
|
||||
Notes on implementation characteristics of each codec
|
||||
-----------------------------------------------------
|
||||
|
||||
1) Big5 codec
|
||||
|
||||
The big5 codec maps the following characters as cp950 does rather
|
||||
than conforming to Unicode.org's mapping, which maps them to 0xFFFD.
|
||||
|
||||
BIG5 Unicode Description
|
||||
|
||||
0xA15A 0x2574 SPACING UNDERSCORE
|
||||
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
|
||||
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
|
||||
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
|
||||
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
|
||||
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
|
||||
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
|
||||
|
||||
Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are mapped to other
|
||||
big5 codes already, a roundtrip compatibility is not guaranteed for
|
||||
them.
|
||||
|
||||
|
||||
2) cp932 codec
|
||||
|
||||
To conform to Windows's real mapping, cp932 codec maps the following
|
||||
codepoints in addition to the official cp932 mapping.
|
||||
|
||||
CP932 Unicode Description
|
||||
|
||||
0x80 0x80 UNDEFINED
|
||||
0xA0 0xF8F0 UNDEFINED
|
||||
0xFD 0xF8F1 UNDEFINED
|
||||
0xFE 0xF8F2 UNDEFINED
|
||||
0xFF 0xF8F3 UNDEFINED
|
||||
|
||||
|
||||
3) euc-jisx0213 codec
|
||||
|
||||
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
|
||||
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
|
||||
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A1C0
|
||||
is shown as a full width character, mapping to U+FF3C can make
|
||||
more sense.
|
||||
|
||||
The euc-jisx0213 codec is enabled to decode JIS X 0212 codes on
|
||||
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 don't have
|
||||
code points that overlap each other, this does not break standard conformance
|
||||
(and JIS X 0213 Plane 2 is intended to be used that way.)  On encoding
|
||||
sessions, the codec will try to encode kanji characters in this
|
||||
order:
|
||||
|
||||
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
|
||||
|
||||
|
||||
4) euc-jp codec
|
||||
|
||||
The euc-jp codec is a compatibility instance on these points:
|
||||
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
|
||||
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
|
||||
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
|
||||
|
||||
|
||||
5) shift-jis codec
|
||||
|
||||
The shift-jis codec is mapping 0x20-0x7e area to U+20-U+7E directly
|
||||
instead of using JIS X 0201 for compatibility. The differences are:
|
||||
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
|
||||
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
|
||||
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.
|
||||
|
@@ -0,0 +1,444 @@
|
||||
/*
|
||||
* _codecs_cn.c: Codecs collection for Mainland Chinese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_cn.h"
|
||||
|
||||
/**
|
||||
* hz is predefined as 100 on AIX. So we undefine it to avoid
|
||||
* a conflict with the hz codec's identifiers.
|
||||
*/
|
||||
#ifdef _AIX
|
||||
#undef hz
|
||||
#endif
|
||||
|
||||
/* GBK and GB2312 map a few codepoints differently; they are listed below:
|
||||
*
|
||||
* gb2312 gbk
|
||||
* A1A4 U+30FB KATAKANA MIDDLE DOT U+00B7 MIDDLE DOT
|
||||
* A1AA U+2015 HORIZONTAL BAR U+2014 EM DASH
|
||||
* A844 undefined U+2015 HORIZONTAL BAR
|
||||
*/
|
||||
|
||||
/*
 * GBK_DECODE(dc1, dc2, assi): decode the two-byte sequence (dc1, dc2) into
 * `assi`.  The three code points that GBK maps differently from GB2312 are
 * handled first, then the gb2312 table is tried (with the high bit of both
 * bytes cleared), and finally the gbkext table.
 *
 * The expansion is an open if / else-if chain: the caller is expected to
 * append its own `else` clause for the "no mapping found" case.
 * The byte arguments are parenthesised before being XOR-ed so that
 * expressions such as `a | b` can be passed safely.
 */
#define GBK_DECODE(dc1, dc2, assi) \
    if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
    else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
    else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
    else TRYMAP_DEC(gb2312, assi, (dc1) ^ 0x80, (dc2) ^ 0x80); \
    else TRYMAP_DEC(gbkext, assi, dc1, dc2);
|
||||
|
||||
/*
 * GBK_ENCODE(code, assi): encode the Unicode value `code` into the
 * double-byte value `assi`.  The three code points where GBK differs from
 * GB2312 are special-cased; U+30FB is deliberately excluded from the
 * gbcommon lookup.
 *
 * Like GBK_DECODE this expands to an open if / else-if chain; the caller
 * supplies the trailing `else` for the failure case.
 */
#define GBK_ENCODE(code, assi) \
    if ((code) == 0x00b7) (assi) = 0xa1a4; \
    else if ((code) == 0x2014) (assi) = 0xa1aa; \
    else if ((code) == 0x2015) (assi) = 0xa844; \
    else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
|
||||
|
||||
/*
|
||||
* GB2312 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GBK codec
|
||||
*/
|
||||
|
||||
ENCODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
GBK_ENCODE(c, code)
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
GBK_DECODE(c, IN2, **outbuf)
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* GB18030 codec
|
||||
*/
|
||||
|
||||
ENCODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
if (c > 0x10FFFF)
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
return 2; /* surrogates pair */
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
else if (c >= 0x10000) {
|
||||
ucs4_t tc = c - 0x10000;
|
||||
|
||||
REQUIRE_OUTBUF(4)
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)(tc + 0x90))
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
NEXT(2, 4) /* surrogates pair */
|
||||
#else
|
||||
NEXT(1, 4)
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
GBK_ENCODE(c, code)
|
||||
else TRYMAP_ENC(gb18030ext, code, c);
|
||||
else {
|
||||
const struct _gb18030_to_unibmp_ranges *utrrange;
|
||||
|
||||
REQUIRE_OUTBUF(4)
|
||||
|
||||
for (utrrange = gb18030_to_unibmp_ranges;
|
||||
utrrange->first != 0;
|
||||
utrrange++)
|
||||
if (utrrange->first <= c &&
|
||||
c <= utrrange->last) {
|
||||
Py_UNICODE tc;
|
||||
|
||||
tc = c - utrrange->first +
|
||||
utrrange->base;
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)tc + 0x81)
|
||||
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
|
||||
if (utrrange->first == 0)
|
||||
return 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
c2 = IN2;
|
||||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
ucs4_t lseq;
|
||||
|
||||
REQUIRE_INBUF(4)
|
||||
c3 = IN3;
|
||||
c4 = IN4;
|
||||
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
|
||||
return 4;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUT1(utr->first - utr->base + lseq)
|
||||
NEXT(4, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
|
||||
* 1260 + (ucs4_t)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
WRITEUCS4(lseq);
|
||||
NEXT_IN(4)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
GBK_DECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* HZ codec
|
||||
*/
|
||||
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
WRITE2('~', '}')
|
||||
state->i = 0;
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i == 0) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else {
|
||||
WRITE3('~', '}', (unsigned char)c)
|
||||
NEXT(1, 3)
|
||||
state->i = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
WRITE4('~', '{', code >> 8, code & 0xff)
|
||||
NEXT(1, 4)
|
||||
state->i = 1;
|
||||
}
|
||||
else {
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
if (c == '~') {
|
||||
unsigned char c2 = IN2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
if (c2 == '~') {
|
||||
WRITE1('~')
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '\n')
|
||||
; /* line-continuation */
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
else { /* GB mode */
|
||||
REQUIRE_INBUF(2)
|
||||
REQUIRE_OUTBUF(1)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(gb2312)
|
||||
MAPPING_DECONLY(gbkext)
|
||||
MAPPING_ENCONLY(gbcommon)
|
||||
MAPPING_ENCDEC(gb18030ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(gb2312)
|
||||
CODEC_STATELESS(gbk)
|
||||
CODEC_STATELESS(gb18030)
|
||||
CODEC_STATEFUL(hz)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(cn)
|
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* _codecs_hk.c: Codecs collection for encodings from Hong Kong
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#define USING_IMPORTED_MAPS
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_hk.h"
|
||||
|
||||
/*
|
||||
* BIG5HKSCS codec
|
||||
*/
|
||||
|
||||
static const encode_map *big5_encmap = NULL;
|
||||
static const decode_map *big5_decmap = NULL;
|
||||
|
||||
CODEC_INIT(big5hkscs)
|
||||
{
|
||||
static int initialized = 0;
|
||||
|
||||
if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
|
||||
return -1;
|
||||
initialized = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
|
||||
* U+00CA U+0304 -> 8862 (U+00CA alone is mapped to 8866)
|
||||
* U+00CA U+030C -> 8864
|
||||
* U+00EA U+0304 -> 88a3 (U+00EA alone is mapped to 88a7)
|
||||
* U+00EA U+030C -> 88a5
|
||||
* These are handled not by mapping tables but by hand-written code.
|
||||
*/
|
||||
static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
|
||||
|
||||
ENCODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = **inbuf;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (c < 0x10000) {
|
||||
TRYMAP_ENC(big5hkscs_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft >= 2 &&
|
||||
((c & 0xffdf) == 0x00ca) &&
|
||||
(((*inbuf)[1] & 0xfff7) == 0x0304)) {
|
||||
code = big5hkscs_pairenc_table[
|
||||
((c >> 4) |
|
||||
((*inbuf)[1] >> 3)) & 3];
|
||||
insize = 2;
|
||||
}
|
||||
else if (inleft < 2 &&
|
||||
!(flags & MBENC_FLUSH))
|
||||
return MBERR_TOOFEW;
|
||||
else {
|
||||
if (c == 0xca)
|
||||
code = 0x8866;
|
||||
else /* c == 0xea */
|
||||
code = 0x88a7;
|
||||
}
|
||||
}
|
||||
}
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
}
|
||||
else if (c < 0x20000)
|
||||
return insize;
|
||||
else if (c < 0x30000) {
|
||||
TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Linear index of the byte pair (c1, c2): rows start at lead byte 0x87 and
 * each row holds the trail bytes 0x40..0xfe. */
#define BH2S(c1, c2) (((c2) - 0x40) + ((c1) - 0x87) * (0xfe - 0x40 + 1))
|
||||
|
||||
DECODER(big5hkscs)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t decoded;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
|
||||
goto hkscsdec;
|
||||
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
hkscsdec: TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
|
||||
int s = BH2S(c, IN2);
|
||||
const unsigned char *hintbase;
|
||||
|
||||
assert(0x87 <= c && c <= 0xfe);
|
||||
assert(0x40 <= IN2 && IN2 <= 0xfe);
|
||||
|
||||
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
|
||||
hintbase = big5hkscs_phint_0;
|
||||
s -= BH2S(0x87, 0x40);
|
||||
}
|
||||
else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
|
||||
hintbase = big5hkscs_phint_12130;
|
||||
s -= BH2S(0xc6, 0xa1);
|
||||
}
|
||||
else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
|
||||
hintbase = big5hkscs_phint_21924;
|
||||
s -= BH2S(0xf9, 0xd6);
|
||||
}
|
||||
else
|
||||
return MBERR_INTERNAL;
|
||||
|
||||
if (hintbase[s >> 3] & (1 << (s & 7))) {
|
||||
WRITEUCS4(decoded | 0x20000)
|
||||
NEXT_IN(2)
|
||||
}
|
||||
else {
|
||||
OUT1(decoded)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch ((c << 8) | IN2) {
|
||||
case 0x8862: WRITE2(0x00ca, 0x0304); break;
|
||||
case 0x8864: WRITE2(0x00ca, 0x030c); break;
|
||||
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
|
||||
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
|
||||
default: return 2;
|
||||
}
|
||||
|
||||
NEXT(2, 2) /* all decoded codepoints are pairs, above. */
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(big5hkscs)
|
||||
MAPPING_ENCONLY(big5hkscs_bmp)
|
||||
MAPPING_ENCONLY(big5hkscs_nonbmp)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS_WINIT(big5hkscs)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(hk)
|
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,731 @@
|
||||
/*
|
||||
* _codecs_jp.c: Codecs collection for Japanese encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_jp.h"
|
||||
#include "mappings_jisx0213_pair.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "emu_jisx0213_2000.h"
|
||||
|
||||
/*
|
||||
* CP932 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
if (c <= 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
WRITE1(c - 0xfec0)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
||||
/* Windows compatibility */
|
||||
REQUIRE_OUTBUF(1)
|
||||
if (c == 0xf8f0)
|
||||
OUT1(0xa0)
|
||||
else
|
||||
OUT1(c - 0xfef1 + 0xfd)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(cp932ext, code, c) {
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
|
||||
/* JIS X 0208 */
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
}
|
||||
else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UNICODE)(c - 0xe000) / 188;
|
||||
c2 = (Py_UNICODE)(c - 0xe000) % 188;
|
||||
OUT1(c1 + 0xf0)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
if (c <= 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUT1(0xf8f0) /* half-width katakana */
|
||||
else
|
||||
OUT1(0xfec0 + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUT1(0xf8f1 - 0xfd + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
}
|
||||
else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) ||
|
||||
(c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
Py_ssize_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, (*inbuf)[1],
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
else if (c == 0xff3c)
|
||||
/* F/W REVERSE SOLIDUS (see NOTES) */
|
||||
code = 0x2140;
|
||||
else if (c == 0xff5e)
|
||||
/* F/W TILDE (see NOTES) */
|
||||
code = 0x2232;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* Codeset 2 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 3)
|
||||
} else {
|
||||
/* Codeset 1 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3)
|
||||
c2 = IN2 ^ 0x80;
|
||||
c3 = IN3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(3)
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else return 3;
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c ^= 0x80;
|
||||
c2 = IN2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
|
||||
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
}
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* EUC-JP codec
|
||||
*/
|
||||
|
||||
ENCODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else if (c == 0xa5) { /* YEN SIGN */
|
||||
WRITE1(0x5c);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c == 0x203e) { /* OVERLINE */
|
||||
WRITE1(0x7e);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* JIS X 0212 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 3)
|
||||
} else {
|
||||
/* JIS X 0208 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
REQUIRE_INBUF(3)
|
||||
c2 = IN2;
|
||||
c3 = IN3;
|
||||
/* JIS X 0212 */
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT(3, 1)
|
||||
}
|
||||
else
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
unsigned char c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
/* JIS X 0208 */
|
||||
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0)
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf,
|
||||
c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
#else
|
||||
if (c < 0x80) code = c;
|
||||
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
|
||||
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
|
||||
#endif
|
||||
else JISX0201_K_ENCODE(c, code)
|
||||
else UCS4INVALID(c)
|
||||
else code = NOCHAR;
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
OUT1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (code == NOCHAR) {
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c)
|
||||
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
#else
|
||||
if (c < 0x80) **outbuf = c;
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
|
||||
unsigned char c1, c2;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUT1(0xff3c)
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SHIFT_JIS-2004 codec
|
||||
*/
|
||||
|
||||
ENCODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
Py_ssize_t insize;
|
||||
|
||||
JISX0201_ENCODE(c, code)
|
||||
else DECODE_SURROGATE(c)
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
WRITE1((unsigned char)code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (c <= 0xffff) {
|
||||
EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap
|
||||
((ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return MBERR_TOOFEW;
|
||||
}
|
||||
else {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, IN2,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(
|
||||
(ucs2_t)c, 0,
|
||||
jisx0213_pair_encmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
/* abandon JIS X 0212 codes */
|
||||
if (code & 0x8000)
|
||||
return 1;
|
||||
}
|
||||
else return 1;
|
||||
}
|
||||
else if (c >> 16 == EMPBASE >> 16) {
|
||||
EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
|
||||
else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
|
||||
else return insize;
|
||||
}
|
||||
else
|
||||
return insize;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = (code & 0xff) - 0x21;
|
||||
|
||||
if (c1 & 0x80) { /* Plane 2 */
|
||||
if (c1 >= 0xee) c1 -= 0x87;
|
||||
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
|
||||
else c1 -= 0x43;
|
||||
}
|
||||
else /* Plane 1 */
|
||||
c1 -= 0x21;
|
||||
|
||||
if (c1 & 1) c2 += 0x5e;
|
||||
c1 >>= 1;
|
||||
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
|
||||
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
|
||||
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Shift_JIS-2004 decoder (also registered below as "shift_jisx0213" with
 * config == 2000, which activates the EMULATE_JISX0213_2000_* checks).
 *
 * Returns 0 once all input is consumed, 2 for an undecodable sequence,
 * MBERR_TOOFEW when a trail byte is missing, and MBERR_TOOSMALL when the
 * output buffer is exhausted.
 */
DECODER(shift_jis_2004)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
/* Single-byte JIS X 0201 first; on a hit, control falls out of the
 * if/else chain and reaches NEXT(1, 1) at the bottom of the loop. */
JISX0201_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
|
||||
unsigned char c1, c2;
|
||||
ucs4_t code;
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
/* Valid trail bytes are 0x40-0x7e and 0x80-0xfc. */
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
/* Fold the lead/trail pair into a zero-based row index (c1) and a
 * 0x21-based cell (c2): each lead byte covers two rows of 0x5e cells. */
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
|
||||
c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
|
||||
/* WRITEUCS4 advances the output pointer itself. */
WRITEUCS4(EMPBASE | code)
|
||||
}
|
||||
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
|
||||
/* The pair table packs two code units: high 16 bits, low 16 bits. */
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
/* Every successful plane-1 branch already advanced the output. */
NEXT_IN(2)
|
||||
}
|
||||
else { /* Plane 2 */
|
||||
/* Translate the folded row index into the row number used to index
 * the plane-2 tables. */
if (c1 >= 0x67) c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
|
||||
else c1 -= 0x3d;
|
||||
|
||||
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
|
||||
c1, c2)
|
||||
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
|
||||
c1, c2) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
|
||||
WRITEUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
/* Output was advanced by WRITEUCS4; skip the NEXT(2, 1) below. */
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(jisx0208)
|
||||
MAPPING_DECONLY(jisx0212)
|
||||
MAPPING_ENCONLY(jisxcommon)
|
||||
MAPPING_DECONLY(jisx0213_1_bmp)
|
||||
MAPPING_DECONLY(jisx0213_2_bmp)
|
||||
MAPPING_ENCONLY(jisx0213_bmp)
|
||||
MAPPING_DECONLY(jisx0213_1_emp)
|
||||
MAPPING_DECONLY(jisx0213_2_emp)
|
||||
MAPPING_ENCONLY(jisx0213_emp)
|
||||
MAPPING_ENCDEC(jisx0213_pair)
|
||||
MAPPING_ENCDEC(cp932ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(shift_jis)
|
||||
CODEC_STATELESS(cp932)
|
||||
CODEC_STATELESS(euc_jp)
|
||||
CODEC_STATELESS(shift_jis_2004)
|
||||
CODEC_STATELESS(euc_jis_2004)
|
||||
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
|
||||
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(jp)
|
@@ -0,0 +1,452 @@
|
||||
/*
|
||||
* _codecs_kr.c: Codecs collection for Korean encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_kr.h"
|
||||
|
||||
/*
|
||||
* EUC-KR codec
|
||||
*/
|
||||
|
||||
#define EUCKR_JAMO_FIRSTBYTE 0xA4
|
||||
#define EUCKR_JAMO_FILLER 0xD4
|
||||
|
||||
static const unsigned char u2cgk_choseong[19] = {
|
||||
0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
|
||||
0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
|
||||
0xbc, 0xbd, 0xbe
|
||||
};
|
||||
static const unsigned char u2cgk_jungseong[21] = {
|
||||
0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
|
||||
0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
|
||||
0xcf, 0xd0, 0xd1, 0xd2, 0xd3
|
||||
};
|
||||
static const unsigned char u2cgk_jongseong[28] = {
|
||||
0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
|
||||
0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
|
||||
0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
|
||||
0xbb, 0xbc, 0xbd, 0xbe
|
||||
};
|
||||
|
||||
/*
 * EUC-KR encoder.
 *
 * ASCII is copied through as one byte.  Other characters are looked up in the
 * cp949 encode map: entries without the 0x8000 flag are emitted as two bytes
 * with the high bit set on each; entries with the flag are written as an
 * 8-byte jamo make-up sequence instead.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable character, or
 * MBERR_TOOSMALL when the output buffer is too small.
 */
ENCODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
if ((code & 0x8000) == 0) {
|
||||
/* KS X 1001 coded character */
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
else { /* Mapping is found in CP949 extension,
|
||||
* but we encode it in KS X 1001:1998 Annex 3,
|
||||
* make-up sequence for EUC-KR. */
|
||||
|
||||
REQUIRE_OUTBUF(8)
|
||||
|
||||
/* syllable composition precedence */
|
||||
OUT1(EUCKR_JAMO_FIRSTBYTE)
|
||||
OUT2(EUCKR_JAMO_FILLER)
|
||||
|
||||
/* All codepoints in CP949 extension are in unicode
|
||||
* Hangul Syllable area. */
|
||||
assert(0xac00 <= c && c <= 0xd7a3);
|
||||
c -= 0xac00;
|
||||
|
||||
/* 588 == 21 * 28: the syllable index splits into an initial
 * (c / 588), a medial ((c / 28) % 21) and a final (c % 28) index. */
OUT3(EUCKR_JAMO_FIRSTBYTE)
|
||||
OUT4(u2cgk_choseong[c / 588])
|
||||
/* First four bytes are written; advance the output so OUT1..OUT4
 * can address the second half of the sequence. */
NEXT_OUT(4)
|
||||
|
||||
OUT1(EUCKR_JAMO_FIRSTBYTE)
|
||||
OUT2(u2cgk_jungseong[(c / 28) % 21])
|
||||
OUT3(EUCKR_JAMO_FIRSTBYTE)
|
||||
OUT4(u2cgk_jongseong[c % 28])
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define NONE 127
|
||||
|
||||
static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */
|
||||
0, 1, NONE, 2, NONE, NONE, 3, 4,
|
||||
5, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||||
6, 7, 8, NONE, 9, 10, 11, 12,
|
||||
13, 14, 15, 16, 17, 18
|
||||
};
|
||||
static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
|
||||
1, 2, 3, 4, 5, 6, 7, NONE,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, NONE, 18, 19, 20, 21, 22,
|
||||
NONE, 23, 24, 25, 26, 27
|
||||
};
|
||||
|
||||
DECODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
if (c == EUCKR_JAMO_FIRSTBYTE &&
|
||||
IN2 == EUCKR_JAMO_FILLER) {
|
||||
/* KS X 1001:1998 Annex 3 make-up sequence */
|
||||
DBCHAR cho, jung, jong;
|
||||
|
||||
REQUIRE_INBUF(8)
|
||||
if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
|
||||
(*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
|
||||
return 8;
|
||||
|
||||
c = (*inbuf)[3];
|
||||
if (0xa1 <= c && c <= 0xbe)
|
||||
cho = cgk2u_choseong[c - 0xa1];
|
||||
else
|
||||
cho = NONE;
|
||||
|
||||
c = (*inbuf)[5];
|
||||
jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
|
||||
|
||||
c = (*inbuf)[7];
|
||||
if (c == EUCKR_JAMO_FILLER)
|
||||
jong = 0;
|
||||
else if (0xa1 <= c && c <= 0xbe)
|
||||
jong = cgk2u_jongseong[c - 0xa1];
|
||||
else
|
||||
jong = NONE;
|
||||
|
||||
if (cho == NONE || jung == NONE || jong == NONE)
|
||||
return 8;
|
||||
|
||||
OUT1(0xac00 + cho*588 + jung*28 + jong);
|
||||
NEXT(8, 1)
|
||||
}
|
||||
else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
|
||||
|
||||
/*
|
||||
* CP949 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2(code & 0xFF) /* MSB set: CP949 */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* JOHAB codec
|
||||
*/
|
||||
|
||||
static const unsigned char u2johabidx_choseong[32] = {
|
||||
0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14,
|
||||
};
|
||||
static const unsigned char u2johabidx_jungseong[32] = {
|
||||
0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const unsigned char u2johabidx_jongseong[32] = {
|
||||
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
|
||||
};
|
||||
static const DBCHAR u2johabjamo[] = {
|
||||
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
|
||||
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
|
||||
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
|
||||
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
|
||||
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
|
||||
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
|
||||
0x8741, 0x8761, 0x8781, 0x87a1,
|
||||
};
|
||||
|
||||
/*
 * JOHAB encoder.
 *
 * ASCII is copied through as one byte.  Hangul syllables (U+AC00..U+D7A3) are
 * packed from three 5-bit table indexes, compatibility jamo (U+3131..U+3163)
 * come from the u2johabjamo table, and everything else is taken from the
 * cp949 encode map and re-folded into JOHAB lead/trail bytes.
 *
 * Returns 0 when the input is exhausted, 1 for an unencodable character, or
 * MBERR_TOOSMALL when the output buffer is too small.
 */
ENCODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
if (c >= 0xac00 && c <= 0xd7a3) {
|
||||
c -= 0xac00;
|
||||
/* 16-bit code: top bit set, then 5 bits each for the initial
 * (c / 588), medial ((c / 28) % 21) and final (c % 28) index. */
code = 0x8000 |
|
||||
(u2johabidx_choseong[c / 588] << 10) |
|
||||
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
|
||||
u2johabidx_jongseong[c % 28];
|
||||
}
|
||||
else if (c >= 0x3131 && c <= 0x3163)
|
||||
code = u2johabjamo[c - 0x3131];
|
||||
else TRYMAP_ENC(cp949, code, c) {
|
||||
unsigned char c1, c2, t2;
|
||||
unsigned short t1;
|
||||
|
||||
/* Hangul syllables were handled above, so only entries without the
 * 0x8000 flag are expected here. */
assert((code & 0x8000) == 0);
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
if (((c1 >= 0x21 && c1 <= 0x2c) ||
|
||||
(c1 >= 0x4a && c1 <= 0x7d)) &&
|
||||
(c2 >= 0x21 && c2 <= 0x7e)) {
|
||||
/* Two source rows share one JOHAB lead byte: t1 >> 1 is the lead
 * byte and the low bit of t1 selects the upper half of the trail
 * byte range. */
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
|
||||
(c1 - 0x21 + 0x197));
|
||||
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
OUT1(t1 >> 1)
|
||||
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
|
||||
/* Reached only from the syllable and jamo branches above. */
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define FILL 0xfd
|
||||
#define NONE 0xff
|
||||
|
||||
static const unsigned char johabidx_choseong[32] = {
|
||||
NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
|
||||
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
|
||||
0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
|
||||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabidx_jungseong[32] = {
|
||||
NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
|
||||
NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
|
||||
NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
|
||||
NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabidx_jongseong[32] = {
|
||||
NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
|
||||
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
|
||||
0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
|
||||
0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
|
||||
};
|
||||
|
||||
static const unsigned char johabjamo_choseong[32] = {
|
||||
NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
|
||||
0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
|
||||
0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
|
||||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabjamo_jungseong[32] = {
|
||||
NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
|
||||
NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
||||
NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
|
||||
NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
|
||||
};
|
||||
static const unsigned char johabjamo_jongseong[32] = {
|
||||
NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
|
||||
0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
|
||||
0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
|
||||
0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
|
||||
};
|
||||
|
||||
/*
 * JOHAB decoder.
 *
 * ASCII bytes map to themselves.  Lead bytes below 0xd8 carry a Hangul code
 * made of three 5-bit fields; other lead bytes are folded back into a
 * row/cell pair and looked up in the ksx1001 decode map.
 *
 * Returns 0 when the input is exhausted, 2 for an invalid pair,
 * MBERR_TOOFEW / MBERR_TOOSMALL on short buffers.
 */
DECODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
if (c < 0xd8) {
|
||||
/* johab hangul */
|
||||
unsigned char c_cho, c_jung, c_jong;
|
||||
unsigned char i_cho, i_jung, i_jong;
|
||||
|
||||
/* Split the 16-bit (c, c2) pair into its three 5-bit fields. */
c_cho = (c >> 2) & 0x1f;
|
||||
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
|
||||
c_jong = c2 & 0x1f;
|
||||
|
||||
/* Each lookup yields a syllable index, FILL (slot empty) or NONE
 * (field value not assigned). */
i_cho = johabidx_choseong[c_cho];
|
||||
i_jung = johabidx_jungseong[c_jung];
|
||||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 2;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
/* Exactly one filled slot gives a U+31xx compatibility jamo, no
 * filled slot gives U+3000, and an initial plus medial gives a
 * composed syllable; other combinations are rejected. */
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3000)
|
||||
else
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jongseong[c_jong])
|
||||
}
|
||||
else {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_jungseong[c_jung])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 |
|
||||
johabjamo_choseong[c_cho])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
OUT1(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong))
|
||||
}
|
||||
NEXT(2, 1)
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
if (c == 0xdf || c > 0xf9 ||
|
||||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 2;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
/* Undo the encoder's folding: each lead byte covers two rows of
 * 0x5e cells; t1/t2 become the 0x21-based row and cell. */
t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
|
||||
2 * c - 0x197);
|
||||
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
|
||||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
#undef FILL
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_DECONLY(ksx1001)
|
||||
MAPPING_ENCONLY(cp949)
|
||||
MAPPING_DECONLY(cp949ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(euc_kr)
|
||||
CODEC_STATELESS(cp949)
|
||||
CODEC_STATELESS(johab)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(kr)
|
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* _codecs_tw.c: Codecs collection for Taiwan's encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#include "cjkcodecs.h"
|
||||
#include "mappings_tw.h"
|
||||
|
||||
/*
|
||||
* BIG5 codec
|
||||
*/
|
||||
|
||||
ENCODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
REQUIRE_OUTBUF(1)
|
||||
**outbuf = (unsigned char)c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
}
|
||||
else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CP950 codec
|
||||
*/
|
||||
|
||||
ENCODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1((unsigned char)c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
REQUIRE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp950ext, code, c);
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
REQUIRE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
REQUIRE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
BEGIN_MAPPINGS_LIST
|
||||
MAPPING_ENCDEC(big5)
|
||||
MAPPING_ENCDEC(cp950ext)
|
||||
END_MAPPINGS_LIST
|
||||
|
||||
BEGIN_CODECS_LIST
|
||||
CODEC_STATELESS(big5)
|
||||
CODEC_STATELESS(cp950)
|
||||
END_CODECS_LIST
|
||||
|
||||
I_AM_A_MODULE_FOR(tw)
|
@@ -0,0 +1,24 @@
|
||||
#define JISX0201_R_ENCODE(c, assi) \
|
||||
if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e) \
|
||||
(assi) = (c); \
|
||||
else if ((c) == 0x00a5) (assi) = 0x5c; \
|
||||
else if ((c) == 0x203e) (assi) = 0x7e;
|
||||
#define JISX0201_K_ENCODE(c, assi) \
|
||||
if ((c) >= 0xff61 && (c) <= 0xff9f) \
|
||||
(assi) = (c) - 0xfec0;
|
||||
#define JISX0201_ENCODE(c, assi) \
|
||||
JISX0201_R_ENCODE(c, assi) \
|
||||
else JISX0201_K_ENCODE(c, assi)
|
||||
|
||||
#define JISX0201_R_DECODE(c, assi) \
|
||||
if ((c) < 0x5c) (assi) = (c); \
|
||||
else if ((c) == 0x5c) (assi) = 0x00a5; \
|
||||
else if ((c) < 0x7e) (assi) = (c); \
|
||||
else if ((c) == 0x7e) (assi) = 0x203e; \
|
||||
else if ((c) == 0x7f) (assi) = 0x7f;
|
||||
#define JISX0201_K_DECODE(c, assi) \
|
||||
if ((c) >= 0xa1 && (c) <= 0xdf) \
|
||||
(assi) = 0xfec0 + (c);
|
||||
#define JISX0201_DECODE(c, assi) \
|
||||
JISX0201_R_DECODE(c, assi) \
|
||||
else JISX0201_K_DECODE(c, assi)
|
@@ -0,0 +1,398 @@
|
||||
/*
|
||||
* cjkcodecs.h: common header for cjkcodecs
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#ifndef _CJKCODECS_H_
|
||||
#define _CJKCODECS_H_
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "multibytecodec.h"
|
||||
|
||||
|
||||
/* a unicode "undefined" codepoint */
|
||||
#define UNIINV 0xFFFE
|
||||
|
||||
/* internal-use DBCS codepoints which aren't used by any charsets */
|
||||
#define NOCHAR 0xFFFF
|
||||
#define MULTIC 0xFFFE
|
||||
#define DBCINV 0xFFFD
|
||||
|
||||
/* shorter macros to save source size of mapping tables */
|
||||
#define U UNIINV
|
||||
#define N NOCHAR
|
||||
#define M MULTIC
|
||||
#define D DBCINV
|
||||
|
||||
struct dbcs_index {
|
||||
const ucs2_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const ucs4_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
||||
struct unim_index {
|
||||
const DBCHAR *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct unim_index encode_map;
|
||||
|
||||
struct unim_index_bytebased {
|
||||
const unsigned char *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
|
||||
struct dbcs_map {
|
||||
const char *charset;
|
||||
const struct unim_index *encmap;
|
||||
const struct dbcs_index *decmap;
|
||||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
ucs4_t uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
static const MultibyteCodec *codec_list;
|
||||
static const struct dbcs_map *mapping_list;
|
||||
|
||||
#define CODEC_INIT(encoding) \
|
||||
static int encoding##_codec_init(const void *config)
|
||||
|
||||
#define ENCODER_INIT(encoding) \
|
||||
static int encoding##_encode_init( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
#define ENCODER(encoding) \
|
||||
static Py_ssize_t encoding##_encode( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
|
||||
unsigned char **outbuf, Py_ssize_t outleft, int flags)
|
||||
#define ENCODER_RESET(encoding) \
|
||||
static Py_ssize_t encoding##_encode_reset( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
unsigned char **outbuf, Py_ssize_t outleft)
|
||||
|
||||
#define DECODER_INIT(encoding) \
|
||||
static int encoding##_decode_init( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
#define DECODER(encoding) \
|
||||
static Py_ssize_t encoding##_decode( \
|
||||
MultibyteCodec_State *state, const void *config, \
|
||||
const unsigned char **inbuf, Py_ssize_t inleft, \
|
||||
Py_UNICODE **outbuf, Py_ssize_t outleft)
|
||||
#define DECODER_RESET(encoding) \
|
||||
static Py_ssize_t encoding##_decode_reset( \
|
||||
MultibyteCodec_State *state, const void *config)
|
||||
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
#define UCS4INVALID(code) \
|
||||
if ((code) > 0xFFFF) \
|
||||
return 1;
|
||||
#else
|
||||
#define UCS4INVALID(code) \
|
||||
if (0) ;
|
||||
#endif
|
||||
|
||||
#define NEXT_IN(i) \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i);
|
||||
#define NEXT_OUT(o) \
|
||||
(*outbuf) += (o); \
|
||||
(outleft) -= (o);
|
||||
#define NEXT(i, o) \
|
||||
NEXT_IN(i) NEXT_OUT(o)
|
||||
|
||||
#define REQUIRE_INBUF(n) \
|
||||
if (inleft < (n)) \
|
||||
return MBERR_TOOFEW;
|
||||
#define REQUIRE_OUTBUF(n) \
|
||||
if (outleft < (n)) \
|
||||
return MBERR_TOOSMALL;
|
||||
|
||||
#define IN1 ((*inbuf)[0])
|
||||
#define IN2 ((*inbuf)[1])
|
||||
#define IN3 ((*inbuf)[2])
|
||||
#define IN4 ((*inbuf)[3])
|
||||
|
||||
#define OUT1(c) ((*outbuf)[0]) = (c);
|
||||
#define OUT2(c) ((*outbuf)[1]) = (c);
|
||||
#define OUT3(c) ((*outbuf)[2]) = (c);
|
||||
#define OUT4(c) ((*outbuf)[3]) = (c);
|
||||
|
||||
#define WRITE1(c1) \
|
||||
REQUIRE_OUTBUF(1) \
|
||||
(*outbuf)[0] = (c1);
|
||||
#define WRITE2(c1, c2) \
|
||||
REQUIRE_OUTBUF(2) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2);
|
||||
#define WRITE3(c1, c2, c3) \
|
||||
REQUIRE_OUTBUF(3) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3);
|
||||
#define WRITE4(c1, c2, c3, c4) \
|
||||
REQUIRE_OUTBUF(4) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3); \
|
||||
(*outbuf)[3] = (c4);
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(2) \
|
||||
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
|
||||
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
|
||||
NEXT_OUT(2)
|
||||
#else
|
||||
# define WRITEUCS4(c) \
|
||||
REQUIRE_OUTBUF(1) \
|
||||
**outbuf = (Py_UNICODE)(c); \
|
||||
NEXT_OUT(1)
|
||||
#endif
|
||||
|
||||
#define _TRYMAP_ENC(m, assi, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != NOCHAR)
|
||||
#define TRYMAP_ENC_COND(charset, assi, uni) \
|
||||
_TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
|
||||
#define TRYMAP_ENC(charset, assi, uni) \
|
||||
if TRYMAP_ENC_COND(charset, assi, uni)
|
||||
|
||||
#define _TRYMAP_DEC(m, assi, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != UNIINV)
|
||||
#define TRYMAP_DEC(charset, assi, c1, c2) \
|
||||
if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
|
||||
|
||||
#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
|
||||
((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && \
|
||||
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
|
||||
(((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
|
||||
(((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
|
||||
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
|
||||
if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
|
||||
assplane, asshi, asslo, (uni) & 0xff)
|
||||
#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
|
||||
if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
#define DECODE_SURROGATE(c) \
|
||||
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
|
||||
REQUIRE_INBUF(2) \
|
||||
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
|
||||
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
|
||||
((ucs4_t)(IN2) - 0xdc00); \
|
||||
} \
|
||||
}
|
||||
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
|
||||
#else
|
||||
#define DECODE_SURROGATE(c) {;}
|
||||
#define GET_INSIZE(c) 1
|
||||
#endif
|
||||
|
||||
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
|
||||
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
|
||||
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
|
||||
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
|
||||
#define END_MAPPINGS_LIST \
|
||||
{"", NULL, NULL} }; \
|
||||
static const struct dbcs_map *mapping_list = \
|
||||
(const struct dbcs_map *)_mapping_list;
|
||||
|
||||
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
|
||||
#define _STATEFUL_METHODS(enc) \
|
||||
enc##_encode, \
|
||||
enc##_encode_init, \
|
||||
enc##_encode_reset, \
|
||||
enc##_decode, \
|
||||
enc##_decode_init, \
|
||||
enc##_decode_reset,
|
||||
#define _STATELESS_METHODS(enc) \
|
||||
enc##_encode, NULL, NULL, \
|
||||
enc##_decode, NULL, NULL,
|
||||
#define CODEC_STATEFUL(enc) { \
|
||||
#enc, NULL, NULL, \
|
||||
_STATEFUL_METHODS(enc) \
|
||||
},
|
||||
#define CODEC_STATELESS(enc) { \
|
||||
#enc, NULL, NULL, \
|
||||
_STATELESS_METHODS(enc) \
|
||||
},
|
||||
#define CODEC_STATELESS_WINIT(enc) { \
|
||||
#enc, NULL, \
|
||||
enc##_codec_init, \
|
||||
_STATELESS_METHODS(enc) \
|
||||
},
|
||||
#define END_CODECS_LIST \
|
||||
{"", NULL,} }; \
|
||||
static const MultibyteCodec *codec_list = \
|
||||
(const MultibyteCodec *)_codec_list;
|
||||
|
||||
static PyObject *
|
||||
getmultibytecodec(void)
|
||||
{
|
||||
static PyObject *cofunc = NULL;
|
||||
|
||||
if (cofunc == NULL) {
|
||||
PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec");
|
||||
if (mod == NULL)
|
||||
return NULL;
|
||||
cofunc = PyObject_GetAttrString(mod, "__create_codec");
|
||||
Py_DECREF(mod);
|
||||
}
|
||||
return cofunc;
|
||||
}
|
||||
|
||||
/*
 * Module-level getcodec(encoding): look up `encoding` in this module's
 * codec_list, wrap the matching entry in a capsule and pass it to the
 * factory returned by getmultibytecodec().
 *
 * Raises TypeError when `encoding` is not a string and LookupError when no
 * entry of codec_list has that name.  Returns the factory's result, or NULL
 * with an exception set.
 */
static PyObject *
getcodec(PyObject *self, PyObject *encoding)
{
    const MultibyteCodec *entry;
    const char *wanted;
    PyObject *factory, *capsule, *result;

    if (!PyString_Check(encoding)) {
        PyErr_SetString(PyExc_TypeError,
                        "encoding name must be a string.");
        return NULL;
    }

    factory = getmultibytecodec();
    if (factory == NULL)
        return NULL;

    /* The list is terminated by an entry whose name is the empty string. */
    wanted = PyString_AS_STRING(encoding);
    entry = codec_list;
    while (entry->encoding[0] != '\0' &&
           strcmp(entry->encoding, wanted) != 0)
        entry++;

    if (entry->encoding[0] == '\0') {
        PyErr_SetString(PyExc_LookupError,
                        "no such codec is supported.");
        return NULL;
    }

    capsule = PyCapsule_New((void *)entry, PyMultibyteCodec_CAPSULE_NAME, NULL);
    if (capsule == NULL)
        return NULL;

    result = PyObject_CallFunctionObjArgs(factory, capsule, NULL);
    Py_DECREF(capsule);

    return result;
}
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
static int
|
||||
register_maps(PyObject *module)
|
||||
{
|
||||
const struct dbcs_map *h;
|
||||
|
||||
for (h = mapping_list; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
int r;
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
r = PyModule_AddObject(module, mhname,
|
||||
PyCapsule_New((void *)h, PyMultibyteCodec_CAPSULE_NAME, NULL));
|
||||
if (r == -1)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR
|
||||
find_pairencmap(ucs2_t body, ucs2_t modifier,
|
||||
const struct pair_encodemap *haystack, int haystacksize)
|
||||
{
|
||||
int pos, min, max;
|
||||
ucs4_t value = body << 16 | modifier;
|
||||
|
||||
min = 0;
|
||||
max = haystacksize;
|
||||
|
||||
for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
|
||||
if (value < haystack[pos].uniseq) {
|
||||
if (max == pos) break;
|
||||
else max = pos;
|
||||
}
|
||||
else if (value > haystack[pos].uniseq) {
|
||||
if (min == pos) break;
|
||||
else min = pos;
|
||||
}
|
||||
else
|
||||
break;
|
||||
|
||||
if (value == haystack[pos].uniseq)
|
||||
return haystack[pos].code;
|
||||
else
|
||||
return DBCINV;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USING_IMPORTED_MAPS
|
||||
#define IMPORT_MAP(locale, charset, encmap, decmap) \
|
||||
importmap("_codecs_" #locale, "__map_" #charset, \
|
||||
(const void**)encmap, (const void**)decmap)
|
||||
|
||||
static int
|
||||
importmap(const char *modname, const char *symbol,
|
||||
const void **encmap, const void **decmap)
|
||||
{
|
||||
PyObject *o, *mod;
|
||||
|
||||
mod = PyImport_ImportModule((char *)modname);
|
||||
if (mod == NULL)
|
||||
return -1;
|
||||
|
||||
o = PyObject_GetAttrString(mod, (char*)symbol);
|
||||
if (o == NULL)
|
||||
goto errorexit;
|
||||
else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"map data must be a Capsule.");
|
||||
goto errorexit;
|
||||
}
|
||||
else {
|
||||
struct dbcs_map *map;
|
||||
map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME);
|
||||
if (encmap != NULL)
|
||||
*encmap = map->encmap;
|
||||
if (decmap != NULL)
|
||||
*decmap = map->decmap;
|
||||
Py_DECREF(o);
|
||||
}
|
||||
|
||||
Py_DECREF(mod);
|
||||
return 0;
|
||||
|
||||
errorexit:
|
||||
Py_DECREF(mod);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define I_AM_A_MODULE_FOR(loc) \
|
||||
void \
|
||||
init_codecs_##loc(void) \
|
||||
{ \
|
||||
PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
|
||||
if (m != NULL) \
|
||||
(void)register_maps(m); \
|
||||
}
|
||||
|
||||
#endif
|
@@ -0,0 +1,43 @@
|
||||
/* These routines may be quite inefficient, but they are used only to emulate
|
||||
* old standards. */
|
||||
|
||||
#ifndef EMULATE_JISX0213_2000_ENCODE_INVALID
|
||||
#define EMULATE_JISX0213_2000_ENCODE_INVALID 1
|
||||
#endif
|
||||
|
||||
#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c) \
|
||||
if (config == (void *)2000 && ( \
|
||||
(c) == 0x9B1C || (c) == 0x4FF1 || \
|
||||
(c) == 0x525D || (c) == 0x541E || \
|
||||
(c) == 0x5653 || (c) == 0x59F8 || \
|
||||
(c) == 0x5C5B || (c) == 0x5E77 || \
|
||||
(c) == 0x7626 || (c) == 0x7E6B)) \
|
||||
return EMULATE_JISX0213_2000_ENCODE_INVALID; \
|
||||
else if (config == (void *)2000 && (c) == 0x9B1D) \
|
||||
(assi) = 0x8000 | 0x7d3b; \
|
||||
|
||||
#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c) \
|
||||
if (config == (void *)2000 && (c) == 0x20B9F) \
|
||||
return EMULATE_JISX0213_2000_ENCODE_INVALID;
|
||||
|
||||
#ifndef EMULATE_JISX0213_2000_DECODE_INVALID
|
||||
#define EMULATE_JISX0213_2000_DECODE_INVALID 2
|
||||
#endif
|
||||
|
||||
#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2) \
|
||||
if (config == (void *)2000 && \
|
||||
(((c1) == 0x2E && (c2) == 0x21) || \
|
||||
((c1) == 0x2F && (c2) == 0x7E) || \
|
||||
((c1) == 0x4F && (c2) == 0x54) || \
|
||||
((c1) == 0x4F && (c2) == 0x7E) || \
|
||||
((c1) == 0x74 && (c2) == 0x27) || \
|
||||
((c1) == 0x7E && (c2) == 0x7A) || \
|
||||
((c1) == 0x7E && (c2) == 0x7B) || \
|
||||
((c1) == 0x7E && (c2) == 0x7C) || \
|
||||
((c1) == 0x7E && (c2) == 0x7D) || \
|
||||
((c1) == 0x7E && (c2) == 0x7E))) \
|
||||
return EMULATE_JISX0213_2000_DECODE_INVALID;
|
||||
|
||||
/*
 * EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2)
 *
 * Statement fragment for decoders: when `config` (taken from the expansion
 * site) equals (void *)2000 and the byte pair is (0x7D, 0x3B), `assi` is set
 * to 0x9B1D.  This is the reverse of the 0x9B1D -> 0x8000 | 0x7d3b
 * assignment made by EMULATE_JISX0213_2000_ENCODE_BMP.
 * The expansion is a bare `if` statement, so an `else` may follow it.
 */
#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) \
|
||||
if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) \
|
||||
(assi) = 0x9B1D;
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,59 @@
|
||||
#define JISX0213_ENCPAIRS 46
|
||||
#ifdef EXTERN_JISX0213_PAIR
|
||||
static const struct widedbcs_index *jisx0213_pair_decmap;
|
||||
static const struct pair_encodemap *jisx0213_pair_encmap;
|
||||
#else
|
||||
static const ucs4_t __jisx0213_pair_decmap[49] = {
|
||||
810234010,810365082,810496154,810627226,810758298,816525466,816656538,
|
||||
816787610,816918682,817049754,817574042,818163866,818426010,838283418,
|
||||
15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
|
||||
39453441,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,U,48825061,48562921,
|
||||
};
|
||||
|
||||
static const struct widedbcs_index jisx0213_pair_decmap[256] = {
|
||||
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap
|
||||
+0,119,123},{__jisx0213_pair_decmap+5,119,126},{__jisx0213_pair_decmap+13,120,
|
||||
120},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__jisx0213_pair_decmap+14,68,102},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,
|
||||
0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{
|
||||
0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0
|
||||
},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
|
||||
};
|
||||
|
||||
static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
|
||||
{0x00e60000,0x295c},{0x00e60300,0x2b44},{0x02540000,0x2b38},{0x02540300,0x2b48
|
||||
},{0x02540301,0x2b49},{0x02590000,0x2b30},{0x02590300,0x2b4c},{0x02590301,
|
||||
0x2b4d},{0x025a0000,0x2b43},{0x025a0300,0x2b4e},{0x025a0301,0x2b4f},{
|
||||
0x028c0000,0x2b37},{0x028c0300,0x2b4a},{0x028c0301,0x2b4b},{0x02e50000,0x2b60
|
||||
},{0x02e502e9,0x2b66},{0x02e90000,0x2b64},{0x02e902e5,0x2b65},{0x304b0000,
|
||||
0x242b},{0x304b309a,0x2477},{0x304d0000,0x242d},{0x304d309a,0x2478},{
|
||||
0x304f0000,0x242f},{0x304f309a,0x2479},{0x30510000,0x2431},{0x3051309a,0x247a
|
||||
},{0x30530000,0x2433},{0x3053309a,0x247b},{0x30ab0000,0x252b},{0x30ab309a,
|
||||
0x2577},{0x30ad0000,0x252d},{0x30ad309a,0x2578},{0x30af0000,0x252f},{
|
||||
0x30af309a,0x2579},{0x30b10000,0x2531},{0x30b1309a,0x257a},{0x30b30000,0x2533
|
||||
},{0x30b3309a,0x257b},{0x30bb0000,0x253b},{0x30bb309a,0x257c},{0x30c40000,
|
||||
0x2544},{0x30c4309a,0x257d},{0x30c80000,0x2548},{0x30c8309a,0x257e},{
|
||||
0x31f70000,0x2675},{0x31f7309a,0x2678},
|
||||
};
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* multibytecodec.h: Common Multibyte Codec Implementation
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
*/
|
||||
|
||||
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
||||
#define _PYTHON_MULTIBYTECODEC_H_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Character types used by the codec interfaces and tables:
 *   ucs4_t  - uint32_t when available, otherwise unsigned int
 *   ucs2_t  - uint16_t when available, otherwise unsigned short
 *   DBCHAR  - a second name for the same type as ucs2_t
 *
 * NOTE(review): #ifdef only detects preprocessor macros.  Where uint32_t /
 * uint16_t are provided as plain typedefs, the fallback branches are taken,
 * so the widths then depend on the platform's unsigned int / unsigned short
 * -- confirm these are 32 and 16 bits on every supported target.
 */
#ifdef uint32_t
|
||||
typedef uint32_t ucs4_t;
|
||||
#else
|
||||
typedef unsigned int ucs4_t;
|
||||
#endif
|
||||
|
||||
#ifdef uint16_t
|
||||
typedef uint16_t ucs2_t, DBCHAR;
|
||||
#else
|
||||
typedef unsigned short ucs2_t, DBCHAR;
|
||||
#endif
|
||||
|
||||
/*
 * Scratch state handed to every codec callback.  It is a union, so all
 * members overlay the same storage; a codec picks whichever view suits it
 * (a pointer, an int, or a small array of bytes / ucs2_t / ucs4_t).
 * Do not reorder or resize members: the layout is part of the object
 * structs that embed this type.
 */
typedef union {
|
||||
void *p;
|
||||
int i;
|
||||
unsigned char c[8];
|
||||
ucs2_t u2[4];
|
||||
ucs4_t u4[2];
|
||||
} MultibyteCodec_State;
|
||||
|
||||
/*
 * Function-pointer types for the callbacks stored in a MultibyteCodec.
 *
 *   mbcodec_init        one-argument hook taking only the codec's config
 *   mbencode_func       encode: reads Py_UNICODE from *inbuf (inleft units),
 *                       writes bytes to *outbuf (outleft bytes), with flags
 *   mbencodeinit_func   encoder state hook (state + config)
 *   mbencodereset_func  encoder reset; may write bytes to *outbuf
 *   mbdecode_func       decode: reads bytes from *inbuf (inleft bytes),
 *                       writes Py_UNICODE to *outbuf (outleft units)
 *   mbdecodeinit_func   decoder state hook (state + config)
 *   mbdecodereset_func  decoder reset (state + config)
 *
 * The buffer arguments are pointers-to-pointers, so a callback can advance
 * the caller's cursor.  NOTE(review): the meaning of the return values is
 * not visible here; presumably 0 is success, MBERR_* an error and a positive
 * value an illegal sequence -- confirm against the implementation.
 */
typedef int (*mbcodec_init)(const void *config);
|
||||
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
const Py_UNICODE **inbuf, Py_ssize_t inleft,
|
||||
unsigned char **outbuf, Py_ssize_t outleft,
|
||||
int flags);
|
||||
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
unsigned char **outbuf, Py_ssize_t outleft);
|
||||
typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
|
||||
const void *config,
|
||||
const unsigned char **inbuf, Py_ssize_t inleft,
|
||||
Py_UNICODE **outbuf, Py_ssize_t outleft);
|
||||
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
|
||||
const void *config);
|
||||
|
||||
/*
 * Static description of one codec: an encoding name string, an opaque
 * config pointer, and one callback of each mb*_func type declared above.
 * NOTE(review): presumably `config` is the value forwarded as the `config`
 * argument of every callback, and callbacks a codec does not need may be
 * NULL -- confirm against the callers.
 */
typedef struct {
|
||||
const char *encoding;
|
||||
const void *config;
|
||||
mbcodec_init codecinit;
|
||||
mbencode_func encode;
|
||||
mbencodeinit_func encinit;
|
||||
mbencodereset_func encreset;
|
||||
mbdecode_func decode;
|
||||
mbdecodeinit_func decinit;
|
||||
mbdecodereset_func decreset;
|
||||
} MultibyteCodec;
|
||||
|
||||
/*
 * Python object that wraps a MultibyteCodec: the standard object header
 * followed by a pointer to the codec description.
 */
typedef struct {
|
||||
PyObject_HEAD
|
||||
MultibyteCodec *codec;
|
||||
} MultibyteCodecObject;
|
||||
|
||||
/* Exact type test: true only when op's ob_type is MultibyteCodec_Type itself. */
#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
|
||||
|
||||
/*
 * Leading fields shared by every stateful codec object: the object header,
 * the codec description, the codec's scratch state, and the `errors` slot.
 * The encoder and decoder HEAD macros below start with this macro, so all
 * those structs begin with the same fields as
 * MultibyteStatefulCodecContext.
 */
#define _MultibyteStatefulCodec_HEAD \
|
||||
PyObject_HEAD \
|
||||
MultibyteCodec *codec; \
|
||||
MultibyteCodec_State state; \
|
||||
PyObject *errors;
|
||||
typedef struct {
|
||||
_MultibyteStatefulCodec_HEAD
|
||||
} MultibyteStatefulCodecContext;
|
||||
|
||||
/*
 * Stateful encoder layout: the common stateful fields followed by a buffer
 * of up to MAXENCPENDING Py_UNICODE units and its fill count.
 * NOTE(review): presumably `pending` holds input that could not be encoded
 * yet and is retried on the next call -- confirm against the implementation.
 */
#define MAXENCPENDING 2
|
||||
#define _MultibyteStatefulEncoder_HEAD \
|
||||
_MultibyteStatefulCodec_HEAD \
|
||||
Py_UNICODE pending[MAXENCPENDING]; \
|
||||
Py_ssize_t pendingsize;
|
||||
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
} MultibyteStatefulEncoderContext;
|
||||
|
||||
/*
 * Stateful decoder layout: the common stateful fields followed by a buffer
 * of up to MAXDECPENDING bytes and its fill count.
 * NOTE(review): presumably `pending` holds the bytes of an incomplete
 * multibyte sequence until more input arrives -- confirm against the
 * implementation.
 */
#define MAXDECPENDING 8
|
||||
#define _MultibyteStatefulDecoder_HEAD \
|
||||
_MultibyteStatefulCodec_HEAD \
|
||||
unsigned char pending[MAXDECPENDING]; \
|
||||
Py_ssize_t pendingsize;
|
||||
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
} MultibyteStatefulDecoderContext;
|
||||
|
||||
/*
 * Incremental encoder / decoder objects.  Each consists solely of the
 * corresponding stateful HEAD, so its layout matches
 * MultibyteStatefulEncoderContext / MultibyteStatefulDecoderContext.
 */
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
} MultibyteIncrementalEncoderObject;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
} MultibyteIncrementalDecoderObject;
|
||||
|
||||
/*
 * Stream reader / writer objects: the stateful decoder (reader) or encoder
 * (writer) HEAD followed by a `stream` object reference.
 * NOTE(review): presumably `stream` is the underlying file-like object that
 * is read from / written to -- confirm against the implementation.
 */
typedef struct {
|
||||
_MultibyteStatefulDecoder_HEAD
|
||||
PyObject *stream;
|
||||
} MultibyteStreamReaderObject;
|
||||
|
||||
typedef struct {
|
||||
_MultibyteStatefulEncoder_HEAD
|
||||
PyObject *stream;
|
||||
} MultibyteStreamWriterObject;
|
||||
|
||||
/* Return codes: positive values report illegal sequences; the negative values below are error conditions. */
|
||||
#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */
|
||||
#define MBERR_TOOFEW (-2) /* incomplete input buffer */
|
||||
#define MBERR_INTERNAL (-3) /* internal runtime error */
|
||||
|
||||
/*
 * Sentinel values kept in an `errors` slot in place of a real object.
 * They are the small integers 1..3 cast to PyObject *; ERROR_DECREF()
 * recognises them and never touches their reference count.
 */
#define ERROR_STRICT        ((PyObject *)(1))
#define ERROR_IGNORE        ((PyObject *)(2))
#define ERROR_REPLACE       ((PyObject *)(3))

/*
 * True when p lies outside the sentinel range ERROR_STRICT..ERROR_REPLACE.
 * NULL is below ERROR_STRICT and therefore also counts as "custom"; callers
 * that may hold NULL must test for it separately.  p is evaluated twice.
 */
#define ERROR_ISCUSTOM(p)   ((p) < ERROR_STRICT || ERROR_REPLACE < (p))

/*
 * Release the reference held in p unless p is NULL or one of the sentinels.
 * Expands to a single statement WITHOUT a trailing semicolon, so it must be
 * written as `ERROR_DECREF(x);` and is safe in an unbraced if / else.
 * p is evaluated more than once; pass an expression without side effects.
 */
#define ERROR_DECREF(p) do {                        \
    if ((p) != NULL && ERROR_ISCUSTOM(p)) {         \
        Py_DECREF(p);                               \
    }                                               \
} while (0)
|
||||
|
||||
#define MBENC_FLUSH 0x0001 /* encode all characters encodable */
|
||||
#define MBENC_MAX MBENC_FLUSH
|
||||
|
||||
#define PyMultibyteCodec_CAPSULE_NAME "multibytecodec.__map_*"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
Reference in New Issue
Block a user