mirror of
https://github.com/php/php-src.git
synced 2024-11-29 04:46:07 +08:00
Added Simplified Chinese, Traditional Chinese, and Korean support to mbstring. Note that this feature is experimental.
This commit is contained in:
parent
18805e8323
commit
5df01432c7
@ -2,7 +2,7 @@ dnl
|
||||
dnl $Id$
|
||||
dnl
|
||||
|
||||
PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
|
||||
PHP_ARG_WITH(mbstring, whether to enable multibyte string support,
|
||||
[ --disable-mbstring Disable multibyte string support], yes)
|
||||
|
||||
PHP_ARG_ENABLE(mbstr_enc_trans, whether to enable encoding translation,
|
||||
@ -11,9 +11,28 @@ PHP_ARG_ENABLE(mbstr_enc_trans, whether to enable encoding translation,
|
||||
PHP_ARG_ENABLE(mbregex, whether to enable multibyte regex support,
|
||||
[ --enable-mbregex Enable multibyte regex support], yes)
|
||||
|
||||
if test "$PHP_MBSTRING" != "no"; then
|
||||
if test "$PHP_MBSTRING" != "no"; then
|
||||
AC_DEFINE(HAVE_MBSTRING,1,[whether to have multibyte string support])
|
||||
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
|
||||
|
||||
dnl Define HAVE_MBSTR_JA (Japanese support).
dnl NOTE(review): this test sits inside the enclosing "$PHP_MBSTRING" != "no" guard,
dnl so its first clause is always true here and HAVE_MBSTR_JA ends up defined for
dnl every enabled value (including cn, tw and kr). Confirm that Japanese support
dnl being unconditionally on is intended; if not, the first clause was probably
dnl meant to compare against "yes".
if test "$PHP_MBSTRING" != "no" -o "$PHP_MBSTRING" = "ja"; then
|
||||
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "cn"; then
|
||||
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "tw"; then
|
||||
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "kr"; then
|
||||
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "all"; then
|
||||
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
|
||||
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
|
||||
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
|
||||
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
|
||||
fi
|
||||
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
|
||||
else
|
||||
PHP_MBSTR_ENC_TRANS=no
|
||||
fi
|
||||
|
@ -81,12 +81,30 @@
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "mbfilter.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
#include "mbfilter_ja.h"
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
#include "mbfilter_cn.h"
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
#include "mbfilter_tw.h"
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
#include "mbfilter_kr.h"
|
||||
#endif
|
||||
|
||||
#include "zend.h"
|
||||
|
||||
#ifdef PHP_WIN32
|
||||
@ -137,9 +155,20 @@ static mbfl_language mbfl_language_english = {
|
||||
mbfl_no_encoding_8bit
|
||||
};
|
||||
|
||||
/*
 * Language descriptor for Chinese ("zh").
 *
 * The initializers are, in order: the language id, the display name, the
 * short name, a NULL pointer, and three encoding ids.
 * NOTE(review): the field meanings are not visible here; presumably the three
 * encoding ids are the mail charset, mail header encoding and mail body
 * encoding -- confirm against the mbfl_language declaration in mbfilter.h.
 * NOTE(review): the first encoding id is mbfl_no_encoding_2022jp, an
 * ISO-2022-JP (Japanese) constant. This looks like a placeholder copied from
 * the Japanese entry -- confirm which encoding is intended for Chinese.
 */
static mbfl_language mbfl_language_chinese = {
|
||||
mbfl_no_language_chinese,
|
||||
"Chinese",
|
||||
"zh",
|
||||
NULL,
|
||||
mbfl_no_encoding_2022jp,
|
||||
mbfl_no_encoding_base64,
|
||||
mbfl_no_encoding_7bit
|
||||
};
|
||||
|
||||
static mbfl_language *mbfl_language_ptr_table[] = {
|
||||
&mbfl_language_uni,
|
||||
&mbfl_language_japanese,
|
||||
&mbfl_language_chinese,
|
||||
&mbfl_language_english,
|
||||
NULL
|
||||
};
|
||||
@ -204,6 +233,121 @@ static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
|
||||
};
|
||||
|
||||
|
||||
/*
 * Character-length table for EUC-CN: 256 entries, one per byte value.
 * Entries 0xA1-0xFE hold 2; every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_euccn[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/*
 * Character-length table for CP936: 256 entries, one per byte value.
 * Entries 0x81-0xFE hold 2; every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_cp936[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/*
 * Character-length table for EUC-TW: 256 entries, one per byte value.
 * Entry 0x8E holds 4, entries 0xA1-0xFE hold 2, every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_euctw[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/*
 * Character-length table for Big5: 256 entries, one per byte value.
 * Entries 0x81-0xFE hold 2; every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_big5[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
|
||||
/*
 * Character-length table for EUC-KR: 256 entries, one per byte value.
 * Entries 0xA1-0xFE hold 2; every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_euckr[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/*
 * Character-length table for UHC: 256 entries, one per byte value.
 * Entries 0x81-0xFE hold 2; every other entry holds 1.
 * (Presumably indexed by the first byte of a character -- confirm at the
 * point where the table is consumed.)
 */
static const unsigned char mblen_table_uhc[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xA0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	/* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/* encoding structure */
|
||||
static const char *mbfl_encoding_pass_aliases[] = {"none", NULL};
|
||||
|
||||
@ -475,6 +619,7 @@ static mbfl_encoding mbfl_encoding_ascii = {
|
||||
MBFL_ENCTYPE_SBCS
|
||||
};
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_euc_jp = {
|
||||
@ -536,6 +681,83 @@ static mbfl_encoding mbfl_encoding_2022jp = {
|
||||
NULL,
|
||||
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
|
||||
};
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static const char *mbfl_encoding_euc_cn_aliases[] = {"EUC_CN", "eucCN", "x-euc-cn", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_euc_cn = {
|
||||
mbfl_no_encoding_euc_cn,
|
||||
"EUC-CN",
|
||||
"EUC-CN",
|
||||
(const char *(*)[])&mbfl_encoding_euc_cn_aliases,
|
||||
mblen_table_euccn,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_cp936 = {
|
||||
mbfl_no_encoding_cp936,
|
||||
"CP936",
|
||||
"CP936",
|
||||
(const char *(*)[])&mbfl_encoding_cp936_aliases,
|
||||
mblen_table_cp936,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_euc_tw = {
|
||||
mbfl_no_encoding_euc_tw,
|
||||
"EUC-TW",
|
||||
"EUC-TW",
|
||||
(const char *(*)[])&mbfl_encoding_euc_tw_aliases,
|
||||
mblen_table_euctw,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_big5_aliases[] = {"big5", "CP950", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_big5 = {
|
||||
mbfl_no_encoding_big5,
|
||||
"BIG-5",
|
||||
"BIG-5",
|
||||
(const char *(*)[])&mbfl_encoding_big5_aliases,
|
||||
mblen_table_big5,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_euc_kr = {
|
||||
mbfl_no_encoding_euc_kr,
|
||||
"EUC-KR",
|
||||
"EUC-KR",
|
||||
(const char *(*)[])&mbfl_encoding_euc_kr_aliases,
|
||||
mblen_table_euckr,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_uhc = {
|
||||
mbfl_no_encoding_uhc,
|
||||
"UHC",
|
||||
"UHC",
|
||||
(const char *(*)[])&mbfl_encoding_uhc_aliases,
|
||||
mblen_table_uhc,
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
|
||||
|
||||
@ -720,12 +942,14 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
|
||||
&mbfl_encoding_utf7,
|
||||
&mbfl_encoding_utf7imap,
|
||||
&mbfl_encoding_ascii,
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
&mbfl_encoding_euc_jp,
|
||||
&mbfl_encoding_sjis,
|
||||
&mbfl_encoding_eucjp_win,
|
||||
&mbfl_encoding_sjis_win,
|
||||
&mbfl_encoding_jis,
|
||||
&mbfl_encoding_2022jp,
|
||||
#endif
|
||||
&mbfl_encoding_cp1252,
|
||||
&mbfl_encoding_8859_1,
|
||||
&mbfl_encoding_8859_2,
|
||||
@ -740,6 +964,18 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
|
||||
&mbfl_encoding_8859_13,
|
||||
&mbfl_encoding_8859_14,
|
||||
&mbfl_encoding_8859_15,
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
&mbfl_encoding_euc_cn,
|
||||
&mbfl_encoding_cp936,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
&mbfl_encoding_euc_tw,
|
||||
&mbfl_encoding_big5,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
&mbfl_encoding_euc_kr,
|
||||
&mbfl_encoding_uhc,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -825,12 +1061,30 @@ static void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
|
||||
@ -1221,6 +1475,7 @@ static struct mbfl_convert_vtbl vtbl_wchar_ascii = {
|
||||
mbfl_filt_conv_wchar_ascii,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
|
||||
mbfl_no_encoding_euc_jp,
|
||||
mbfl_no_encoding_wchar,
|
||||
@ -1316,6 +1571,109 @@ static struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_sjiswin,
|
||||
mbfl_filt_conv_common_flush };
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static struct mbfl_convert_vtbl vtbl_euccn_wchar = {
|
||||
mbfl_no_encoding_euc_cn,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_euccn_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_euccn = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_euc_cn,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_euccn,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_cp936_wchar = {
|
||||
mbfl_no_encoding_cp936,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_cp936_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_cp936,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_cp936,
|
||||
mbfl_filt_conv_common_flush };
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
static struct mbfl_convert_vtbl vtbl_euctw_wchar = {
|
||||
mbfl_no_encoding_euc_tw,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_euctw_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_euctw = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_euc_tw,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_euctw,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_big5_wchar = {
|
||||
mbfl_no_encoding_big5,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_big5_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_big5 = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_big5,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_big5,
|
||||
mbfl_filt_conv_common_flush };
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static struct mbfl_convert_vtbl vtbl_euckr_wchar = {
|
||||
mbfl_no_encoding_euc_kr,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_euckr_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_euckr = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_euc_kr,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_euckr,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_uhc_wchar = {
|
||||
mbfl_no_encoding_uhc,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_uhc_wchar,
|
||||
mbfl_filt_conv_common_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_uhc = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_uhc,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_uhc,
|
||||
mbfl_filt_conv_common_flush };
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
|
||||
mbfl_no_encoding_cp1252,
|
||||
@ -1546,6 +1904,7 @@ static struct mbfl_convert_vtbl vtbl_wchar_8859_15 = {
|
||||
static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
|
||||
&vtbl_utf8_wchar,
|
||||
&vtbl_wchar_utf8,
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
&vtbl_eucjp_wchar,
|
||||
&vtbl_wchar_eucjp,
|
||||
&vtbl_sjis_wchar,
|
||||
@ -1558,6 +1917,25 @@ static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
|
||||
&vtbl_wchar_eucjpwin,
|
||||
&vtbl_sjiswin_wchar,
|
||||
&vtbl_wchar_sjiswin,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
&vtbl_euccn_wchar,
|
||||
&vtbl_wchar_euccn,
|
||||
&vtbl_cp936_wchar,
|
||||
&vtbl_wchar_cp936,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
&vtbl_euctw_wchar,
|
||||
&vtbl_wchar_euctw,
|
||||
&vtbl_big5_wchar,
|
||||
&vtbl_wchar_big5,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
&vtbl_euckr_wchar,
|
||||
&vtbl_wchar_euckr,
|
||||
&vtbl_uhc_wchar,
|
||||
&vtbl_wchar_uhc,
|
||||
#endif
|
||||
&vtbl_cp1252_wchar,
|
||||
&vtbl_wchar_cp1252,
|
||||
&vtbl_ascii_wchar,
|
||||
@ -1655,6 +2033,7 @@ static struct mbfl_identify_vtbl vtbl_identify_utf7 = {
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_utf7 };
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static struct mbfl_identify_vtbl vtbl_identify_eucjp = {
|
||||
mbfl_no_encoding_euc_jp,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
@ -1690,6 +2069,49 @@ static struct mbfl_identify_vtbl vtbl_identify_2022jp = {
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_2022jp };
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static struct mbfl_identify_vtbl vtbl_identify_euccn = {
|
||||
mbfl_no_encoding_euc_cn,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_euccn };
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_cp936 = {
|
||||
mbfl_no_encoding_cp936,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_cp936 };
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
static struct mbfl_identify_vtbl vtbl_identify_euctw = {
|
||||
mbfl_no_encoding_euc_tw,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_euctw };
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_big5 = {
|
||||
mbfl_no_encoding_big5,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_big5 };
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static struct mbfl_identify_vtbl vtbl_identify_euckr = {
|
||||
mbfl_no_encoding_euc_kr,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_euckr };
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_uhc = {
|
||||
mbfl_no_encoding_uhc,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_uhc };
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
|
||||
mbfl_no_encoding_cp1252,
|
||||
@ -1785,12 +2207,26 @@ static struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
|
||||
&vtbl_identify_utf8,
|
||||
&vtbl_identify_utf7,
|
||||
&vtbl_identify_ascii,
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
&vtbl_identify_eucjp,
|
||||
&vtbl_identify_sjis,
|
||||
&vtbl_identify_eucjpwin,
|
||||
&vtbl_identify_sjiswin,
|
||||
&vtbl_identify_jis,
|
||||
&vtbl_identify_2022jp,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
&vtbl_identify_euccn,
|
||||
&vtbl_identify_cp936,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
&vtbl_identify_euctw,
|
||||
&vtbl_identify_big5,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
&vtbl_identify_euckr,
|
||||
&vtbl_identify_uhc,
|
||||
#endif
|
||||
&vtbl_identify_cp1252,
|
||||
&vtbl_identify_8859_1,
|
||||
&vtbl_identify_8859_2,
|
||||
@ -5075,6 +5511,7 @@ mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
return c;
|
||||
}
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static int
|
||||
mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
@ -5268,6 +5705,207 @@ retry:
|
||||
|
||||
return c;
|
||||
}
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
static int
|
||||
mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
switch (filter->status) {
|
||||
case 0: /* latin */
|
||||
if (c >= 0 && c < 0x80) { /* ok */
|
||||
;
|
||||
} else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
|
||||
filter->status = 1;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* got lead byte */
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static int
|
||||
mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
if (filter->status) { /* kanji second char */
|
||||
if (c < 0x40 || c > 0xfe || c == 0x7f) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
} else if (c >= 0 && c < 0x80) { /* latin ok */
|
||||
;
|
||||
} else if (c > 0x80 && c < 0xff) { /* DBCS lead byte */
|
||||
filter->status = 1;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
static int
|
||||
mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
switch (filter->status) {
|
||||
case 0: /* latin */
|
||||
if (c >= 0 && c < 0x80) { /* ok */
|
||||
;
|
||||
} else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
|
||||
filter->status = 1;
|
||||
} else if (c == 0x8e) { /* DBCS lead byte */
|
||||
filter->status = 2;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* got lead byte */
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
case 2: /* got lead byte */
|
||||
if (c >= 0xa1 && c < 0xaf) { /* ok */
|
||||
filter->status = 3;
|
||||
} else {
|
||||
filter->flag = 1; /* bad */
|
||||
}
|
||||
break;
|
||||
|
||||
case 3: /* got lead byte */
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 4;
|
||||
break;
|
||||
|
||||
case 4: /* got lead byte */
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static int
|
||||
mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
if (filter->status) { /* kanji second char */
|
||||
if (c < 0x40 || (c > 0x7e && c < 0xa1) ||c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
} else if (c >= 0 && c < 0x80) { /* latin ok */
|
||||
;
|
||||
} else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
|
||||
filter->status = 1;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static int
|
||||
mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
switch (filter->status) {
|
||||
case 0: /* latin */
|
||||
if (c >= 0 && c < 0x80) { /* ok */
|
||||
;
|
||||
} else if (c > 0xa0 && c < 0xff) { /* DBCS lead byte */
|
||||
filter->status = 1;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* got lead byte */
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static int
|
||||
mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
switch (filter->status) {
|
||||
case 0: /* latin */
|
||||
if (c >= 0 && c < 0x80) { /* ok */
|
||||
;
|
||||
} else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */
|
||||
filter->status= 1;
|
||||
} else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */
|
||||
filter->status= 2;
|
||||
} else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */
|
||||
filter->status= 3;
|
||||
} else { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
|
||||
case 1:
|
||||
case 2:
|
||||
if (c < 0x41 || (c > 0x5a && c < 0x61)
|
||||
|| (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
if (c < 0xa1 || c > 0xfe) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
filter->status = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
|
||||
/* We only distinguish the MS extensions to ISO-8859-1.
|
||||
* Actually, this is pretty much a NO-OP, since the identification
|
||||
@ -8232,3 +8870,10 @@ mbfl_html_numeric_entity(
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
||||
|
@ -109,7 +109,7 @@ enum mbfl_no_language {
|
||||
mbfl_no_language_korean, /* ko */
|
||||
mbfl_no_language_dutch, /* nl */
|
||||
mbfl_no_language_polish, /* pl */
|
||||
mbfl_no_language_portuguese, /* pt */
|
||||
mbfl_no_language_portuguese, /* pt */
|
||||
mbfl_no_language_swedish, /* sv */
|
||||
mbfl_no_language_chinese, /* zh */
|
||||
mbfl_no_language_max
|
||||
@ -167,6 +167,12 @@ enum mbfl_no_encoding {
|
||||
mbfl_no_encoding_8859_13,
|
||||
mbfl_no_encoding_8859_14,
|
||||
mbfl_no_encoding_8859_15,
|
||||
mbfl_no_encoding_euc_cn,
|
||||
mbfl_no_encoding_cp936,
|
||||
mbfl_no_encoding_euc_tw,
|
||||
mbfl_no_encoding_big5,
|
||||
mbfl_no_encoding_euc_kr,
|
||||
mbfl_no_encoding_uhc,
|
||||
mbfl_no_encoding_charset_max
|
||||
};
|
||||
|
||||
@ -210,7 +216,7 @@ typedef struct _mbfl_encoding {
|
||||
#define MBFL_ENCTYPE_MWC4LE 0x00000800
|
||||
#define MBFL_ENCTYPE_SHFTCODE 0x00001000
|
||||
|
||||
/* wchar plane, spesial charactor */
|
||||
/* wchar plane, special character */
|
||||
#define MBFL_WCSPLANE_MASK 0xffff
|
||||
#define MBFL_WCSPLANE_UCS2MAX 0x00010000
|
||||
#define MBFL_WCSPLANE_SUPMIN 0x00010000
|
||||
@ -233,7 +239,11 @@ typedef struct _mbfl_encoding {
|
||||
#define MBFL_WCSPLANE_8859_15 0x70f00000 /* 00h - FFh */
|
||||
#define MBFL_WCSPLANE_KSC5601 0x70f10000 /* 2121h - 7E7Eh */
|
||||
#define MBFL_WCSPLANE_GB2312 0x70f20000 /* 2121h - 7E7Eh */
|
||||
#define MBFL_WCSGROUP_MASK 0xffffff
|
||||
#define MBFL_WCSPLANE_WINCP936 0x70f30000 /* 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
|
||||
#define MBFL_WCSGROUP_MASK 0xffffff
|
||||
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
|
||||
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
|
||||
#define MBFL_WCSGROUP_THROUGH 0x78000000 /* 000000h - FFFFFFh */
|
||||
|
263
ext/mbstring/mbfilter_cn.c
Normal file
263
ext/mbstring/mbfilter_cn.c
Normal file
@ -0,0 +1,263 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/*
|
||||
* "streamable simplified chinese code filter and converter"
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_cn.h"
|
||||
|
||||
#include "unicode_table_cn.h"
|
||||
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
|
||||
/*
|
||||
* EUC-CN => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s1, s2, w;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* dbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
|
||||
w = (c1 - 0x81)*192 + (c - 0x40);
|
||||
if (w >= 0 && w < cp936_ucs_table_size) {
|
||||
w = cp936_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_GB2312;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => EUC-CN
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, c2, s;
|
||||
|
||||
s = 0;
|
||||
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
|
||||
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
|
||||
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
|
||||
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
|
||||
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
|
||||
s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
|
||||
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
|
||||
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
|
||||
} else if (c >= ucs_r_cp936_table_min && c < ucs_r_cp936_table_max) {
|
||||
s = ucs_r_cp936_table[c - ucs_r_cp936_table_min];
|
||||
}
|
||||
c1 = (s >> 8) & 0xff;
|
||||
c2 = s & 0xff;
|
||||
|
||||
if (c1 < 0xa1 || c2 < 0xa1) { /* exclude CP932 extension */
|
||||
s = 0;
|
||||
}
|
||||
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_GB2312) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* CP936 => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s1, s2, w;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c == 0x80) { /* euro sign */
|
||||
CK((*filter->output_function)(0x20ac, filter->data TSRMLS_CC));
|
||||
} else if (c > 0x80 && c < 0xff) { /* dbcs lead byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* dbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if ( c1 < 0xff && c1 > 0x80 && c > 0x39 && c < 0xff && c != 0x7f) {
|
||||
w = (c1 - 0x81)*192 + (c - 0x40);
|
||||
if (w >= 0 && w < cp936_ucs_table_size) {
|
||||
w = cp936_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_WINCP936;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => CP936
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, c2, s;
|
||||
|
||||
s = 0;
|
||||
if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
|
||||
s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
|
||||
} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
|
||||
s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
|
||||
} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
|
||||
s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
|
||||
} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
|
||||
s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
|
||||
} else if (c >= ucs_r_cp936_table_min && c < ucs_r_cp936_table_max) {
|
||||
s = ucs_r_cp936_table[c - ucs_r_cp936_table_min];
|
||||
}
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_WINCP936) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_CN */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
30
ext/mbstring/mbfilter_cn.h
Normal file
30
ext/mbstring/mbfilter_cn.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef MBFL_MBFILTER_CN_H
|
||||
#define MBFL_MBFILTER_CN_H
|
||||
|
||||
int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
|
||||
#endif /* MBFL_MBFILTER_CN_H */
|
@ -79,9 +79,15 @@
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_ja.h"
|
||||
|
||||
@ -1274,3 +1280,12 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter TSRMLS_DC)
|
||||
filter->status &= 0xff;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_JA */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
||||
|
324
ext/mbstring/mbfilter_kr.c
Normal file
324
ext/mbstring/mbfilter_kr.c
Normal file
@ -0,0 +1,324 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/*
|
||||
* "streamable korean code filter and converter"
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_cn.h"
|
||||
|
||||
#include "unicode_table_kr.h"
|
||||
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
|
||||
/*
|
||||
* EUC-KR => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s1, s2, w, flag;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c > 0xa0 && c < 0xff && c != 0xc9) { /* dbcs lead byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* dbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
flag = 0;
|
||||
if (c1 >= 0xa1 && c1 <= 0xc6) {
|
||||
flag = 1;
|
||||
} else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) {
|
||||
flag = 2;
|
||||
}
|
||||
if (flag > 0 && c >= 0xa1 && c <= 0xfe) {
|
||||
if (flag == 1){
|
||||
w = (c1 - 0xa1)*178 + (c - 0xa1) + 0x54;
|
||||
if (w >= 0 && w < uhc2_ucs_table_size) {
|
||||
w = uhc2_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
} else {
|
||||
if (c1 < 0xc9){
|
||||
w = (c1 - 0xc7)*94 + c - 0xa1;
|
||||
} else {
|
||||
w = (c1 - 0xc8)*94 + c - 0xa1;
|
||||
}
|
||||
if (w >= 0 && w < uhc3_ucs_table_size) {
|
||||
w = uhc3_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_KSC5601;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => EUC-KR
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, c2, s;
|
||||
|
||||
s = 0;
|
||||
|
||||
if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
|
||||
s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
|
||||
} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
|
||||
s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
|
||||
} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
|
||||
s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
|
||||
} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
|
||||
s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
|
||||
} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
|
||||
s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
|
||||
} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
|
||||
s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
|
||||
}
|
||||
|
||||
c1 = (s >> 8) & 0xff;
|
||||
c2 = s & 0xff;
|
||||
/* exclude UHC extension area */
|
||||
if (c1 < 0xa1 || c1 > 0xfe || c2 < 0xa1 && c2 > 0xfe){
|
||||
s = 0;
|
||||
}
|
||||
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_KSC5601) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* UHC => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s1, s2, w, flag;
|
||||
const short ofst1[] = { 0x41, 0x61, 0x81, 0xa1};
|
||||
const short ofst2[] = { 0x0, 0x1a, 0x34, 0x54};
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c > 0x80 && c < 0xff && c != 0xc9) { /* dbcs lead byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* dbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
|
||||
flag = 0;
|
||||
if ( c >= 0x41 && c <= 0x5a){
|
||||
flag = 1;
|
||||
} else if (c >= 0x61 && c <= 0x7a){
|
||||
flag = 2;
|
||||
} else if (c >= 0x81 && c <= 0xa0){
|
||||
flag = 3;
|
||||
} else if (c >= 0xa1 && c <= 0xfe){
|
||||
flag = 4;
|
||||
}
|
||||
if ( c1 >= 0x81 && c1 <= 0xa0 && flag > 0){
|
||||
w = (c1 - 0x81)*178 + (c - ofst1[flag-1] + ofst2[flag-1]);
|
||||
if (w >= 0 && w < uhc1_ucs_table_size) {
|
||||
w = uhc1_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
} else if ( c1 >= 0xa1 && c1 <= 0xc6 && flag > 0){
|
||||
w = (c1 - 0xa1)*178 + (c - ofst1[flag-1] + ofst2[flag-1]);
|
||||
if (w >= 0 && w < uhc2_ucs_table_size) {
|
||||
w = uhc2_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
} else if ( c1 >= 0xc7 && c1 <= 0xfe && flag == 4){
|
||||
if (c1 < 0xc9){
|
||||
w = (c1 - 0xc7)*94 + (c - ofst1[flag-1]);
|
||||
} else {
|
||||
w = (c1 - 0xc8)*94 + (c - ofst1[flag-1]);
|
||||
}
|
||||
if (w >= 0 && w < uhc3_ucs_table_size) {
|
||||
w = uhc3_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
}
|
||||
if (flag > 0){
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_UHC;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => UHC
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, c2, s;
|
||||
|
||||
s = 0;
|
||||
if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
|
||||
s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
|
||||
} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
|
||||
s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
|
||||
} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
|
||||
s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
|
||||
} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
|
||||
s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
|
||||
} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
|
||||
s = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
|
||||
} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
|
||||
s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
|
||||
} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
|
||||
s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
|
||||
}
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_UHC) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
30
ext/mbstring/mbfilter_kr.h
Normal file
30
ext/mbstring/mbfilter_kr.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef MBFL_MBFILTER_KR_H
|
||||
#define MBFL_MBFILTER_KR_H
|
||||
|
||||
int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
|
||||
#endif /* MBFL_MBFILTER_KR_H */
|
336
ext/mbstring/mbfilter_tw.c
Normal file
336
ext/mbstring/mbfilter_tw.c
Normal file
@ -0,0 +1,336 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/*
|
||||
* "streamable traditional chinese code filter and converter"
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_tw.h"
|
||||
|
||||
#include "unicode_table_tw.h"
|
||||
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
/*
|
||||
* EUC-TW => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s, w, plane;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c > 0xa0 && c < 0xff) { /* dbcs first byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else if (c == 0x8e) { /* mbcs first byte */
|
||||
filter->status = 2;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* mbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if (c > 0xa0 && c < 0xff) {
|
||||
w = (c1 - 0xa1)*94 + (c - 0xa1);
|
||||
if (w >= 0 && w < cns11643_1_ucs_table_size) {
|
||||
w = cns11643_1_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_CNS11643;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: /* got 0x8e, first char */
|
||||
if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
filter->status = 0;
|
||||
} else if (c > 0xa0 && c < 0xaf) {
|
||||
filter->status = 3;
|
||||
filter->cache = c - 0xa1;
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 3: /* got 0x8e, third char */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
filter->status = 0;
|
||||
} else if (c > 0xa0 && c < 0xff) {
|
||||
filter->status = 4;
|
||||
filter->cache = (c1 << 8) + c - 0xa1;
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 4: /* mbcs fourth char */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff) {
|
||||
plane = (c1 & 0xf00) >> 8;
|
||||
s = (c1 & 0xff)*94 + c - 0xa1;
|
||||
w = 0;
|
||||
if (s >= 0) {
|
||||
if (plane == 1 & s < cns11643_2_ucs_table_size) {
|
||||
w = cns11643_2_ucs_table[s];
|
||||
}
|
||||
if (plane == 13 & s < cns11643_14_ucs_table_size) {
|
||||
w = cns11643_14_ucs_table[s];
|
||||
}
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_CNS11643;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c | 0x8e0000;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => EUC-TW
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c0, c1, c2, s, plane;
|
||||
|
||||
s = 0;
|
||||
if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
|
||||
s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
|
||||
} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
|
||||
s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
|
||||
} else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
|
||||
s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
|
||||
} else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
|
||||
s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
|
||||
} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
|
||||
s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
|
||||
}
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_CNS11643) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
plane = (s & 0x1f0000) >> 16;
|
||||
if (plane <= 1){
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
s = (s & 0xffff) | 0x8080;
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
s = (0x8ea00000 + (plane << 16)) | ((s & 0xffff) | 0x8080);
|
||||
CK((*filter->output_function)(0x8e , filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)((s >> 16) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Big5 => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, s1, s2, w;
|
||||
|
||||
switch (filter->status) {
|
||||
case 0:
|
||||
if (c >= 0 && c < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else if (c > 0xa0 && c < 0xff) { /* dbcs lead byte */
|
||||
filter->status = 1;
|
||||
filter->cache = c;
|
||||
} else {
|
||||
w = c & MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
case 1: /* dbcs second byte */
|
||||
filter->status = 0;
|
||||
c1 = filter->cache;
|
||||
if ((c > 0x39 && c < 0x7f) | (c > 0xa0 && c < 0xff)) {
|
||||
if (c < 0x7f){
|
||||
w = (c1 - 0xa1)*157 + (c - 0x40);
|
||||
} else {
|
||||
w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f;
|
||||
}
|
||||
if (w >= 0 && w < big5_ucs_table_size) {
|
||||
w = big5_ucs_table[w];
|
||||
} else {
|
||||
w = 0;
|
||||
}
|
||||
if (w <= 0) {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSPLANE_MASK;
|
||||
w |= MBFL_WCSPLANE_BIG5;
|
||||
}
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
} else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
|
||||
CK((*filter->output_function)(c, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
w = (c1 << 8) | c;
|
||||
w &= MBFL_WCSGROUP_MASK;
|
||||
w |= MBFL_WCSGROUP_THROUGH;
|
||||
CK((*filter->output_function)(w, filter->data TSRMLS_CC));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => Big5
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int c1, c2, s;
|
||||
|
||||
s = 0;
|
||||
if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
|
||||
s = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
|
||||
} else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
|
||||
s = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
|
||||
} else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
|
||||
s = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
|
||||
} else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
|
||||
s = ucs_i_big5_table[c - ucs_i_big5_table_min];
|
||||
} else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max) {
|
||||
s = ucs_pua_big5_table[c - ucs_pua_big5_table_min];
|
||||
} else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
|
||||
s = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
|
||||
} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
|
||||
s = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
|
||||
}
|
||||
if (s <= 0) {
|
||||
c1 = c & ~MBFL_WCSPLANE_MASK;
|
||||
if (c1 == MBFL_WCSPLANE_BIG5) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
if (c == 0) {
|
||||
s = 0;
|
||||
} else if (s <= 0) {
|
||||
s = -1;
|
||||
}
|
||||
}
|
||||
if (s >= 0) {
|
||||
if (s < 0x80) { /* latin */
|
||||
CK((*filter->output_function)(s, filter->data TSRMLS_CC));
|
||||
} else {
|
||||
CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
|
||||
CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
|
||||
}
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_TW */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
*/
|
30
ext/mbstring/mbfilter_tw.h
Normal file
30
ext/mbstring/mbfilter_tw.h
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2001 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 2.02 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available at through the world-wide-web at |
|
||||
| http://www.php.net/license/2_02.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: Rui Hirokawa <hirokawa@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef MBFL_MBFILTER_TW_H
|
||||
#define MBFL_MBFILTER_TW_H
|
||||
|
||||
/*
 * Conversion filter callbacks for the EUC-TW and Big5 encodings.
 * Each one receives a single input value `c` together with the convert
 * filter it belongs to, and returns an int.
 *
 * NOTE(review): the names appear to follow a <source>_<target> pattern
 * (euctw_wchar: EUC-TW to wchar, wchar_euctw: wchar to EUC-TW, and likewise
 * for Big5).  Only mbfl_filt_conv_wchar_big5 could be checked against its
 * implementation (commented "wchar => Big5"); confirm the others.
 */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
/* wchar => Big5: emits the Big5 byte(s) for c through filter->output_function. */
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
|
||||
#endif /* MBFL_MBFILTER_TW_H */
|
@ -71,6 +71,7 @@
|
||||
#include "mbregex.h"
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
|
||||
mbfl_no_encoding_ascii,
|
||||
mbfl_no_encoding_jis,
|
||||
@ -78,6 +79,35 @@ static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
|
||||
mbfl_no_encoding_euc_jp,
|
||||
mbfl_no_encoding_sjis
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
/*
 * Default identify list for a build with simplified chinese support
 * (HAVE_MBSTR_CN) and without japanese support (HAVE_MBSTR_JA).
 * The condition uses logical && rather than bitwise &: both operands are
 * truth values, so the result is the same but the intent is explicit.
 */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};
#endif
|
||||
|
||||
#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_JA)
/*
 * Default identify list for a build with traditional chinese support
 * (HAVE_MBSTR_TW) and without japanese support (HAVE_MBSTR_JA).
 * The condition uses logical && rather than bitwise &: both operands are
 * truth values, so the result is the same but the intent is explicit.
 */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};
#endif
|
||||
|
||||
#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_JA)
/*
 * Default identify list for a build with korean support (HAVE_MBSTR_KR)
 * and without japanese support (HAVE_MBSTR_JA).
 * The condition uses logical && rather than bitwise &: both operands are
 * truth values, so the result is the same but the intent is explicit.
 */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};
#endif
|
||||
|
||||
/* Number of entries in php_mbstr_default_identify_list. */
static const int php_mbstr_default_identify_list_size =
	sizeof(php_mbstr_default_identify_list) / sizeof(php_mbstr_default_identify_list[0]);
|
||||
|
||||
@ -525,10 +555,30 @@ PHP_INI_END()
|
||||
static void
|
||||
php_mbstring_init_globals(zend_mbstring_globals *pglobals TSRMLS_DC)
|
||||
{
|
||||
#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
	/* Simplified chinese build without japanese: default to EUC-CN. */
	MBSTRG(language) = mbfl_no_language_chinese;
	MBSTRG(current_language) = mbfl_no_language_chinese;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_cn;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_cn;
#endif
#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_JA)
	/* Traditional chinese build without japanese: default to EUC-TW. */
	MBSTRG(language) = mbfl_no_language_chinese;
	MBSTRG(current_language) = mbfl_no_language_chinese;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_tw;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_tw;
#endif
#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_JA)
	/*
	 * Korean build without japanese: default to EUC-KR.
	 * language must be korean as well; it was previously set to
	 * mbfl_no_language_chinese, disagreeing with current_language.
	 */
	MBSTRG(language) = mbfl_no_language_korean;
	MBSTRG(current_language) = mbfl_no_language_korean;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_kr;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_kr;
#endif
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
MBSTRG(language) = mbfl_no_language_japanese;
|
||||
MBSTRG(current_language) = mbfl_no_language_japanese;
|
||||
MBSTRG(internal_encoding) = mbfl_no_encoding_euc_jp;
|
||||
MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_jp;
|
||||
#endif
|
||||
MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
|
||||
MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
|
||||
MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
|
||||
@ -724,12 +774,24 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
|
||||
PHP_MINFO_FUNCTION(mbstring)
|
||||
{
|
||||
php_info_print_table_start();
|
||||
php_info_print_table_header(2, "Multibyte (Japanese) Support", "enabled");
|
||||
php_info_print_table_header(2, "Multibyte Support", "enabled");
|
||||
#if defined(HAVE_MBSTR_JA)
|
||||
php_info_print_table_row(2, "japanese support", "enabled");
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_CN)
|
||||
php_info_print_table_row(2, "simplified chinese support", "enabled");
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_TW)
|
||||
php_info_print_table_row(2, "traditional chinese support", "enabled");
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
php_info_print_table_row(2, "korean support", "enabled");
|
||||
#endif
|
||||
#if defined(MBSTR_ENC_TRANS)
|
||||
php_info_print_table_row(2, "http input encoding translation", "enabled");
|
||||
#endif
|
||||
#if defined(HAVE_MBREGEX)
|
||||
php_info_print_table_row(2, "multibyte regex support", "enabled");
|
||||
php_info_print_table_row(2, "multibyte (japanese) regex support", "enabled");
|
||||
#endif
|
||||
php_info_print_table_end();
|
||||
|
||||
|
6414
ext/mbstring/unicode_table_cn.h
Normal file
6414
ext/mbstring/unicode_table_cn.h
Normal file
File diff suppressed because it is too large
Load Diff
6934
ext/mbstring/unicode_table_kr.h
Normal file
6934
ext/mbstring/unicode_table_kr.h
Normal file
File diff suppressed because it is too large
Load Diff
11215
ext/mbstring/unicode_table_tw.h
Normal file
11215
ext/mbstring/unicode_table_tw.h
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user