Added Simplified Chinese, Traditional Chinese, and Korean support to mbstring. Note that this feature is experimental.

This commit is contained in:
Rui Hirokawa 2002-04-30 12:00:42 +00:00
parent 18805e8323
commit 5df01432c7
14 changed files with 26336 additions and 9 deletions

View File

@ -2,7 +2,7 @@ dnl
dnl $Id$
dnl
PHP_ARG_ENABLE(mbstring, whether to enable multibyte string support,
PHP_ARG_WITH(mbstring, whether to enable multibyte string support,
[ --disable-mbstring Disable multibyte string support], yes)
PHP_ARG_ENABLE(mbstr_enc_trans, whether to enable encoding translation,
@ -11,9 +11,28 @@ PHP_ARG_ENABLE(mbstr_enc_trans, whether to enable encoding translation,
PHP_ARG_ENABLE(mbregex, whether to enable multibyte regex support,
[ --enable-mbregex Enable multibyte regex support], yes)
if test "$PHP_MBSTRING" != "no"; then
if test "$PHP_MBSTRING" != "no"; then
AC_DEFINE(HAVE_MBSTRING,1,[whether to have multibyte string support])
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
dnl Define HAVE_MBSTR_JA to enable the Japanese filters.
dnl NOTE(review): this test runs inside the enclosing branch that already
dnl requires PHP_MBSTRING != "no", so its first operand is always true and
dnl HAVE_MBSTR_JA ends up defined for every enabled value, including cn, tw
dnl and kr. Confirm whether Japanese is meant to be unconditional; if not,
dnl the intended test is presumably = "yes" -o = "ja".
if test "$PHP_MBSTRING" != "no" -o "$PHP_MBSTRING" = "ja"; then
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
fi
if test "$PHP_MBSTRING" = "cn"; then
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
fi
if test "$PHP_MBSTRING" = "tw"; then
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
fi
if test "$PHP_MBSTRING" = "kr"; then
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
fi
if test "$PHP_MBSTRING" = "all"; then
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
fi
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
else
PHP_MBSTR_ENC_TRANS=no
fi

View File

@ -81,12 +81,30 @@
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#include <stdlib.h>
#include "mbfilter.h"
#if defined(HAVE_MBSTR_JA)
#include "mbfilter_ja.h"
#endif
#if defined(HAVE_MBSTR_CN)
#include "mbfilter_cn.h"
#endif
#if defined(HAVE_MBSTR_TW)
#include "mbfilter_tw.h"
#endif
#if defined(HAVE_MBSTR_KR)
#include "mbfilter_kr.h"
#endif
#include "zend.h"
#ifdef PHP_WIN32
@ -137,9 +155,20 @@ static mbfl_language mbfl_language_english = {
mbfl_no_encoding_8bit
};
/*
 * Language descriptor for Chinese ("zh").
 * NOTE(review): the three encoding members are mbfl_no_encoding_2022jp,
 * mbfl_no_encoding_base64 and mbfl_no_encoding_7bit.  ISO-2022-JP is by name
 * a Japanese encoding -- presumably carried over from the Japanese entry;
 * confirm which charset is intended for Chinese.
 */
static mbfl_language mbfl_language_chinese = {
mbfl_no_language_chinese,
"Chinese",
"zh",
NULL,
mbfl_no_encoding_2022jp,
mbfl_no_encoding_base64,
mbfl_no_encoding_7bit
};
/* NULL-terminated table of pointers to the available language descriptors. */
static mbfl_language *mbfl_language_ptr_table[] = {
&mbfl_language_uni,
&mbfl_language_japanese,
&mbfl_language_chinese,
&mbfl_language_english,
NULL
};
@ -204,6 +233,121 @@ static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
};
/*
 * EUC-CN length table: 256 entries (16 rows of 16).  Entries 0xA1-0xFE are 2,
 * every other entry is 1.  Presumably indexed by the first byte of a
 * character to give its length in bytes -- confirm against the code that
 * reads the table.
 */
static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * CP936 length table: 256 entries (16 rows of 16).  Entries 0x81-0xFE are 2,
 * every other entry (including 0x80 and 0xFF) is 1.  Presumably indexed by
 * the first byte of a character to give its length in bytes -- confirm
 * against the code that reads the table.
 */
static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * EUC-TW length table: 256 entries (16 rows of 16).  Entry 0x8E is 4,
 * entries 0xA1-0xFE are 2, every other entry is 1.  Presumably indexed by
 * the first byte of a character to give its length in bytes -- confirm
 * against the code that reads the table.
 */
static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Big5 length table: 256 entries (16 rows of 16).  Entries 0x81-0xFE are 2,
 * every other entry is 1.  Presumably indexed by the first byte of a
 * character to give its length in bytes -- confirm against the code that
 * reads the table.
 * NOTE(review): mbfl_filt_ident_big5 accepts only 0xA1-0xFE as a lead byte,
 * a narrower range than the 0x81-0xFE marked here; confirm which is intended.
 */
static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * EUC-KR length table: 256 entries (16 rows of 16).  Entries 0xA1-0xFE are 2,
 * every other entry is 1.  Presumably indexed by the first byte of a
 * character to give its length in bytes -- confirm against the code that
 * reads the table.
 */
static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * UHC length table: 256 entries (16 rows of 16).  Entries 0x81-0xFE are 2,
 * every other entry is 1.  Presumably indexed by the first byte of a
 * character to give its length in bytes -- confirm against the code that
 * reads the table.
 */
static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* encoding structure */
static const char *mbfl_encoding_pass_aliases[] = {"none", NULL};
@ -475,6 +619,7 @@ static mbfl_encoding mbfl_encoding_ascii = {
MBFL_ENCTYPE_SBCS
};
#if defined(HAVE_MBSTR_JA)
static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL};
static mbfl_encoding mbfl_encoding_euc_jp = {
@ -536,6 +681,83 @@ static mbfl_encoding mbfl_encoding_2022jp = {
NULL,
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
/*
 * EUC-CN: NULL-terminated alias list and the encoding descriptor.  The
 * descriptor points at mblen_table_euccn and is flagged MBFL_ENCTYPE_MBCS.
 * The two "EUC-CN" strings are presumably the canonical name and the MIME
 * name -- confirm against the mbfl_encoding definition.
 */
static const char *mbfl_encoding_euc_cn_aliases[] = {"EUC_CN", "eucCN", "x-euc-cn", NULL};
static mbfl_encoding mbfl_encoding_euc_cn = {
mbfl_no_encoding_euc_cn,
"EUC-CN",
"EUC-CN",
(const char *(*)[])&mbfl_encoding_euc_cn_aliases,
mblen_table_euccn,
MBFL_ENCTYPE_MBCS
};
/*
 * CP936: NULL-terminated alias list and the encoding descriptor.  The
 * descriptor points at mblen_table_cp936 and is flagged MBFL_ENCTYPE_MBCS.
 * The two "CP936" strings are presumably the canonical name and the MIME
 * name -- confirm against the mbfl_encoding definition.
 */
static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", NULL};
static mbfl_encoding mbfl_encoding_cp936 = {
mbfl_no_encoding_cp936,
"CP936",
"CP936",
(const char *(*)[])&mbfl_encoding_cp936_aliases,
mblen_table_cp936,
MBFL_ENCTYPE_MBCS
};
#endif /* HAVE_MBSTR_CN */
#if defined(HAVE_MBSTR_TW)
/*
 * EUC-TW: NULL-terminated alias list and the encoding descriptor.  The
 * descriptor points at mblen_table_euctw and is flagged MBFL_ENCTYPE_MBCS.
 * The two "EUC-TW" strings are presumably the canonical name and the MIME
 * name -- confirm against the mbfl_encoding definition.
 */
static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL};
static mbfl_encoding mbfl_encoding_euc_tw = {
mbfl_no_encoding_euc_tw,
"EUC-TW",
"EUC-TW",
(const char *(*)[])&mbfl_encoding_euc_tw_aliases,
mblen_table_euctw,
MBFL_ENCTYPE_MBCS
};
/*
 * Big5: NULL-terminated alias list and the encoding descriptor.  The
 * descriptor points at mblen_table_big5 and is flagged MBFL_ENCTYPE_MBCS.
 * NOTE(review): both name strings are "BIG-5"; if the second one is the MIME
 * name, the IANA-registered spelling is "Big5" -- confirm against the
 * mbfl_encoding definition and its consumers.
 */
static const char *mbfl_encoding_big5_aliases[] = {"big5", "CP950", NULL};
static mbfl_encoding mbfl_encoding_big5 = {
mbfl_no_encoding_big5,
"BIG-5",
"BIG-5",
(const char *(*)[])&mbfl_encoding_big5_aliases,
mblen_table_big5,
MBFL_ENCTYPE_MBCS
};
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
/*
 * EUC-KR: NULL-terminated alias list and the encoding descriptor.  The
 * descriptor points at mblen_table_euckr and is flagged MBFL_ENCTYPE_MBCS.
 * The two "EUC-KR" strings are presumably the canonical name and the MIME
 * name -- confirm against the mbfl_encoding definition.
 */
static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL};
static mbfl_encoding mbfl_encoding_euc_kr = {
mbfl_no_encoding_euc_kr,
"EUC-KR",
"EUC-KR",
(const char *(*)[])&mbfl_encoding_euc_kr_aliases,
mblen_table_euckr,
MBFL_ENCTYPE_MBCS
};
/*
 * UHC: NULL-terminated alias list (CP949) and the encoding descriptor.  The
 * descriptor points at mblen_table_uhc and is flagged MBFL_ENCTYPE_MBCS.
 * The two "UHC" strings are presumably the canonical name and the MIME
 * name -- confirm against the mbfl_encoding definition.
 */
static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL};
static mbfl_encoding mbfl_encoding_uhc = {
mbfl_no_encoding_uhc,
"UHC",
"UHC",
(const char *(*)[])&mbfl_encoding_uhc_aliases,
mblen_table_uhc,
MBFL_ENCTYPE_MBCS
};
#endif /* HAVE_MBSTR_KR */
static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
@ -720,12 +942,14 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_utf7,
&mbfl_encoding_utf7imap,
&mbfl_encoding_ascii,
#if defined(HAVE_MBSTR_JA)
&mbfl_encoding_euc_jp,
&mbfl_encoding_sjis,
&mbfl_encoding_eucjp_win,
&mbfl_encoding_sjis_win,
&mbfl_encoding_jis,
&mbfl_encoding_2022jp,
#endif
&mbfl_encoding_cp1252,
&mbfl_encoding_8859_1,
&mbfl_encoding_8859_2,
@ -740,6 +964,18 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_8859_13,
&mbfl_encoding_8859_14,
&mbfl_encoding_8859_15,
#if defined(HAVE_MBSTR_CN)
&mbfl_encoding_euc_cn,
&mbfl_encoding_cp936,
#endif
#if defined(HAVE_MBSTR_TW)
&mbfl_encoding_euc_tw,
&mbfl_encoding_big5,
#endif
#if defined(HAVE_MBSTR_KR)
&mbfl_encoding_euc_kr,
&mbfl_encoding_uhc,
#endif
NULL
};
@ -825,12 +1061,30 @@ static void mbfl_filt_ident_false_ctor(mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_utf8(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_ascii(int c, mbfl_identify_filter *filter TSRMLS_DC);
#if defined(HAVE_MBSTR_JA)
static int mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_sjis(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_sjiswin(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_jis(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter TSRMLS_DC);
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
static int mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter TSRMLS_DC);
#endif /* HAVE_MBSTR_CN */
#if defined(HAVE_MBSTR_TW)
static int mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC);
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC);
#endif /* HAVE_MBSTR_KR */
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter TSRMLS_DC);
static int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter TSRMLS_DC);
@ -1221,6 +1475,7 @@ static struct mbfl_convert_vtbl vtbl_wchar_ascii = {
mbfl_filt_conv_wchar_ascii,
mbfl_filt_conv_common_flush };
#if defined(HAVE_MBSTR_JA)
static struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_wchar,
@ -1316,6 +1571,109 @@ static struct mbfl_convert_vtbl vtbl_wchar_sjiswin = {
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_sjiswin,
mbfl_filt_conv_common_flush };
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
/*
 * Conversion vtables for EUC-CN.  vtbl_euccn_wchar pairs
 * mbfl_no_encoding_euc_cn / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_euccn_wchar; vtbl_wchar_euccn lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_euccn.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_euccn_wchar = {
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euccn_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_euccn = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_cn,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euccn,
mbfl_filt_conv_common_flush };
/*
 * Conversion vtables for CP936.  vtbl_cp936_wchar pairs
 * mbfl_no_encoding_cp936 / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_cp936_wchar; vtbl_wchar_cp936 lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_cp936.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_cp936_wchar = {
mbfl_no_encoding_cp936,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_cp936_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_cp936 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_cp936,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_cp936,
mbfl_filt_conv_common_flush };
#endif /* HAVE_MBSTR_CN */
#if defined(HAVE_MBSTR_TW)
/*
 * Conversion vtables for EUC-TW.  vtbl_euctw_wchar pairs
 * mbfl_no_encoding_euc_tw / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_euctw_wchar; vtbl_wchar_euctw lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_euctw.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_euctw_wchar = {
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euctw_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_euctw = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_tw,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euctw,
mbfl_filt_conv_common_flush };
/*
 * Conversion vtables for Big5.  vtbl_big5_wchar pairs
 * mbfl_no_encoding_big5 / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_big5_wchar; vtbl_wchar_big5 lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_big5.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_big5_wchar = {
mbfl_no_encoding_big5,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_big5_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_big5 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_big5,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_big5,
mbfl_filt_conv_common_flush };
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
/*
 * Conversion vtables for EUC-KR.  vtbl_euckr_wchar pairs
 * mbfl_no_encoding_euc_kr / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_euckr_wchar; vtbl_wchar_euckr lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_euckr.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_euckr_wchar = {
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_euckr_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_euckr = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_euc_kr,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_euckr,
mbfl_filt_conv_common_flush };
/*
 * Conversion vtables for UHC.  vtbl_uhc_wchar pairs
 * mbfl_no_encoding_uhc / mbfl_no_encoding_wchar with
 * mbfl_filt_conv_uhc_wchar; vtbl_wchar_uhc lists the two encodings in the
 * opposite order with mbfl_filt_conv_wchar_uhc.  The first two members are
 * presumably the source and destination encodings, in that order.  Both use
 * the common constructor, destructor and flush routines.
 */
static struct mbfl_convert_vtbl vtbl_uhc_wchar = {
mbfl_no_encoding_uhc,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_uhc_wchar,
mbfl_filt_conv_common_flush };
static struct mbfl_convert_vtbl vtbl_wchar_uhc = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_uhc,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_uhc,
mbfl_filt_conv_common_flush };
#endif /* HAVE_MBSTR_KR */
static struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
mbfl_no_encoding_cp1252,
@ -1546,6 +1904,7 @@ static struct mbfl_convert_vtbl vtbl_wchar_8859_15 = {
static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_utf8_wchar,
&vtbl_wchar_utf8,
#if defined(HAVE_MBSTR_JA)
&vtbl_eucjp_wchar,
&vtbl_wchar_eucjp,
&vtbl_sjis_wchar,
@ -1558,6 +1917,25 @@ static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_eucjpwin,
&vtbl_sjiswin_wchar,
&vtbl_wchar_sjiswin,
#endif
#if defined(HAVE_MBSTR_CN)
&vtbl_euccn_wchar,
&vtbl_wchar_euccn,
&vtbl_cp936_wchar,
&vtbl_wchar_cp936,
#endif
#if defined(HAVE_MBSTR_TW)
&vtbl_euctw_wchar,
&vtbl_wchar_euctw,
&vtbl_big5_wchar,
&vtbl_wchar_big5,
#endif
#if defined(HAVE_MBSTR_KR)
&vtbl_euckr_wchar,
&vtbl_wchar_euckr,
&vtbl_uhc_wchar,
&vtbl_wchar_uhc,
#endif
&vtbl_cp1252_wchar,
&vtbl_wchar_cp1252,
&vtbl_ascii_wchar,
@ -1655,6 +2033,7 @@ static struct mbfl_identify_vtbl vtbl_identify_utf7 = {
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_utf7 };
#if defined(HAVE_MBSTR_JA)
static struct mbfl_identify_vtbl vtbl_identify_eucjp = {
mbfl_no_encoding_euc_jp,
mbfl_filt_ident_common_ctor,
@ -1690,6 +2069,49 @@ static struct mbfl_identify_vtbl vtbl_identify_2022jp = {
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_2022jp };
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
/* Identify vtable for EUC-CN: common ctor/dtor plus mbfl_filt_ident_euccn. */
static struct mbfl_identify_vtbl vtbl_identify_euccn = {
mbfl_no_encoding_euc_cn,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euccn };
/* Identify vtable for CP936: common ctor/dtor plus mbfl_filt_ident_cp936. */
static struct mbfl_identify_vtbl vtbl_identify_cp936 = {
mbfl_no_encoding_cp936,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_cp936 };
#endif /* HAVE_MBSTR_CN */
#if defined(HAVE_MBSTR_TW)
/* Identify vtable for EUC-TW: common ctor/dtor plus mbfl_filt_ident_euctw. */
static struct mbfl_identify_vtbl vtbl_identify_euctw = {
mbfl_no_encoding_euc_tw,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euctw };
/* Identify vtable for Big5: common ctor/dtor plus mbfl_filt_ident_big5. */
static struct mbfl_identify_vtbl vtbl_identify_big5 = {
mbfl_no_encoding_big5,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_big5 };
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
/* Identify vtable for EUC-KR: common ctor/dtor plus mbfl_filt_ident_euckr. */
static struct mbfl_identify_vtbl vtbl_identify_euckr = {
mbfl_no_encoding_euc_kr,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_euckr };
/* Identify vtable for UHC: common ctor/dtor plus mbfl_filt_ident_uhc. */
static struct mbfl_identify_vtbl vtbl_identify_uhc = {
mbfl_no_encoding_uhc,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_uhc };
#endif /* HAVE_MBSTR_KR */
static struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
mbfl_no_encoding_cp1252,
@ -1785,12 +2207,26 @@ static struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_utf8,
&vtbl_identify_utf7,
&vtbl_identify_ascii,
#if defined(HAVE_MBSTR_JA)
&vtbl_identify_eucjp,
&vtbl_identify_sjis,
&vtbl_identify_eucjpwin,
&vtbl_identify_sjiswin,
&vtbl_identify_jis,
&vtbl_identify_2022jp,
#endif
#if defined(HAVE_MBSTR_CN)
&vtbl_identify_euccn,
&vtbl_identify_cp936,
#endif
#if defined(HAVE_MBSTR_TW)
&vtbl_identify_euctw,
&vtbl_identify_big5,
#endif
#if defined(HAVE_MBSTR_KR)
&vtbl_identify_euckr,
&vtbl_identify_uhc,
#endif
&vtbl_identify_cp1252,
&vtbl_identify_8859_1,
&vtbl_identify_8859_2,
@ -5075,6 +5511,7 @@ mbfl_filt_ident_utf7(int c, mbfl_identify_filter *filter TSRMLS_DC)
return c;
}
#if defined(HAVE_MBSTR_JA)
static int
mbfl_filt_ident_eucjp(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
@ -5268,6 +5705,207 @@ retry:
return c;
}
#endif /* HAVE_MBSTR_JA */
#if defined(HAVE_MBSTR_CN)
/*
 * Identify filter for EUC-CN.
 *
 * Called once per input value c.  filter->status is 0 when a new character
 * is expected and 1 when the trail byte of a two-byte character is pending.
 * filter->flag is set to 1 as soon as c cannot belong to EUC-CN.
 * Always returns c.
 */
static int
mbfl_filt_ident_euccn(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (filter->status == 0) {
		if (c >= 0xa1 && c <= 0xfe) {
			/* lead byte of a two-byte character */
			filter->status = 1;
		} else if (c < 0 || c > 0x7f) {
			/* neither a single byte (0x00-0x7f) nor a lead byte */
			filter->flag = 1;
		}
		return c;
	}

	/* status 1: the trail byte must lie in 0xa1..0xfe as well */
	if (filter->status == 1 && (c < 0xa1 || c > 0xfe)) {
		filter->flag = 1;
	}

	/* a finished character and an unexpected status both restart at 0 */
	filter->status = 0;
	return c;
}
/*
 * Identify filter for CP936.
 *
 * Called once per input value c.  A nonzero filter->status means the trail
 * byte of a two-byte character is pending.  filter->flag is set to 1 as
 * soon as c cannot belong to CP936.  Always returns c.
 */
static int
mbfl_filt_ident_cp936(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (filter->status == 0) {
		if (c >= 0x81 && c <= 0xfe) {
			/* lead byte of a two-byte character */
			filter->status = 1;
		} else if (c < 0 || c >= 0x80) {
			/* 0x80, 0xff and anything outside 0x00..0xff */
			filter->flag = 1;
		}
		return c;
	}

	/* trail byte: 0x40..0xfe except 0x7f */
	if (c < 0x40 || c == 0x7f || c > 0xfe) {
		filter->flag = 1;
	}
	filter->status = 0;
	return c;
}
#endif /* HAVE_MBSTR_CN */
#if defined(HAVE_MBSTR_TW)
/*
 * Identify filter for EUC-TW.
 *
 * Called once per input value c.  filter->status tracks the position inside
 * a character:
 *   0  start of a character
 *   1  trail byte of a two-byte character pending
 *   2  0x8e seen, second byte of a four-byte character pending
 *   3  third byte of a four-byte character pending
 *   4  fourth byte of a four-byte character pending
 * filter->flag is set to 1 as soon as c does not fit.  Always returns c.
 */
static int
mbfl_filt_ident_euctw(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	/* nonzero when c lies in 0xa1..0xfe */
	const int in_a1_fe = (c >= 0xa1 && c <= 0xfe);

	switch (filter->status) {
	case 0:
		if (in_a1_fe) {
			filter->status = 1;
		} else if (c == 0x8e) {
			filter->status = 2;
		} else if (c < 0 || c >= 0x80) {
			filter->flag = 1;
		}
		break;

	case 2:
		/* the byte after 0x8e must lie in 0xa1..0xae */
		if (c >= 0xa1 && c <= 0xae) {
			filter->status = 3;
		} else {
			/* status is left at 2 on this path */
			filter->flag = 1;
		}
		break;

	case 3:
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 4;
		break;

	case 1:
	case 4:
		/* last byte of a two-byte or four-byte character */
		if (!in_a1_fe) {
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * Identify filter for Big5.
 *
 * Called once per input value c.  A nonzero filter->status means the trail
 * byte of a two-byte character is pending.  filter->flag is set to 1 as
 * soon as c cannot belong to Big5.  Always returns c.
 */
static int
mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (filter->status != 0) {
		/* a trail byte lies in 0x40..0x7e or in 0xa1..0xfe */
		const int low_trail = (c >= 0x40 && c <= 0x7e);
		const int high_trail = (c >= 0xa1 && c <= 0xfe);

		if (!low_trail && !high_trail) {
			filter->flag = 1;
		}
		filter->status = 0;
		return c;
	}

	if (c >= 0 && c < 0x80) {
		/* single byte, nothing to record */
		return c;
	}

	if (c > 0xa0 && c < 0xff) {
		/* lead byte of a two-byte character */
		filter->status = 1;
	} else {
		filter->flag = 1;
	}
	return c;
}
#endif /* HAVE_MBSTR_TW */
#if defined(HAVE_MBSTR_KR)
/*
 * Identify filter for EUC-KR.
 *
 * Called once per input value c.  filter->status is 0 when a new character
 * is expected and 1 when the trail byte of a two-byte character is pending.
 * filter->flag is set to 1 as soon as c cannot belong to EUC-KR.
 * Always returns c.
 */
static int
mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	/* lead and trail bytes share the range 0xa1..0xfe */
	const int is_dbcs_byte = (c > 0xa0 && c < 0xff);

	if (filter->status == 1) {
		/* trail byte expected */
		if (!is_dbcs_byte) {
			filter->flag = 1;
		}
		filter->status = 0;
	} else if (filter->status != 0) {
		/* unexpected status: start over */
		filter->status = 0;
	} else if (is_dbcs_byte) {
		filter->status = 1;
	} else if (!(c >= 0 && c < 0x80)) {
		filter->flag = 1;
	}

	return c;
}
/*
 * Identify filter for UHC.
 *
 * Called once per input value c.  filter->status records which kind of lead
 * byte was seen last:
 *   0  start of a character
 *   1  lead byte 0x81..0xa0 seen, trail byte pending
 *   2  lead byte 0xa1..0xc6 seen, trail byte pending
 *   3  lead byte 0xc7..0xfe seen, trail byte pending
 * filter->flag is set to 1 as soon as c does not fit.  Always returns c.
 */
static int
mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	switch (filter->status) {
	case 0: /* latin */
		if (c >= 0 && c < 0x80) { /* ok */
			;
		} else if (c >= 0x81 && c <= 0xa0) { /* dbcs first char */
			filter->status = 1;
		} else if (c >= 0xa1 && c <= 0xc6) { /* dbcs first char */
			filter->status = 2;
		} else if (c >= 0xc7 && c <= 0xfe) { /* dbcs first char */
			filter->status = 3;
		} else { /* bad */
			filter->flag = 1;
		}
		/*
		 * Stop here.  Falling through into the trail-byte check would
		 * test the lead/ASCII value itself as a trail byte (flagging
		 * ASCII below 0x41 such as digits and spaces) and would reset
		 * the status that was just recorded.
		 */
		break;

	case 1:
	case 2:
		/* trail byte: 0x41..0x5a, 0x61..0x7a or 0x81..0xfe */
		if (c < 0x41 || (c > 0x5a && c < 0x61)
			|| (c > 0x7a && c < 0x81) || c > 0xfe) { /* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	case 3:
		/* trail byte: 0xa1..0xfe */
		if (c < 0xa1 || c > 0xfe) { /* bad */
			filter->flag = 1;
		}
		filter->status = 0;
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
#endif /* HAVE_MBSTR_KR */
/* We only distinguish the MS extensions to ISO-8859-1.
* Actually, this is pretty much a NO-OP, since the identification
@ -8232,3 +8870,10 @@ mbfl_html_numeric_entity(
return result;
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View File

@ -109,7 +109,7 @@ enum mbfl_no_language {
mbfl_no_language_korean, /* ko */
mbfl_no_language_dutch, /* nl */
mbfl_no_language_polish, /* pl */
mbfl_no_language_portuguese, /* pt */
mbfl_no_language_portuguese, /* pt */
mbfl_no_language_swedish, /* sv */
mbfl_no_language_chinese, /* zh */
mbfl_no_language_max
@ -167,6 +167,12 @@ enum mbfl_no_encoding {
mbfl_no_encoding_8859_13,
mbfl_no_encoding_8859_14,
mbfl_no_encoding_8859_15,
mbfl_no_encoding_euc_cn,
mbfl_no_encoding_cp936,
mbfl_no_encoding_euc_tw,
mbfl_no_encoding_big5,
mbfl_no_encoding_euc_kr,
mbfl_no_encoding_uhc,
mbfl_no_encoding_charset_max
};
@ -210,7 +216,7 @@ typedef struct _mbfl_encoding {
#define MBFL_ENCTYPE_MWC4LE 0x00000800
#define MBFL_ENCTYPE_SHFTCODE 0x00001000
/* wchar plane, spesial charactor */
/* wchar plane, special character */
#define MBFL_WCSPLANE_MASK 0xffff
#define MBFL_WCSPLANE_UCS2MAX 0x00010000
#define MBFL_WCSPLANE_SUPMIN 0x00010000
@ -233,7 +239,11 @@ typedef struct _mbfl_encoding {
#define MBFL_WCSPLANE_8859_15 0x70f00000 /* 00h - FFh */
#define MBFL_WCSPLANE_KSC5601 0x70f10000 /* 2121h - 7E7Eh */
#define MBFL_WCSPLANE_GB2312 0x70f20000 /* 2121h - 7E7Eh */
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSPLANE_WINCP936 0x70f30000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
#define MBFL_WCSGROUP_THROUGH 0x78000000 /* 000000h - FFFFFFh */

263
ext/mbstring/mbfilter_cn.c Normal file
View File

@ -0,0 +1,263 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/*
* "streamable simplified chinese code filter and converter"
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#if defined(HAVE_MBSTR_CN)
#include "mbfilter.h"
#include "mbfilter_cn.h"
#include "unicode_table_cn.h"
/* Evaluate statement; if its value is negative, return -1 from the enclosing function. */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * EUC-CN => wchar
 *
 * Called once per input value c.  filter->status is 0 at the start of a
 * character and 1 while the trail byte is pending; the lead byte is kept in
 * filter->cache.  Converted values are passed to filter->output_function.
 * Returns c, or -1 (through CK) when the output function reports a negative
 * result.
 */
int
mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			/* not valid here: forward it tagged with MBFL_WCSGROUP_THROUGH */
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) {
			/* cp936_ucs_table holds 192 entries per lead byte, starting at 0x81/0x40 */
			w = (c1 - 0x81)*192 + (c - 0x40);
			if (w >= 0 && w < cp936_ucs_table_size) {
				w = cp936_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no table entry: emit the raw code tagged with the GB2312 plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_GB2312;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			/* only c is emitted on this path; the cached lead byte is not */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => EUC-CN
 *
 * Called once per input value c.  Values 0x00..0x7f are emitted unchanged as
 * a single byte, mirroring the EUC-CN => wchar direction, which passes the
 * same range through.  Other values are looked up in the ucs_*_cp936 tables;
 * a result whose bytes are not both >= 0xa1 is discarded.  A value tagged
 * with MBFL_WCSPLANE_GB2312 is emitted from its low 16 bits.  Anything else
 * goes to mbfl_filt_conv_illegal_output unless filter->illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 (through CK) when an output call reports a negative
 * result.
 */
int
mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, c2, s;

	s = 0;
	if (c >= 0 && c < 0x80) {
		/*
		 * Single-byte range.  It must bypass the "both bytes >= 0xa1"
		 * test below: that test zeroes every single-byte code, which
		 * made all non-NUL ASCII characters unencodable.
		 */
		s = c;
	} else {
		if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
			s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
		} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
			s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
		} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
			s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
		} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
			s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
		} else if (c >= ucs_r_cp936_table_min && c < ucs_r_cp936_table_max) {
			s = ucs_r_cp936_table[c - ucs_r_cp936_table_min];
		}
		c1 = (s >> 8) & 0xff;
		c2 = s & 0xff;
		if (c1 < 0xa1 || c2 < 0xa1) {	/* exclude CP936 extension */
			s = 0;
		}
	}

	if (s <= 0) {
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_GB2312) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		if (s < 0x80) {	/* latin */
			CK((*filter->output_function)(s, filter->data TSRMLS_CC));
		} else {
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
			CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
	}

	return c;
}
/*
 * CP936 => wchar
 *
 * Called once per input value c.  filter->status is 0 at the start of a
 * character and 1 while the trail byte is pending; the lead byte is kept in
 * filter->cache.  Converted values are passed to filter->output_function.
 * Returns c, or -1 (through CK) when the output function reports a negative
 * result.
 */
int
mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c == 0x80) {	/* euro sign */
			CK((*filter->output_function)(0x20ac, filter->data TSRMLS_CC));
		} else if (c > 0x80 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			/* not valid here: forward it tagged with MBFL_WCSGROUP_THROUGH */
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		/*
		 * Trail bytes start at 0x40 (the same bound mbfl_filt_ident_cp936
		 * uses).  Accepting 0x3a..0x3f would make (c - 0x40) negative and
		 * index the table row of the previous lead byte.
		 */
		if (c1 > 0x80 && c1 < 0xff && c >= 0x40 && c < 0xff && c != 0x7f) {
			/* cp936_ucs_table holds 192 entries per lead byte, starting at 0x81/0x40 */
			w = (c1 - 0x81)*192 + (c - 0x40);
			if (w >= 0 && w < cp936_ucs_table_size) {
				w = cp936_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no table entry: emit the raw code tagged with the CP936 plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_WINCP936;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			/* only c is emitted on this path; the cached lead byte is not */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
 * wchar => CP936
 *
 * Called once per input value c.  The value is looked up in the
 * ucs_*_cp936 tables; when no positive code results, a value tagged with
 * MBFL_WCSPLANE_WINCP936 is emitted from its low 16 bits and 0 is emitted
 * as 0.  Codes below 0x80 are written as one byte, all others as two bytes
 * (high byte first).  Anything without a code goes to
 * mbfl_filt_conv_illegal_output unless filter->illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 (through CK) when an output call reports a negative
 * result.
 */
int
mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int code = 0;

	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		code = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		code = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		code = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		code = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_r_cp936_table_min && c < ucs_r_cp936_table_max) {
		code = ucs_r_cp936_table[c - ucs_r_cp936_table_min];
	}

	if (code <= 0) {
		/* no table hit: accept a raw code carried in the CP936 plane */
		if ((c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_WINCP936) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		/* nothing to emit for c */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
		return c;
	}

	if (code >= 0x80) {
		/* two-byte code: high byte first, low byte below */
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data TSRMLS_CC));
		code &= 0xff;
	}
	CK((*filter->output_function)(code, filter->data TSRMLS_CC));

	return c;
}
#endif /* HAVE_MBSTR_CN */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View File

@ -0,0 +1,30 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifndef MBFL_MBFILTER_CN_H
#define MBFL_MBFILTER_CN_H
/*
 * Conversion filter entry points for simplified chinese.
 * NOTE(review): the definitions are not visible next to this header; the
 * direction of each converter is inferred from its name ("x_wchar" reads
 * encoding x, "wchar_x" writes encoding x) -- confirm against mbfilter_cn.c.
 */
/* presumably EUC-CN => wchar */
int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* presumably wchar => EUC-CN */
int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* presumably CP936 => wchar */
int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* presumably wchar => CP936 */
int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter TSRMLS_DC);
#endif /* MBFL_MBFILTER_CN_H */

View File

@ -79,9 +79,15 @@
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#if defined(HAVE_MBSTR_JA)
#include "mbfilter.h"
#include "mbfilter_ja.h"
@ -1274,3 +1280,12 @@ mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter TSRMLS_DC)
filter->status &= 0xff;
return 0;
}
#endif /* HAVE_MBSTR_JA */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

324
ext/mbstring/mbfilter_kr.c Normal file
View File

@ -0,0 +1,324 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/*
* "streamable korean code filter and converter"
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#if defined(HAVE_MBSTR_KR)
#include "mbfilter.h"
#include "mbfilter_cn.h"
#include "unicode_table_kr.h"
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* EUC-KR => wchar
*/
/*
 * Feed one byte of an EUC-KR stream into the filter.
 *
 * c      - the next input byte
 * filter - conversion state; status 0 means "no pending lead byte",
 *          status 1 means the lead byte is stored in filter->cache
 *
 * Decoded characters are handed to filter->output_function.  Returns c,
 * or -1 as soon as the output function reports a failure.
 */
int
mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int lead, idx, wc, region;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {
			/* latin: emitted unchanged */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xff && c != 0xc9) {
			/* dbcs lead byte: keep it until the trail byte arrives */
			filter->cache = c;
			filter->status = 1;
		} else {
			/* anything else is forwarded tagged as a "through" value */
			wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data TSRMLS_CC));
		}
		break;

	case 1:
		/* dbcs second byte */
		lead = filter->cache;
		filter->status = 0;

		/* region 1 is resolved through uhc2_ucs_table, region 2 through uhc3_ucs_table */
		if (lead >= 0xa1 && lead <= 0xc6) {
			region = 1;
		} else if (lead >= 0xc7 && lead <= 0xfe && lead != 0xc9) {
			region = 2;
		} else {
			region = 0;
		}

		if (region != 0 && c >= 0xa1 && c <= 0xfe) {
			if (region == 1) {
				/* rows of uhc2_ucs_table are 178 cells wide; 0xa1.. starts at cell 0x54 */
				idx = (lead - 0xa1) * 178 + (c - 0xa1) + 0x54;
				if (idx >= 0 && idx < uhc2_ucs_table_size) {
					wc = uhc2_ucs_table[idx];
				} else {
					wc = 0;
				}
			} else {
				/* lead byte 0xc9 has no row, so rows after it move up by one */
				idx = (lead - (lead < 0xc9 ? 0xc7 : 0xc8)) * 94 + c - 0xa1;
				if (idx >= 0 && idx < uhc3_ucs_table_size) {
					wc = uhc3_ucs_table[idx];
				} else {
					wc = 0;
				}
			}
			if (wc <= 0) {
				/* no table entry: carry the raw code in the KSC5601 plane */
				wc = (((lead << 8) | c) & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_KSC5601;
			}
			CK((*filter->output_function)(wc, filter->data TSRMLS_CC));
		} else if (c == 0x7f || (c >= 0 && c < 0x21)) {
			/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			wc = (((lead << 8) | c) & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(wc, filter->data TSRMLS_CC));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
* wchar => EUC-KR
*/
/*
 * Convert one wchar value to EUC-KR and write the resulting byte(s).
 *
 * c      - the character to encode (or a value tagged with
 *          MBFL_WCSPLANE_KSC5601 that carries a raw code)
 * filter - conversion state; bytes go to filter->output_function
 *
 * Values without an EUC-KR representation are passed to
 * mbfl_filt_conv_illegal_output() unless the filter's illegal mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.  Returns c, or -1 when an output
 * call fails.
 */
int
mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, c2, s;

	s = 0;
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		s = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	c1 = (s >> 8) & 0xff;
	c2 = s & 0xff;
	/*
	 * exclude UHC extension area: a double-byte code is valid EUC-KR only
	 * when BOTH bytes lie in 0xa1..0xfe.  Single-byte results (s < 0x80)
	 * are not extension codes and are left alone so that the latin branch
	 * below can emit them.
	 */
	if (s >= 0x80 && (c1 < 0xa1 || c1 > 0xfe || c2 < 0xa1 || c2 > 0xfe)) {
		s = 0;
	}

	if (s <= 0) {
		/* a raw code carried in the KSC5601 plane is written back as is */
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_KSC5601) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		if (s < 0x80) {	/* latin */
			CK((*filter->output_function)(s, filter->data TSRMLS_CC));
		} else {
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
			CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			/* TSRMLS_CC is required here just like in every other converter */
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
	}

	return c;
}
/*
* UHC => wchar
*/
/*
 * Feed one byte of a UHC stream into the filter.
 *
 * c      - the next input byte
 * filter - conversion state; status 0 means "no pending lead byte",
 *          status 1 means the lead byte is stored in filter->cache
 *
 * Decoded characters are handed to filter->output_function.  Returns c,
 * or -1 as soon as the output function reports a failure.
 */
int
mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	/* first byte of each of the four accepted trail-byte ranges */
	static const short ofst1[] = { 0x41, 0x61, 0x81, 0xa1 };
	/* cell at which each of those ranges starts inside a 178-cell table row */
	static const short ofst2[] = { 0x0, 0x1a, 0x34, 0x54 };
	int c1, w, flag;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0x80 && c < 0xff && c != 0xc9) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;

		/* flag is the 1-based number of the trail-byte range, 0 if none */
		flag = 0;
		if (c >= 0x41 && c <= 0x5a) {
			flag = 1;
		} else if (c >= 0x61 && c <= 0x7a) {
			flag = 2;
		} else if (c >= 0x81 && c <= 0xa0) {
			flag = 3;
		} else if (c >= 0xa1 && c <= 0xfe) {
			flag = 4;
		}

		/*
		 * Start from "no mapping": not every lead/trail combination with
		 * flag > 0 is handled by the lookups below (e.g. lead 0xc7..0xfe
		 * with a trail byte under 0xa1), and w is tested afterwards.
		 */
		w = 0;
		if (c1 >= 0x81 && c1 <= 0xa0 && flag > 0) {
			w = (c1 - 0x81)*178 + (c - ofst1[flag-1] + ofst2[flag-1]);
			if (w >= 0 && w < uhc1_ucs_table_size) {
				w = uhc1_ucs_table[w];
			} else {
				w = 0;
			}
		} else if (c1 >= 0xa1 && c1 <= 0xc6 && flag > 0) {
			w = (c1 - 0xa1)*178 + (c - ofst1[flag-1] + ofst2[flag-1]);
			if (w >= 0 && w < uhc2_ucs_table_size) {
				w = uhc2_ucs_table[w];
			} else {
				w = 0;
			}
		} else if (c1 >= 0xc7 && c1 <= 0xfe && flag == 4) {
			/* lead byte 0xc9 has no row, so rows after it move up by one */
			if (c1 < 0xc9) {
				w = (c1 - 0xc7)*94 + (c - ofst1[flag-1]);
			} else {
				w = (c1 - 0xc8)*94 + (c - ofst1[flag-1]);
			}
			if (w >= 0 && w < uhc3_ucs_table_size) {
				w = uhc3_ucs_table[w];
			} else {
				w = 0;
			}
		}

		if (flag > 0) {
			if (w <= 0) {
				/* no table entry: carry the raw code in the UHC plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_UHC;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else {
			if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
				CK((*filter->output_function)(c, filter->data TSRMLS_CC));
			} else {
				w = (c1 << 8) | c;
				w &= MBFL_WCSGROUP_MASK;
				w |= MBFL_WCSGROUP_THROUGH;
				CK((*filter->output_function)(w, filter->data TSRMLS_CC));
			}
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
* wchar => UHC
*/
/*
 * Convert one wchar value to UHC and write the resulting byte(s) through
 * filter->output_function.  Values that cannot be encoded go to
 * mbfl_filt_conv_illegal_output() unless the filter's illegal mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.  Returns c, or -1 when an output
 * call fails.
 */
int
mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int code = 0;

	/* pick the table whose range contains c, if any */
	if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
		code = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
	} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
		code = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min];
	} else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) {
		code = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min];
	} else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) {
		code = ucs_i_uhc_table[c - ucs_i_uhc_table_min];
	} else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) {
		code = ucs_s_uhc_table[c - ucs_s_uhc_table_min];
	} else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) {
		code = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min];
	} else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) {
		code = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min];
	}

	if (code <= 0) {
		/* a raw code carried in the UHC plane is written back as is */
		if ((c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_UHC) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		/* not representable */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
		return c;
	}

	if (code >= 0x80) {
		/* double-byte code: high byte first */
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data TSRMLS_CC));
	}
	/* low byte, which is the whole code for latin (code < 0x80) */
	CK((*filter->output_function)(code & 0xff, filter->data TSRMLS_CC));

	return c;
}
#endif /* HAVE_MBSTR_KR */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View File

@ -0,0 +1,30 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifndef MBFL_MBFILTER_KR_H
#define MBFL_MBFILTER_KR_H
/*
 * Korean conversion filters.  Each takes one input value c and the filter
 * state, writes its result through filter->output_function and returns c,
 * or -1 when an output call fails.
 */
/* EUC-KR => wchar; a lead byte is cached in the filter until its trail byte arrives */
int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* wchar => EUC-KR */
int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* UHC => wchar; a lead byte is cached in the filter until its trail byte arrives */
int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* wchar => UHC */
int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter TSRMLS_DC);
#endif /* MBFL_MBFILTER_KR_H */

336
ext/mbstring/mbfilter_tw.c Normal file
View File

@ -0,0 +1,336 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/*
* "streamable traditional chinese code filter and converter"
*/
/* $Id$ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php.h"
#include "php_globals.h"
#if defined(HAVE_MBSTR_TW)
#include "mbfilter.h"
#include "mbfilter_tw.h"
#include "unicode_table_tw.h"
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
* EUC-TW => wchar
*/
/*
 * Feed one byte of an EUC-TW stream into the filter.
 *
 * c      - the next input byte
 * filter - conversion state.  filter->status is the decoder state:
 *            0  idle
 *            1  got the lead byte of a two-byte sequence (in cache)
 *            2  got 0x8e, waiting for the plane byte
 *            3  got the plane byte, waiting for the row byte
 *            4  got the row byte, waiting for the final byte
 *
 * Decoded characters are handed to filter->output_function.  Returns c,
 * or -1 as soon as the output function reports a failure.
 */
int
mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, s, w, plane;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs first byte */
			filter->status = 1;
			filter->cache = c;
		} else if (c == 0x8e) {	/* mbcs first byte */
			filter->status = 2;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 1:	/* mbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		if (c > 0xa0 && c < 0xff) {
			w = (c1 - 0xa1)*94 + (c - 0xa1);
			if (w >= 0 && w < cns11643_1_ucs_table_size) {
				w = cns11643_1_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no table entry: carry the raw code in the CNS11643 plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_CNS11643;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 2:	/* got 0x8e, first char */
		/*
		 * Like the other states: leave the multi-byte state unless the
		 * sequence continues, and load the cached byte so that the error
		 * branch does not read an uninitialised c1.
		 */
		filter->status = 0;
		c1 = filter->cache;
		if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xaf) {
			/* plane byte: remember the zero-based plane number */
			filter->status = 3;
			filter->cache = c - 0xa1;
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 3:	/* got 0x8e, third char */
		filter->status = 0;
		c1 = filter->cache;
		if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xff) {
			/* pack plane (high byte) and zero-based row (low byte) */
			filter->status = 4;
			filter->cache = (c1 << 8) + c - 0xa1;
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 4:	/* mbcs fourth char */
		filter->status = 0;
		c1 = filter->cache;
		if (c1 >= 0x100 && c1 <= 0xdff && c > 0xa0 && c < 0xff) {
			plane = (c1 & 0xf00) >> 8;
			s = (c1 & 0xff)*94 + c - 0xa1;
			w = 0;
			if (s >= 0) {
				/* only these two planes have a lookup table here */
				if (plane == 1 && s < cns11643_2_ucs_table_size) {
					w = cns11643_2_ucs_table[s];
				}
				if (plane == 13 && s < cns11643_14_ucs_table_size) {
					w = cns11643_14_ucs_table[s];
				}
			}
			if (w <= 0) {
				w = ((c1 & 0x7f) << 8) | (c & 0x7f);
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_CNS11643;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			w = (c1 << 8) | c | 0x8e0000;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
* wchar => EUC-TW
*/
/*
 * Convert one wchar value to EUC-TW and write the resulting byte(s)
 * through filter->output_function.
 *
 * The table value keeps the plane number in bits 16..20 and the two-byte
 * code in the low 16 bits.  Planes 0 and 1 are written as one (latin) or
 * two bytes; every other plane is written as the four-byte form
 * 0x8e, 0xa0 + plane, high byte, low byte.
 *
 * Values that cannot be encoded go to mbfl_filt_conv_illegal_output()
 * unless the filter's illegal mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when an output call fails.
 */
int
mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, s, plane;

	s = 0;
	if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) {
		s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min];
	} else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) {
		s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min];
	} else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) {
		s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min];
	} else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) {
		s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min];
	} else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) {
		s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min];
	}

	if (s <= 0) {
		/* a raw code carried in the CNS11643 plane is written back as is */
		c1 = c & ~MBFL_WCSPLANE_MASK;
		if (c1 == MBFL_WCSPLANE_CNS11643) {
			s = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			s = 0;
		} else if (s <= 0) {
			s = -1;
		}
	}

	if (s >= 0) {
		plane = (s & 0x1f0000) >> 16;
		if (plane <= 1) {
			if (s < 0x80) {	/* latin */
				CK((*filter->output_function)(s, filter->data TSRMLS_CC));
			} else {
				s = (s & 0xffff) | 0x8080;
				CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
				CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
			}
		} else {
			/*
			 * The four bytes are produced one by one; packing them into a
			 * single int (0x8ea00000 + ...) would not fit a 32-bit signed
			 * int and would rely on implementation-defined behaviour.
			 */
			s = (s & 0xffff) | 0x8080;
			CK((*filter->output_function)(0x8e, filter->data TSRMLS_CC));
			CK((*filter->output_function)(0xa0 + plane, filter->data TSRMLS_CC));
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data TSRMLS_CC));
			CK((*filter->output_function)(s & 0xff, filter->data TSRMLS_CC));
		}
	} else {
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
	}

	return c;
}
/*
* Big5 => wchar
*/
/*
 * Feed one byte of a Big5 stream into the filter.
 *
 * c      - the next input byte
 * filter - conversion state; status 0 means "no pending lead byte",
 *          status 1 means the lead byte is stored in filter->cache
 *
 * Decoded characters are handed to filter->output_function.  Returns c,
 * or -1 as soon as the output function reports a failure.
 */
int
mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int c1, w;

	switch (filter->status) {
	case 0:
		if (c >= 0 && c < 0x80) {	/* latin */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else if (c > 0xa0 && c < 0xff) {	/* dbcs lead byte */
			filter->status = 1;
			filter->cache = c;
		} else {
			w = c & MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	case 1:	/* dbcs second byte */
		filter->status = 0;
		c1 = filter->cache;
		/*
		 * Trail bytes are 0x40..0x7e and 0xa1..0xfe.  The low range must
		 * start at 0x40: the index below subtracts 0x40, so a smaller
		 * byte would land in the previous row of the table.
		 */
		if ((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff)) {
			/* each table row holds 157 cells: 0x3f for the low range, then the high range */
			if (c < 0x7f) {
				w = (c1 - 0xa1)*157 + (c - 0x40);
			} else {
				w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f;
			}
			if (w >= 0 && w < big5_ucs_table_size) {
				w = big5_ucs_table[w];
			} else {
				w = 0;
			}
			if (w <= 0) {
				/* no table entry: carry the raw code in the BIG5 plane */
				w = (c1 << 8) | c;
				w &= MBFL_WCSPLANE_MASK;
				w |= MBFL_WCSPLANE_BIG5;
			}
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		} else if ((c >= 0 && c < 0x21) || c == 0x7f) {	/* CTLs */
			CK((*filter->output_function)(c, filter->data TSRMLS_CC));
		} else {
			w = (c1 << 8) | c;
			w &= MBFL_WCSGROUP_MASK;
			w |= MBFL_WCSGROUP_THROUGH;
			CK((*filter->output_function)(w, filter->data TSRMLS_CC));
		}
		break;

	default:
		filter->status = 0;
		break;
	}

	return c;
}
/*
* wchar => Big5
*/
/*
 * Convert one wchar value to Big5 and write the resulting byte(s) through
 * filter->output_function.  Values that cannot be encoded go to
 * mbfl_filt_conv_illegal_output() unless the filter's illegal mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.  Returns c, or -1 when an output
 * call fails.
 */
int
mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter TSRMLS_DC)
{
	int code = 0;

	/* pick the table whose range contains c, if any */
	if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) {
		code = ucs_a1_big5_table[c - ucs_a1_big5_table_min];
	} else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) {
		code = ucs_a2_big5_table[c - ucs_a2_big5_table_min];
	} else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) {
		code = ucs_a3_big5_table[c - ucs_a3_big5_table_min];
	} else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) {
		code = ucs_i_big5_table[c - ucs_i_big5_table_min];
	} else if (c >= ucs_pua_big5_table_min && c < ucs_pua_big5_table_max) {
		code = ucs_pua_big5_table[c - ucs_pua_big5_table_min];
	} else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) {
		code = ucs_r1_big5_table[c - ucs_r1_big5_table_min];
	} else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) {
		code = ucs_r2_big5_table[c - ucs_r2_big5_table_min];
	}

	if (code <= 0) {
		/* a raw code carried in the BIG5 plane is written back as is */
		if ((c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_BIG5) {
			code = c & MBFL_WCSPLANE_MASK;
		}
		if (c == 0) {
			code = 0;
		} else if (code <= 0) {
			code = -1;
		}
	}

	if (code < 0) {
		/* not representable */
		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
			CK(mbfl_filt_conv_illegal_output(c, filter TSRMLS_CC));
		}
		return c;
	}

	if (code >= 0x80) {
		/* double-byte code: high byte first */
		CK((*filter->output_function)((code >> 8) & 0xff, filter->data TSRMLS_CC));
	}
	/* low byte, which is the whole code for latin (code < 0x80) */
	CK((*filter->output_function)(code & 0xff, filter->data TSRMLS_CC));

	return c;
}
#endif /* HAVE_MBSTR_TW */
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/

View File

@ -0,0 +1,30 @@
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 2001 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 2.02 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available at through the world-wide-web at |
| http://www.php.net/license/2_02.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rui Hirokawa <hirokawa@php.net> |
+----------------------------------------------------------------------+
*/
/* $Id$ */
#ifndef MBFL_MBFILTER_TW_H
#define MBFL_MBFILTER_TW_H
/*
 * Traditional chinese conversion filters.  Each takes one input value c and
 * the filter state, writes its result through filter->output_function and
 * returns c, or -1 when an output call fails.
 */
/* EUC-TW => wchar; bytes of a multi-byte sequence are cached in the filter */
int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* wchar => EUC-TW */
int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* Big5 => wchar; a lead byte is cached in the filter until its trail byte arrives */
int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
/* wchar => Big5 */
int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter TSRMLS_DC);
#endif /* MBFL_MBFILTER_TW_H */

View File

@ -71,6 +71,7 @@
#include "mbregex.h"
#endif
#if defined(HAVE_MBSTR_JA)
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_jis,
@ -78,6 +79,35 @@ static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
mbfl_no_encoding_euc_jp,
mbfl_no_encoding_sjis
};
#endif
/*
 * Default identify lists for builds without japanese support.  Each variant
 * is compiled only when its own HAVE_MBSTR_* macro is defined and
 * HAVE_MBSTR_JA is not.
 */
#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
/* simplified chinese */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};
#endif
#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_JA)
/* traditional chinese */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};
#endif
#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_JA)
/* korean */
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};
#endif
static const int php_mbstr_default_identify_list_size = sizeof(php_mbstr_default_identify_list)/sizeof(enum mbfl_no_encoding);
@ -525,10 +555,30 @@ PHP_INI_END()
static void
php_mbstring_init_globals(zend_mbstring_globals *pglobals TSRMLS_DC)
{
#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
	/* simplified chinese build: chinese language, EUC-CN internal encoding */
	MBSTRG(language) = mbfl_no_language_chinese;
	MBSTRG(current_language) = mbfl_no_language_chinese;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_cn;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_cn;
#endif
#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_JA)
	/* traditional chinese build: chinese language, EUC-TW internal encoding */
	MBSTRG(language) = mbfl_no_language_chinese;
	MBSTRG(current_language) = mbfl_no_language_chinese;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_tw;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_tw;
#endif
#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_JA)
	/* korean build: language and current_language must both be korean */
	MBSTRG(language) = mbfl_no_language_korean;
	MBSTRG(current_language) = mbfl_no_language_korean;
	MBSTRG(internal_encoding) = mbfl_no_encoding_euc_kr;
	MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_kr;
#endif
#if defined(HAVE_MBSTR_JA)
MBSTRG(language) = mbfl_no_language_japanese;
MBSTRG(current_language) = mbfl_no_language_japanese;
MBSTRG(internal_encoding) = mbfl_no_encoding_euc_jp;
MBSTRG(current_internal_encoding) = mbfl_no_encoding_euc_jp;
#endif
MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
@ -724,12 +774,24 @@ PHP_RSHUTDOWN_FUNCTION(mbstring)
PHP_MINFO_FUNCTION(mbstring)
{
php_info_print_table_start();
php_info_print_table_header(2, "Multibyte (Japanese) Support", "enabled");
php_info_print_table_header(2, "Multibyte Support", "enabled");
#if defined(HAVE_MBSTR_JA)
php_info_print_table_row(2, "japanese support", "enabled");
#endif
#if defined(HAVE_MBSTR_CN)
php_info_print_table_row(2, "simplified chinese support", "enabled");
#endif
#if defined(HAVE_MBSTR_TW)
php_info_print_table_row(2, "traditional chinese support", "enabled");
#endif
#if defined(HAVE_MBSTR_KR)
php_info_print_table_row(2, "korean support", "enabled");
#endif
#if defined(MBSTR_ENC_TRANS)
php_info_print_table_row(2, "http input encoding translation", "enabled");
#endif
#if defined(HAVE_MBREGEX)
php_info_print_table_row(2, "multibyte regex support", "enabled");
php_info_print_table_row(2, "multibyte (japanese) regex support", "enabled");
#endif
php_info_print_table_end();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff