mirror of
https://github.com/php/php-src.git
synced 2024-11-25 10:54:15 +08:00
Added support for the Russian code pages (KOI8-R, CP1251, CP866).
This commit is contained in:
parent
5700c0d421
commit
4974e6073e
@ -26,13 +26,17 @@ if test "$PHP_MBSTRING" != "no"; then
|
||||
if test "$PHP_MBSTRING" = "kr"; then
|
||||
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "ru"; then
|
||||
AC_DEFINE(HAVE_MBSTR_RU,1,[whether to have russian support])
|
||||
fi
|
||||
if test "$PHP_MBSTRING" = "all"; then
|
||||
AC_DEFINE(HAVE_MBSTR_JA,1,[whether to have japanese support])
|
||||
AC_DEFINE(HAVE_MBSTR_CN,1,[whether to have simplified chinese support])
|
||||
AC_DEFINE(HAVE_MBSTR_TW,1,[whether to have traditional chinese support])
|
||||
AC_DEFINE(HAVE_MBSTR_KR,1,[whether to have korean support])
|
||||
AC_DEFINE(HAVE_MBSTR_RU,1,[whether to have russian support])
|
||||
fi
|
||||
dnl Register the extension exactly once; the source list includes the
dnl Russian filter (mbfilter_ru.c) next to the other language filters.
PHP_NEW_EXTENSION(mbstring, mbfilter_ja.c mbfilter_cn.c mbfilter_tw.c mbfilter_kr.c mbfilter_ru.c mbfilter.c mbstring.c mbregex.c php_mbregex.c, $ext_shared)
|
||||
else
|
||||
PHP_MBSTR_ENC_TRANS=no
|
||||
fi
|
||||
|
@ -104,6 +104,9 @@
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
#include "mbfilter_kr.h"
|
||||
#endif
|
||||
/*
 * Russian converter prototypes.  They are only needed (and the matching
 * tables are only compiled) when Russian support is enabled, so the guard
 * must test HAVE_MBSTR_RU rather than the Korean macro.
 */
#if defined(HAVE_MBSTR_RU)
#include "mbfilter_ru.h"
#endif
|
||||
|
||||
#include "zend.h"
|
||||
|
||||
@ -185,6 +188,16 @@ static mbfl_language mbfl_language_traditional_chinese = {
|
||||
mbfl_no_encoding_7bit
|
||||
};
|
||||
|
||||
static mbfl_language mbfl_language_russian = {
|
||||
mbfl_no_language_russian,
|
||||
"Russian",
|
||||
"ru",
|
||||
NULL,
|
||||
mbfl_no_encoding_koi8r,
|
||||
mbfl_no_encoding_qprint,
|
||||
mbfl_no_encoding_8bit
|
||||
};
|
||||
|
||||
static mbfl_language *mbfl_language_ptr_table[] = {
|
||||
&mbfl_language_uni,
|
||||
&mbfl_language_japanese,
|
||||
@ -192,6 +205,7 @@ static mbfl_language *mbfl_language_ptr_table[] = {
|
||||
&mbfl_language_simplified_chinese,
|
||||
&mbfl_language_traditional_chinese,
|
||||
&mbfl_language_english,
|
||||
&mbfl_language_russian,
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -788,15 +802,6 @@ static mbfl_encoding mbfl_encoding_uhc = {
|
||||
MBFL_ENCTYPE_MBCS
|
||||
};
|
||||
|
||||
static mbfl_encoding mbfl_encoding_2022kr = {
|
||||
mbfl_no_encoding_2022kr,
|
||||
"ISO-2022-KR",
|
||||
"ISO-2022-KR",
|
||||
NULL,
|
||||
NULL,
|
||||
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
|
||||
};
|
||||
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static const char *mbfl_encoding_cp1252_aliases[] = {"cp1252", NULL};
|
||||
@ -953,6 +958,41 @@ static mbfl_encoding mbfl_encoding_8859_15 = {
|
||||
MBFL_ENCTYPE_SBCS
|
||||
};
|
||||
|
||||
/*
 * Encoding descriptors for the Russian single-byte code pages.
 *
 * These objects are referenced from mbfl_encoding_ptr_list under
 * HAVE_MBSTR_RU, so they have to be compiled under the same macro; guarding
 * them with the Korean macro breaks a Russian-only build.
 */
#if defined(HAVE_MBSTR_RU)
static const char *mbfl_encoding_cp1251_aliases[] = {"CP1251", "CP-1251", "WINDOWS-1251", NULL};

static mbfl_encoding mbfl_encoding_cp1251 = {
	mbfl_no_encoding_cp1251,
	"Windows-1251",
	"Windows-1251",
	&mbfl_encoding_cp1251_aliases,
	NULL,
	MBFL_ENCTYPE_SBCS
};

static const char *mbfl_encoding_cp866_aliases[] = {"CP866", "CP-866", "IBM-866", NULL};

static mbfl_encoding mbfl_encoding_cp866 = {
	mbfl_no_encoding_cp866,
	"CP866",
	"CP866",
	&mbfl_encoding_cp866_aliases,
	NULL,
	MBFL_ENCTYPE_SBCS
};

static const char *mbfl_encoding_koi8r_aliases[] = {"KOI8-R", "KOI8R", NULL};

static mbfl_encoding mbfl_encoding_koi8r = {
	mbfl_no_encoding_koi8r,
	"KOI8-R",
	"KOI8-R",
	&mbfl_encoding_koi8r_aliases,
	NULL,
	MBFL_ENCTYPE_SBCS
};
#endif /* HAVE_MBSTR_RU */
|
||||
|
||||
static mbfl_encoding *mbfl_encoding_ptr_list[] = {
|
||||
&mbfl_encoding_pass,
|
||||
&mbfl_encoding_auto,
|
||||
@ -1016,7 +1056,11 @@ static mbfl_encoding *mbfl_encoding_ptr_list[] = {
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
&mbfl_encoding_euc_kr,
|
||||
&mbfl_encoding_uhc,
|
||||
&mbfl_encoding_2022kr,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_RU)
|
||||
&mbfl_encoding_cp1251,
|
||||
&mbfl_encoding_cp866,
|
||||
&mbfl_encoding_koi8r,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
@ -1125,9 +1169,14 @@ static int mbfl_filt_ident_big5(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
static int mbfl_filt_ident_euckr(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
#if defined(HAVE_MBSTR_RU)
|
||||
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
#endif /* HAVE_MBSTR_RU */
|
||||
|
||||
static int mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_false(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
static int mbfl_filt_ident_true(int c, mbfl_identify_filter *filter TSRMLS_DC);
|
||||
@ -1734,24 +1783,58 @@ static struct mbfl_convert_vtbl vtbl_wchar_uhc = {
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_uhc,
|
||||
mbfl_filt_conv_common_flush };
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_2022kr,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_2022kr,
|
||||
mbfl_filt_conv_any_2022kr_flush };
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_2022kr_wchar = {
|
||||
mbfl_no_encoding_2022kr,
|
||||
#if defined(HAVE_MBSTR_RU)
/*
 * Conversion vtbls for the Russian single-byte encodings.
 *
 * Each vtbl_A_B lists encoding A first and encoding B second, matching the
 * direction of its mbfl_filt_conv_A_B function.  The wchar => cp1251 and
 * wchar => cp866 tables previously listed the pair the other way round,
 * which made them indistinguishable from the reverse converters; they now
 * follow the same layout as vtbl_wchar_koi8r.
 */
static struct mbfl_convert_vtbl vtbl_wchar_cp1251 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_cp1251,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_cp1251,
	mbfl_filt_conv_common_flush };

static struct mbfl_convert_vtbl vtbl_cp1251_wchar = {
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_cp1251_wchar,
	mbfl_filt_conv_common_flush };

static struct mbfl_convert_vtbl vtbl_wchar_cp866 = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_cp866,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_cp866,
	mbfl_filt_conv_common_flush };

static struct mbfl_convert_vtbl vtbl_cp866_wchar = {
	mbfl_no_encoding_cp866,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_cp866_wchar,
	mbfl_filt_conv_common_flush };

static struct mbfl_convert_vtbl vtbl_wchar_koi8r = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_koi8r,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_wchar_koi8r,
	mbfl_filt_conv_common_flush };

static struct mbfl_convert_vtbl vtbl_koi8r_wchar = {
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_wchar,
	mbfl_filt_conv_common_ctor,
	mbfl_filt_conv_common_dtor,
	mbfl_filt_conv_koi8r_wchar,
	mbfl_filt_conv_common_flush };
#endif /* HAVE_MBSTR_RU */
|
||||
|
||||
static struct mbfl_convert_vtbl vtbl_cp1252_wchar = {
|
||||
mbfl_no_encoding_cp1252,
|
||||
@ -2015,8 +2098,14 @@ static struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
|
||||
&vtbl_wchar_euckr,
|
||||
&vtbl_uhc_wchar,
|
||||
&vtbl_wchar_uhc,
|
||||
&vtbl_2022kr_wchar,
|
||||
&vtbl_wchar_2022kr,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_RU)
|
||||
&vtbl_cp1251_wchar,
|
||||
&vtbl_wchar_cp1251,
|
||||
&vtbl_cp866_wchar,
|
||||
&vtbl_wchar_cp866,
|
||||
&vtbl_koi8r_wchar,
|
||||
&vtbl_wchar_koi8r,
|
||||
#endif
|
||||
&vtbl_cp1252_wchar,
|
||||
&vtbl_wchar_cp1252,
|
||||
@ -2200,14 +2289,27 @@ static struct mbfl_identify_vtbl vtbl_identify_uhc = {
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_uhc };
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_2022kr = {
|
||||
mbfl_no_encoding_2022kr,
|
||||
#if defined(HAVE_MBSTR_RU)
/*
 * Identify vtbls for the Russian single-byte encodings.  Each table pairs
 * the encoding id with the common constructor/destructor and the
 * encoding-specific mbfl_filt_ident_* function.
 *
 * Leftover ISO-2022-KR lines (an extra "mbfl_filt_ident_2022kr };"
 * terminator and an unmatched Korean #endif) have been dropped so that the
 * section is a single balanced HAVE_MBSTR_RU conditional.
 */
static struct mbfl_identify_vtbl vtbl_identify_cp1251 = {
	mbfl_no_encoding_cp1251,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_cp1251 };

static struct mbfl_identify_vtbl vtbl_identify_cp866 = {
	mbfl_no_encoding_cp866,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_cp866 };

static struct mbfl_identify_vtbl vtbl_identify_koi8r = {
	mbfl_no_encoding_koi8r,
	mbfl_filt_ident_common_ctor,
	mbfl_filt_ident_common_dtor,
	mbfl_filt_ident_koi8r };
#endif /* HAVE_MBSTR_RU */
|
||||
|
||||
static struct mbfl_identify_vtbl vtbl_identify_cp1252 = {
|
||||
mbfl_no_encoding_cp1252,
|
||||
@ -2323,7 +2425,11 @@ static struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
|
||||
#if defined(HAVE_MBSTR_KR)
|
||||
&vtbl_identify_euckr,
|
||||
&vtbl_identify_uhc,
|
||||
&vtbl_identify_2022kr,
|
||||
#endif
|
||||
#if defined(HAVE_MBSTR_RU)
|
||||
&vtbl_identify_cp1251,
|
||||
&vtbl_identify_cp866,
|
||||
&vtbl_identify_koi8r,
|
||||
#endif
|
||||
&vtbl_identify_cp1252,
|
||||
&vtbl_identify_8859_1,
|
||||
@ -6049,77 +6155,6 @@ mbfl_filt_ident_uhc(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
return c;
|
||||
}
|
||||
|
||||
static int
|
||||
mbfl_filt_ident_2022kr(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
retry:
|
||||
switch (filter->status & 0xf) {
|
||||
/* case 0x00: ASCII */
|
||||
/* case 0x10: KSC5601 mode */
|
||||
/* case 0x20: KSC5601 DBCS */
|
||||
/* case 0x40: KSC5601 SBCS */
|
||||
case 0:
|
||||
if (!(filter->status & 0x10)) {
|
||||
if (c == 0x1b)
|
||||
filter->status += 2;
|
||||
} else if (filter->status == 0x20 && c > 0x20 && c < 0x7f) { /* kanji first char */
|
||||
filter->status += 1;
|
||||
} else if (c >= 0 && c < 0x80) { /* latin, CTLs */
|
||||
;
|
||||
} else {
|
||||
filter->flag = 1; /* bad */
|
||||
}
|
||||
break;
|
||||
|
||||
/* case 0x21: KSC5601 second char */
|
||||
case 1:
|
||||
filter->status &= ~0xf;
|
||||
if (c < 0x21 || c > 0x7e) { /* bad */
|
||||
filter->flag = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
/* ESC */
|
||||
case 2:
|
||||
if (c == 0x24) { /* '$' */
|
||||
filter->status++;
|
||||
} else {
|
||||
filter->flag = 1; /* bad */
|
||||
filter->status &= ~0xf;
|
||||
goto retry;
|
||||
}
|
||||
break;
|
||||
|
||||
/* ESC $ */
|
||||
case 3:
|
||||
if (c == 0x29) { /* ')' */
|
||||
filter->status++;
|
||||
} else {
|
||||
filter->flag = 1; /* bad */
|
||||
filter->status &= ~0xf;
|
||||
goto retry;
|
||||
}
|
||||
break;
|
||||
|
||||
/* ESC $) */
|
||||
case 5:
|
||||
if (c == 0x43) { /* 'C' */
|
||||
filter->status = 0x10;
|
||||
} else {
|
||||
filter->flag = 1; /* bad */
|
||||
filter->status &= ~0xf;
|
||||
goto retry;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
filter->status = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_KR */
|
||||
|
||||
|
||||
@ -6139,6 +6174,39 @@ mbfl_filt_ident_cp1252(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
return c;
|
||||
}
|
||||
|
||||
#if defined(HAVE_MBSTR_RU)
/*
 * Identify filters for the Russian single-byte code pages.
 *
 * Each function receives one byte `c`, clears filter->flag when the byte is
 * in 0x80..0xfe and sets it to 1 ("not it") otherwise, then returns `c`.
 *
 * The definitions carry TSRMLS_DC so that they match the static prototypes
 * declared earlier in this file.
 *
 * NOTE(review): the original author flagged this logic as crude.  Bytes
 * below 0x80 and the byte 0xff also set the flag, although 0xff has a
 * mapping in all three conversion tables -- confirm whether rejecting them
 * is intended.
 */
static int
mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (c >= 0x80 && c < 0xff) {
		filter->flag = 0;
	} else {
		filter->flag = 1;	/* not it */
	}
	return c;
}

static int
mbfl_filt_ident_cp866(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (c >= 0x80 && c < 0xff) {
		filter->flag = 0;
	} else {
		filter->flag = 1;	/* not it */
	}
	return c;
}

static int
mbfl_filt_ident_koi8r(int c, mbfl_identify_filter *filter TSRMLS_DC)
{
	if (c >= 0x80 && c < 0xff) {
		filter->flag = 0;
	} else {
		filter->flag = 1;	/* not it */
	}
	return c;
}
#endif /* HAVE_MBSTR_RU */
|
||||
|
||||
static int
|
||||
mbfl_filt_ident_2022jp(int c, mbfl_identify_filter *filter TSRMLS_DC)
|
||||
{
|
||||
|
@ -113,6 +113,7 @@ enum mbfl_no_language {
|
||||
mbfl_no_language_swedish, /* sv */
|
||||
mbfl_no_language_simplified_chinese, /* zh-cn */
|
||||
mbfl_no_language_traditional_chinese, /* zh-tw */
|
||||
mbfl_no_language_russian, /* ru */
|
||||
mbfl_no_language_max
|
||||
};
|
||||
|
||||
@ -176,6 +177,9 @@ enum mbfl_no_encoding {
|
||||
mbfl_no_encoding_2022kr,
|
||||
mbfl_no_encoding_uhc,
|
||||
mbfl_no_encoding_hz,
|
||||
mbfl_no_encoding_cp1251,
|
||||
mbfl_no_encoding_cp866,
|
||||
mbfl_no_encoding_koi8r,
|
||||
mbfl_no_encoding_charset_max
|
||||
};
|
||||
|
||||
@ -246,6 +250,9 @@ typedef struct _mbfl_encoding {
|
||||
#define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
|
||||
#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
|
||||
#define MBFL_WCSPLANE_CP1251 0x70f70000
|
||||
#define MBFL_WCSPLANE_CP866 0x70f80000
|
||||
#define MBFL_WCSPLANE_KOI8R 0x70f90000
|
||||
#define MBFL_WCSGROUP_MASK 0xffffff
|
||||
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
|
||||
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
|
||||
|
213
ext/mbstring/mbfilter_ru.c
Normal file
213
ext/mbstring/mbfilter_ru.c
Normal file
@ -0,0 +1,213 @@
|
||||
/*
|
||||
* "russian code filter and converter"
|
||||
*/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
#if defined(HAVE_MBSTR_RU)
|
||||
#include "mbfilter.h"
|
||||
#include "unicode_table_ru.h"
|
||||
|
||||
/*
|
||||
* encoding filter
|
||||
*/
|
||||
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
|
||||
|
||||
/*
|
||||
* cp1251 => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s;
|
||||
|
||||
if (c >= 0 && c < cp1251_ucs_table_min) {
|
||||
s = c;
|
||||
} else if (c >= cp1251_ucs_table_min && c < 0x100) {
|
||||
s = cp1251_ucs_table[c - cp1251_ucs_table_min];
|
||||
if (s <= 0) {
|
||||
s = c;
|
||||
s &= MBFL_WCSPLANE_MASK;
|
||||
s |= MBFL_WCSPLANE_CP1251;
|
||||
}
|
||||
} else {
|
||||
s = c;
|
||||
s &= MBFL_WCSGROUP_MASK;
|
||||
s |= MBFL_WCSGROUP_THROUGH;
|
||||
}
|
||||
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => cp1251
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s, n;
|
||||
|
||||
if (c >= 0 && c < cp1251_ucs_table_min) {
|
||||
s = c;
|
||||
} else {
|
||||
s = -1;
|
||||
n = cp1251_ucs_table_len-1;
|
||||
while (n >= 0) {
|
||||
if (c == cp1251_ucs_table[n]) {
|
||||
s = cp1251_ucs_table_min + n;
|
||||
break;
|
||||
}
|
||||
n--;
|
||||
}
|
||||
if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1251) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
if (s >= 0) {
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* cp866 => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_cp866_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s;
|
||||
|
||||
if (c >= 0 && c < cp866_ucs_table_min) {
|
||||
s = c;
|
||||
} else if (c >= cp866_ucs_table_min && c < 0x100) {
|
||||
s = cp866_ucs_table[c - cp866_ucs_table_min];
|
||||
if (s <= 0) {
|
||||
s = c;
|
||||
s &= MBFL_WCSPLANE_MASK;
|
||||
s |= MBFL_WCSPLANE_CP866;
|
||||
}
|
||||
} else {
|
||||
s = c;
|
||||
s &= MBFL_WCSGROUP_MASK;
|
||||
s |= MBFL_WCSGROUP_THROUGH;
|
||||
}
|
||||
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => cp866
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_cp866(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s, n;
|
||||
|
||||
if (c >= 0 && c < cp866_ucs_table_min) {
|
||||
s = c;
|
||||
} else {
|
||||
s = -1;
|
||||
n = cp866_ucs_table_len-1;
|
||||
while (n >= 0) {
|
||||
if (c == cp866_ucs_table[n]) {
|
||||
s = cp866_ucs_table_min + n;
|
||||
break;
|
||||
}
|
||||
n--;
|
||||
}
|
||||
if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP866) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
if (s >= 0) {
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* koi8r => wchar
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_koi8r_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s;
|
||||
|
||||
if (c >= 0 && c < koi8r_ucs_table_min) {
|
||||
s = c;
|
||||
} else if (c >= koi8r_ucs_table_min && c < 0x100) {
|
||||
s = koi8r_ucs_table[c - koi8r_ucs_table_min];
|
||||
if (s <= 0) {
|
||||
s = c;
|
||||
s &= MBFL_WCSPLANE_MASK;
|
||||
s |= MBFL_WCSPLANE_KOI8R;
|
||||
}
|
||||
} else {
|
||||
s = c;
|
||||
s &= MBFL_WCSGROUP_MASK;
|
||||
s |= MBFL_WCSGROUP_THROUGH;
|
||||
}
|
||||
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => koi8r
|
||||
*/
|
||||
int
|
||||
mbfl_filt_conv_wchar_koi8r(int c, mbfl_convert_filter *filter TSRMLS_DC)
|
||||
{
|
||||
int s, n;
|
||||
|
||||
if (c >= 0 && c < koi8r_ucs_table_min) {
|
||||
s = c;
|
||||
} else {
|
||||
s = -1;
|
||||
n = koi8r_ucs_table_len-1;
|
||||
while (n >= 0) {
|
||||
if (c == koi8r_ucs_table[n]) {
|
||||
s = koi8r_ucs_table_min + n;
|
||||
break;
|
||||
}
|
||||
n--;
|
||||
}
|
||||
if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8R) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
if (s >= 0) {
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HAVE_MBSTR_RU */
|
||||
|
11
ext/mbstring/mbfilter_ru.h
Normal file
11
ext/mbstring/mbfilter_ru.h
Normal file
@ -0,0 +1,11 @@
|
||||
#ifndef MBFL_MBFILTER_RU_H
|
||||
#define MBFL_MBFILTER_RU_H
|
||||
|
||||
int mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_cp866_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_cp866(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_koi8r_wchar(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
int mbfl_filt_conv_wchar_koi8r(int c, mbfl_convert_filter *filter TSRMLS_DC);
|
||||
|
||||
#endif /* MBFL_MBFILTER_RU_H */
|
@ -65,7 +65,7 @@
|
||||
#include "php_content_types.h"
|
||||
#include "SAPI.h"
|
||||
|
||||
/*
 * Pull in the Zend multibyte hooks only when the engine was built with
 * them.  A single #ifdef is used: it pairs with the one #endif and also
 * works when ZEND_MULTIBYTE is defined without a value.
 */
#ifdef ZEND_MULTIBYTE
#include "zend_multibyte.h"
#endif /* ZEND_MULTIBYTE */
|
||||
|
||||
@ -113,6 +113,16 @@ static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * Default encoding-detection list used when Russian support is compiled in
 * without Japanese, Traditional Chinese or Korean support.
 */
#if defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_JA) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_KR)
static const enum mbfl_no_encoding php_mbstr_default_identify_list[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};
#endif
|
||||
|
||||
static const int php_mbstr_default_identify_list_size = sizeof(php_mbstr_default_identify_list)/sizeof(enum mbfl_no_encoding);
|
||||
|
||||
static unsigned char third_and_rest_force_ref[] = { 3, BYREF_NONE, BYREF_NONE, BYREF_FORCE_REST };
|
||||
@ -2059,10 +2069,6 @@ PHP_FUNCTION(mb_strcut)
|
||||
if (from < 0) {
|
||||
from = 0;
|
||||
}
|
||||
}
|
||||
if (Z_STRLEN_PP(arg1) < from) {
|
||||
/* keep index within string */
|
||||
from = Z_STRLEN_PP(arg1);
|
||||
}
|
||||
|
||||
/* if "length" position is negative, set it to the length
|
||||
@ -2074,10 +2080,6 @@ PHP_FUNCTION(mb_strcut)
|
||||
len = 0;
|
||||
}
|
||||
}
|
||||
if (Z_STRLEN_PP(arg1) < (from + len)) {
|
||||
/* limit span to characters in string */
|
||||
len = Z_STRLEN_PP(arg1) - from;
|
||||
}
|
||||
|
||||
ret = mbfl_strcut(&string, &result, from, len TSRMLS_CC);
|
||||
if (ret != NULL) {
|
||||
@ -2269,7 +2271,7 @@ PHPAPI char * php_mb_convert_encoding(char *input, size_t length, char *_to_enco
|
||||
string.no_encoding = from_encoding;
|
||||
}
|
||||
} else {
|
||||
php_error(E_WARNING, "$s() illegal character encoding specified",
|
||||
php_error(E_WARNING, "%s() illegal character encoding specified",
|
||||
get_active_function_name(TSRMLS_C));
|
||||
}
|
||||
if (list != NULL) {
|
||||
|
69
ext/mbstring/unicode_table_ru.h
Normal file
69
ext/mbstring/unicode_table_ru.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*
 * Windows-1251 (cp1251) to Unicode table.
 *
 * Entry i holds the code point for byte (cp1251_ucs_table_min + i).
 * Byte 0x98 is unassigned in cp1251, so its entry is 0: the decoder treats
 * non-positive entries as "no mapping" and tags the raw byte instead of
 * silently turning it into '?'.
 */
static const unsigned short cp1251_ucs_table[] = {
 0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
 0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
 0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
 0x0000, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
 0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
 0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
 0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
 0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457,
 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f
};
/* first byte covered by the table, number of entries, one past the last byte */
static const int cp1251_ucs_table_min = 0x80;
static const int cp1251_ucs_table_len = (sizeof (cp1251_ucs_table) / sizeof (cp1251_ucs_table[0]));
static const int cp1251_ucs_table_max = 0x80 + (sizeof (cp1251_ucs_table) / sizeof (cp1251_ucs_table[0]));
|
||||
|
||||
/*
 * CP866 (DOS Cyrillic Russian) to Unicode table.
 * Entry i holds the code point for byte (cp866_ucs_table_min + i).
 */
static const unsigned short cp866_ucs_table[] = {
 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040e, 0x045e,
 0x00b0, 0x2219, 0x00b7, 0x221a, 0x2116, 0x00a4, 0x25a0, 0x00a0
};
/* first byte covered by the table, number of entries, one past the last byte */
static const int cp866_ucs_table_min = 0x80;
static const int cp866_ucs_table_len = (sizeof (cp866_ucs_table) / sizeof (cp866_ucs_table[0]));
static const int cp866_ucs_table_max = 0x80 + (sizeof (cp866_ucs_table) / sizeof (cp866_ucs_table[0]));
|
||||
|
||||
/*
 * KOI8-R (RFC 1489) to Unicode table.
 * Entry i holds the code point for byte (koi8r_ucs_table_min + i).
 */
static const unsigned short koi8r_ucs_table[] = {
 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 0x2518, 0x251c, 0x2524,
 0x252c, 0x2534, 0x253c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
 0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
 0x2264, 0x2265, 0x00a0, 0x2321, 0x00b0, 0x00b2, 0x00b7, 0x00f7,
 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 0x255e,
 0x255f, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
 0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x00a9,
 0x044e, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
 0x0445, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e,
 0x043f, 0x044f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
 0x044c, 0x044b, 0x0437, 0x0448, 0x044d, 0x0449, 0x0447, 0x044a,
 0x042e, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
 0x0425, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e,
 0x041f, 0x042f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
 0x042c, 0x042b, 0x0417, 0x0428, 0x042d, 0x0429, 0x0427, 0x042a
};
/* first byte covered by the table, number of entries, one past the last byte */
static const int koi8r_ucs_table_min = 0x80;
static const int koi8r_ucs_table_len = (sizeof (koi8r_ucs_table) / sizeof (koi8r_ucs_table[0]));
static const int koi8r_ucs_table_max = 0x80 + (sizeof (koi8r_ucs_table) / sizeof (koi8r_ucs_table[0]));
|
||||
|
Loading…
Reference in New Issue
Block a user