mirror of
https://github.com/php/php-src.git
synced 2025-01-26 21:54:16 +08:00
fixed #35711, added mbstring.strict_detection to detect the encoding strictly.
This commit is contained in:
parent
91a735502c
commit
8864bf27e2
@ -336,7 +336,7 @@ mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *str
|
||||
* encoding detector
|
||||
*/
|
||||
mbfl_encoding_detector *
|
||||
mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz)
|
||||
mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
|
||||
{
|
||||
mbfl_encoding_detector *identd;
|
||||
|
||||
@ -371,6 +371,9 @@ mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz)
|
||||
}
|
||||
identd->filter_list_size = num;
|
||||
|
||||
/* set strict flag */
|
||||
identd->strict = strict;
|
||||
|
||||
return identd;
|
||||
}
|
||||
|
||||
@ -441,10 +444,25 @@ enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *ident
|
||||
/*
 * Primary judge: walk the filter list from the last entry down to the
 * first, so that the lowest-index candidate that qualifies is the one
 * left in `encoding`.
 *
 * A candidate qualifies when its `flag` is clear and, in strict mode,
 * its `status` is also zero.
 * NOTE(review): `flag` presumably marks "invalid input seen" and a
 * non-zero `status` presumably means the filter stopped in the middle
 * of a multibyte sequence -- confirm against the identify filters.
 *
 * The strict test is folded into the condition rather than using
 * `continue`: a `continue` here would jump back to the loop test
 * without executing `n--`, spinning forever on the same filter.
 */
while (n >= 0) {
	filter = identd->filter_list[n];
	if (!filter->flag) {
		if (!identd->strict || !filter->status) {
			encoding = filter->encoding->no_encoding;
		}
	}
	n--;
}
|
||||
|
||||
/* fallback judge */
|
||||
if (encoding == mbfl_no_encoding_invalid) {
|
||||
n = identd->filter_list_size - 1;
|
||||
while (n >= 0) {
|
||||
filter = identd->filter_list[n];
|
||||
if (!filter->flag) {
|
||||
encoding = filter->encoding->no_encoding;
|
||||
}
|
||||
n--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return encoding;
|
||||
@ -623,11 +641,11 @@ mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, i
|
||||
}
|
||||
|
||||
enum mbfl_no_encoding
|
||||
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz)
|
||||
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
|
||||
{
|
||||
const mbfl_encoding *encoding;
|
||||
|
||||
encoding = mbfl_identify_encoding(string, elist, elistsz, 0);
|
||||
encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
|
||||
if (encoding != NULL &&
|
||||
encoding->no_encoding > mbfl_no_encoding_charset_min &&
|
||||
encoding->no_encoding < mbfl_no_encoding_charset_max) {
|
||||
|
@ -138,9 +138,10 @@ typedef struct _mbfl_encoding_detector mbfl_encoding_detector;
|
||||
struct _mbfl_encoding_detector {
|
||||
mbfl_identify_filter **filter_list;
|
||||
int filter_list_size;
|
||||
int strict;
|
||||
};
|
||||
|
||||
MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz);
|
||||
MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict);
|
||||
MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
|
||||
MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
|
||||
MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
|
||||
@ -163,7 +164,7 @@ MBFLAPI extern const char *
|
||||
mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
|
||||
|
||||
MBFLAPI extern enum mbfl_no_encoding
|
||||
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz);
|
||||
mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
|
||||
|
||||
/*
|
||||
* strlen
|
||||
|
@ -266,7 +266,7 @@ enum mbfl_no_encoding _php_mb_encoding_handler_ex(const php_mb_encoding_handler_
|
||||
} else {
|
||||
/* auto detect */
|
||||
from_encoding = mbfl_no_encoding_invalid;
|
||||
identd = mbfl_encoding_detector_new((enum mbfl_no_encoding *)info->from_encodings, info->num_from_encodings);
|
||||
identd = mbfl_encoding_detector_new((enum mbfl_no_encoding *)info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection));
|
||||
if (identd) {
|
||||
n = 0;
|
||||
while (n < num) {
|
||||
|
@ -733,6 +733,9 @@ PHP_INI_BEGIN()
|
||||
STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
|
||||
PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation,
|
||||
encoding_translation, zend_mbstring_globals, mbstring_globals)
|
||||
|
||||
STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
|
||||
PHP_INI_ALL, OnUpdateLong, strict_detection, zend_mbstring_globals, mbstring_globals)
|
||||
PHP_INI_END()
|
||||
/* }}} */
|
||||
|
||||
@ -768,6 +771,7 @@ static void _php_mb_globals_ctor(zend_mbstring_globals *pglobals TSRMLS_DC)
|
||||
MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
|
||||
MBSTRG(func_overload) = 0;
|
||||
MBSTRG(encoding_translation) = 0;
|
||||
MBSTRG(strict_detection) = 0;
|
||||
pglobals->outconv = NULL;
|
||||
#if HAVE_MBREGEX
|
||||
_php_mb_regex_globals_ctor(pglobals TSRMLS_CC);
|
||||
@ -2021,7 +2025,7 @@ MBSTRING_API char * php_mb_convert_encoding(char *input, size_t length, char *_t
|
||||
string.no_encoding = from_encoding;
|
||||
} else if (size > 1) {
|
||||
/* auto detect */
|
||||
from_encoding = mbfl_identify_encoding_no(&string, list, size);
|
||||
from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
|
||||
if (from_encoding != mbfl_no_encoding_invalid) {
|
||||
string.no_encoding = from_encoding;
|
||||
} else {
|
||||
@ -2213,7 +2217,7 @@ PHP_FUNCTION(mb_detect_encoding)
|
||||
mbfl_string string;
|
||||
const char *ret;
|
||||
enum mbfl_no_encoding *elist;
|
||||
int size, *list, strict = 0;
|
||||
int size, *list, strict;
|
||||
|
||||
if (ZEND_NUM_ARGS() == 1) {
|
||||
if (zend_get_parameters_ex(1, &arg_str) == FAILURE) {
|
||||
@ -2263,6 +2267,9 @@ PHP_FUNCTION(mb_detect_encoding)
|
||||
convert_to_long_ex(arg_strict);
|
||||
strict = Z_LVAL_PP(arg_strict);
|
||||
}
|
||||
else {
|
||||
strict = MBSTRG(strict_detection);
|
||||
}
|
||||
|
||||
if (size > 0 && list != NULL) {
|
||||
elist = list;
|
||||
@ -2554,7 +2561,7 @@ PHP_FUNCTION(mb_convert_variables)
|
||||
stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
|
||||
stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
|
||||
stack_level = 0;
|
||||
identd = mbfl_encoding_detector_new(elist, elistsz);
|
||||
identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
|
||||
if (identd != NULL) {
|
||||
n = 2;
|
||||
while (n < argc || stack_level > 0) {
|
||||
@ -3173,7 +3180,7 @@ PHP_FUNCTION(mb_send_mail)
|
||||
orig_str.no_encoding = MBSTRG(current_internal_encoding);
|
||||
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|
||||
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
|
||||
orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
|
||||
orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
|
||||
}
|
||||
pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
|
||||
if (pstr != NULL) {
|
||||
@ -3193,7 +3200,7 @@ PHP_FUNCTION(mb_send_mail)
|
||||
|
||||
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|
||||
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
|
||||
orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
|
||||
orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
|
||||
}
|
||||
|
||||
pstr = NULL;
|
||||
@ -3570,7 +3577,7 @@ MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length
|
||||
mbfl_string_init(&string);
|
||||
string.no_language = MBSTRG(current_language);
|
||||
|
||||
identd = mbfl_encoding_detector_new(elist, size);
|
||||
identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
|
||||
|
||||
if (identd) {
|
||||
int n = 0;
|
||||
|
@ -180,6 +180,7 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring)
|
||||
int current_filter_illegal_substchar;
|
||||
long func_overload;
|
||||
zend_bool encoding_translation;
|
||||
long strict_detection;
|
||||
mbfl_buffer_converter *outconv;
|
||||
#if HAVE_MBREGEX && defined(PHP_MBREGEX_GLOBALS)
|
||||
PHP_MBREGEX_GLOBALS
|
||||
|
Loading…
Reference in New Issue
Block a user