mirror of
https://github.com/php/php-src.git
synced 2024-12-02 14:24:10 +08:00
Update conversion error behavior and add some new modes.
# The names of the various escape modes follow ICU's naming. We may want to come up with different names for the UNICODE/ICU/JAVA ones.
This commit is contained in:
parent
a27b78c2d2
commit
1c09ef8e37
14
Zend/zend.c
14
Zend/zend.c
@ -173,9 +173,15 @@ static void zend_update_converters_error_behavior(TSRMLS_D)
|
||||
static ZEND_INI_MH(OnUpdateConversionErrorMode)
|
||||
{
|
||||
if (!new_value) {
|
||||
UG(from_u_error_mode) = ZEND_FROM_U_ERROR_SUBST;
|
||||
UG(from_u_error_mode) = ZEND_CONV_ERROR_SUBST;
|
||||
} else {
|
||||
UG(from_u_error_mode) = atoi(new_value);
|
||||
uint16_t mode = atoi(new_value);
|
||||
|
||||
if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
|
||||
zend_error(E_WARNING, "Illegal value for conversion error mode");
|
||||
return FAILURE;
|
||||
}
|
||||
UG(from_u_error_mode) = mode;
|
||||
}
|
||||
zend_update_converters_error_behavior(TSRMLS_C);
|
||||
return SUCCESS;
|
||||
@ -194,7 +200,7 @@ static ZEND_INI_MH(OnUpdateConversionSubstChar)
|
||||
zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value");
|
||||
return FAILURE;
|
||||
}
|
||||
if (c < 0 || c >= 0x10FFFF) {
|
||||
if (c < 0 || c >= UCHAR_MAX_VALUE) {
|
||||
zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c);
|
||||
return FAILURE;
|
||||
}
|
||||
@ -952,7 +958,7 @@ static void unicode_globals_ctor(zend_unicode_globals *unicode_globals TSRMLS_DC
|
||||
unicode_globals->http_input_encoding_conv = NULL;
|
||||
unicode_globals->subst_char_len = 0;
|
||||
zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8");
|
||||
unicode_globals->from_u_error_mode = ZEND_FROM_U_ERROR_SUBST;
|
||||
unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST;
|
||||
|
||||
zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0);
|
||||
}
|
||||
|
@ -116,10 +116,15 @@ void zend_register_standard_constants(TSRMLS_D)
|
||||
|
||||
REGISTER_MAIN_LONG_CONSTANT("E_ALL", E_ALL, CONST_PERSISTENT | CONST_CS);
|
||||
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_STOP", ZEND_FROM_U_ERROR_STOP, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SKIP", ZEND_FROM_U_ERROR_SKIP, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SUBSTITUTE", ZEND_FROM_U_ERROR_SUBST, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_ESCAPE", ZEND_FROM_U_ERROR_ESCAPE, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_STOP", ZEND_CONV_ERROR_STOP, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SKIP", ZEND_CONV_ERROR_SKIP, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SUBST", ZEND_CONV_ERROR_SUBST, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_UNICODE", ZEND_CONV_ERROR_ESCAPE_UNICODE, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_ICU", ZEND_CONV_ERROR_ESCAPE_ICU, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS);
|
||||
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS);
|
||||
|
||||
/* true/false constants */
|
||||
{
|
||||
|
@ -301,7 +301,7 @@ struct _zend_unicode_globals {
|
||||
UConverter *http_input_encoding_conv;/* http input encoding converter */
|
||||
UConverter *utf8_conv; /* all-purpose UTF-8 converter */
|
||||
|
||||
uint8_t from_u_error_mode;
|
||||
uint16_t from_u_error_mode;
|
||||
UChar subst_char[3];
|
||||
uint8_t subst_char_len;
|
||||
|
||||
|
@ -35,21 +35,36 @@ void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode)
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
switch (error_mode) {
|
||||
case ZEND_FROM_U_ERROR_STOP:
|
||||
case ZEND_CONV_ERROR_STOP:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_SKIP:
|
||||
case ZEND_CONV_ERROR_SKIP:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_ESCAPE:
|
||||
/* UTODO replace with custom callback for various substitution patterns */
|
||||
case ZEND_CONV_ERROR_SUBST:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_CONV_ERROR_ESCAPE_UNICODE:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_SUBST:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
|
||||
case ZEND_CONV_ERROR_ESCAPE_ICU:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_CONV_ERROR_ESCAPE_JAVA:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_CONV_ERROR_ESCAPE_XML_DEC:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_CONV_ERROR_ESCAPE_XML_HEX:
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -317,50 +332,6 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC)
|
||||
char *s = NULL;
|
||||
int s_len;
|
||||
|
||||
#if 0
|
||||
/* UTODO Putting it here for now, until we figure out the framework */
|
||||
switch (UG(from_u_error_mode)) {
|
||||
case ZEND_FROM_U_ERROR_STOP:
|
||||
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_SKIP:
|
||||
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_ESCAPE:
|
||||
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
case ZEND_FROM_U_ERROR_SUBST:
|
||||
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (UG(subst_chars)) {
|
||||
char subchar[16];
|
||||
int8_t char_len = 16;
|
||||
status = U_ZERO_ERROR;
|
||||
ucnv_getSubstChars(UG(runtime_encoding_conv), subchar, &char_len, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
zend_error(E_WARNING, "Could not get substitution characters");
|
||||
return FAILURE;
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
ucnv_setSubstChars(UG(runtime_encoding_conv), UG(subst_chars), MIN(char_len, UG(subst_chars_len)), &status);
|
||||
if (U_FAILURE(status)) {
|
||||
zend_error(E_WARNING, "Could not set substitution characters");
|
||||
return FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
#endif
|
||||
|
||||
UChar *u = Z_USTRVAL_P(string);
|
||||
int u_len = Z_USTRLEN_P(string);
|
||||
|
||||
|
@ -28,10 +28,20 @@
|
||||
#include <unicode/uloc.h>
|
||||
#include <unicode/ucol.h>
|
||||
|
||||
#define ZEND_FROM_U_ERROR_STOP 0
|
||||
#define ZEND_FROM_U_ERROR_SKIP 1
|
||||
#define ZEND_FROM_U_ERROR_SUBST 2
|
||||
#define ZEND_FROM_U_ERROR_ESCAPE 3
|
||||
/*
 * Conversion error modes stored in UG(from_u_error_mode).
 *
 * The sequential values select the ICU from-Unicode callback installed by
 * zend_set_converter_error_mode(). ZEND_CONV_ERROR_EXCEPTION is a separate
 * bit outside the low byte; the ini handler validates only (mode & 0xff)
 * against ZEND_CONV_ERROR_LAST_ENUM.
 */
enum {
|
||||
ZEND_CONV_ERROR_STOP, /* UCNV_FROM_U_CALLBACK_STOP */
|
||||
ZEND_CONV_ERROR_SKIP, /* UCNV_FROM_U_CALLBACK_SKIP */
|
||||
ZEND_CONV_ERROR_SUBST, /* UCNV_FROM_U_CALLBACK_SUBSTITUTE; also the default mode */
|
||||
ZEND_CONV_ERROR_ESCAPE_UNICODE, /* UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_UNICODE */
|
||||
ZEND_CONV_ERROR_ESCAPE_ICU, /* UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_ICU */
|
||||
ZEND_CONV_ERROR_ESCAPE_JAVA, /* UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_JAVA */
|
||||
ZEND_CONV_ERROR_ESCAPE_XML_DEC, /* UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_XML_DEC */
|
||||
ZEND_CONV_ERROR_ESCAPE_XML_HEX, /* UCNV_FROM_U_CALLBACK_ESCAPE with UCNV_ESCAPE_XML_HEX */
|
||||
/* Sentinel: one past the last sequential mode, not a mode itself.
 * NOTE(review): the ini handler rejects only values strictly greater than
 * this, so the sentinel value itself passes validation -- confirm intent. */
ZEND_CONV_ERROR_LAST_ENUM,
|
||||
|
||||
/* Flag bit above the low byte that holds the sequential mode.
 * NOTE(review): presumably meant to be OR-ed with a mode above; how it is
 * consumed is not visible here -- confirm. */
ZEND_CONV_ERROR_EXCEPTION = 0x100
|
||||
};
|
||||
|
||||
|
||||
/* internal functions */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user