Update conversion error behavior and add some new modes.

# The various escape modes are named after what ICU calls them. We may want to
# come up with different names for the UNICODE/ICU/JAVA ones.
This commit is contained in:
Andrei Zmievski 2006-03-23 22:00:42 +00:00
parent a27b78c2d2
commit 1c09ef8e37
5 changed files with 55 additions and 63 deletions

View File

@ -173,9 +173,15 @@ static void zend_update_converters_error_behavior(TSRMLS_D)
static ZEND_INI_MH(OnUpdateConversionErrorMode)
{
if (!new_value) {
UG(from_u_error_mode) = ZEND_FROM_U_ERROR_SUBST;
UG(from_u_error_mode) = ZEND_CONV_ERROR_SUBST;
} else {
UG(from_u_error_mode) = atoi(new_value);
uint16_t mode = atoi(new_value);
if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
zend_error(E_WARNING, "Illegal value for conversion error mode");
return FAILURE;
}
UG(from_u_error_mode) = mode;
}
zend_update_converters_error_behavior(TSRMLS_C);
return SUCCESS;
@ -194,7 +200,7 @@ static ZEND_INI_MH(OnUpdateConversionSubstChar)
zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value");
return FAILURE;
}
if (c < 0 || c >= 0x10FFFF) {
if (c < 0 || c >= UCHAR_MAX_VALUE) {
zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c);
return FAILURE;
}
@ -952,7 +958,7 @@ static void unicode_globals_ctor(zend_unicode_globals *unicode_globals TSRMLS_DC
unicode_globals->http_input_encoding_conv = NULL;
unicode_globals->subst_char_len = 0;
zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8");
unicode_globals->from_u_error_mode = ZEND_FROM_U_ERROR_SUBST;
unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST;
zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0);
}

View File

@ -116,10 +116,15 @@ void zend_register_standard_constants(TSRMLS_D)
REGISTER_MAIN_LONG_CONSTANT("E_ALL", E_ALL, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_STOP", ZEND_FROM_U_ERROR_STOP, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SKIP", ZEND_FROM_U_ERROR_SKIP, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_SUBSTITUTE", ZEND_FROM_U_ERROR_SUBST, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_INVALID_ESCAPE", ZEND_FROM_U_ERROR_ESCAPE, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_STOP", ZEND_CONV_ERROR_STOP, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SKIP", ZEND_CONV_ERROR_SKIP, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_SUBST", ZEND_CONV_ERROR_SUBST, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_UNICODE", ZEND_CONV_ERROR_ESCAPE_UNICODE, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_ICU", ZEND_CONV_ERROR_ESCAPE_ICU, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS);
/* true/false constants */
{

View File

@ -301,7 +301,7 @@ struct _zend_unicode_globals {
UConverter *http_input_encoding_conv;/* http input encoding converter */
UConverter *utf8_conv; /* all-purpose UTF-8 converter */
uint8_t from_u_error_mode;
uint16_t from_u_error_mode;
UChar subst_char[3];
uint8_t subst_char_len;

View File

@ -35,21 +35,36 @@ void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode)
UErrorCode status = U_ZERO_ERROR;
switch (error_mode) {
case ZEND_FROM_U_ERROR_STOP:
case ZEND_CONV_ERROR_STOP:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_SKIP:
case ZEND_CONV_ERROR_SKIP:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SKIP, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_ESCAPE:
/* UTODO replace with custom callback for various substitution patterns */
case ZEND_CONV_ERROR_SUBST:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, NULL, NULL, &status);
break;
case ZEND_CONV_ERROR_ESCAPE_UNICODE:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_SUBST:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_SKIP_STOP_ON_ILLEGAL, NULL, NULL, &status);
case ZEND_CONV_ERROR_ESCAPE_ICU:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, NULL, NULL, &status);
break;
case ZEND_CONV_ERROR_ESCAPE_JAVA:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, NULL, NULL, &status);
break;
case ZEND_CONV_ERROR_ESCAPE_XML_DEC:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, NULL, NULL, &status);
break;
case ZEND_CONV_ERROR_ESCAPE_XML_HEX:
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, NULL, NULL, &status);
break;
default:
@ -317,50 +332,6 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC)
char *s = NULL;
int s_len;
#if 0
/* UTODO Putting it here for now, until we figure out the framework */
switch (UG(from_u_error_mode)) {
case ZEND_FROM_U_ERROR_STOP:
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_SKIP:
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_ESCAPE:
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, NULL, NULL, &status);
break;
case ZEND_FROM_U_ERROR_SUBST:
ucnv_setFromUCallBack(UG(runtime_encoding_conv), UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
break;
default:
assert(0);
break;
}
if (UG(subst_chars)) {
char subchar[16];
int8_t char_len = 16;
status = U_ZERO_ERROR;
ucnv_getSubstChars(UG(runtime_encoding_conv), subchar, &char_len, &status);
if (U_FAILURE(status)) {
zend_error(E_WARNING, "Could not get substitution characters");
return FAILURE;
}
status = U_ZERO_ERROR;
ucnv_setSubstChars(UG(runtime_encoding_conv), UG(subst_chars), MIN(char_len, UG(subst_chars_len)), &status);
if (U_FAILURE(status)) {
zend_error(E_WARNING, "Could not set substitution characters");
return FAILURE;
}
}
status = U_ZERO_ERROR;
#endif
UChar *u = Z_USTRVAL_P(string);
int u_len = Z_USTRLEN_P(string);

View File

@ -28,10 +28,20 @@
#include <unicode/uloc.h>
#include <unicode/ucol.h>
#define ZEND_FROM_U_ERROR_STOP 0
#define ZEND_FROM_U_ERROR_SKIP 1
#define ZEND_FROM_U_ERROR_SUBST 2
#define ZEND_FROM_U_ERROR_ESCAPE 3
/*
 * Conversion error modes.
 *
 * The first eight enumerators are consecutive values starting at zero.
 * ZEND_CONV_ERROR_LAST_ENUM is one past the last of them, and
 * ZEND_CONV_ERROR_EXCEPTION is a separate value outside the low byte.
 */
enum {
	ZEND_CONV_ERROR_STOP           = 0,
	ZEND_CONV_ERROR_SKIP           = 1,
	ZEND_CONV_ERROR_SUBST          = 2,
	ZEND_CONV_ERROR_ESCAPE_UNICODE = 3,
	ZEND_CONV_ERROR_ESCAPE_ICU     = 4,
	ZEND_CONV_ERROR_ESCAPE_JAVA    = 5,
	ZEND_CONV_ERROR_ESCAPE_XML_DEC = 6,
	ZEND_CONV_ERROR_ESCAPE_XML_HEX = 7,

	/* One past the last sequential mode value; not a mode itself. */
	ZEND_CONV_ERROR_LAST_ENUM      = 8,

	/* NOTE(review): presumably combined with a mode value - confirm with callers. */
	ZEND_CONV_ERROR_EXCEPTION      = 0x100
};
/* internal functions */