* Remove unicode.from_error_mode and unicode.from_subst_char from INI
  settings.
* Add unicode_set_error_mode() and unicode_set_subst_char() functions to
  manipulate these global settings.
This commit is contained in:
Andrei Zmievski 2006-03-26 01:48:33 +00:00
parent d38d354fe8
commit 930bde5897
9 changed files with 110 additions and 78 deletions

View File

@ -110,8 +110,8 @@ static ZEND_INI_MH(OnUpdateEncoding)
*converter = NULL;
}
if (*converter) {
zend_set_converter_error_mode(*converter, UG(from_u_error_mode));
zend_set_converter_subst_char(*converter, UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(*converter, UG(from_error_mode));
zend_set_converter_subst_char(*converter, UG(from_subst_char));
}
return SUCCESS;
@ -150,67 +150,20 @@ static ZEND_INI_MH(OnUpdateErrorMode)
}
#endif
static void zend_update_converters_error_behavior(TSRMLS_D)
void zend_update_converters_error_behavior(TSRMLS_D)
{
if (UG(fallback_encoding_conv)) {
zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_u_error_mode));
zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(UG(fallback_encoding_conv), UG(from_error_mode));
zend_set_converter_subst_char(UG(fallback_encoding_conv), UG(from_subst_char));
}
if (UG(runtime_encoding_conv)) {
zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_u_error_mode));
zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(UG(runtime_encoding_conv), UG(from_error_mode));
zend_set_converter_subst_char(UG(runtime_encoding_conv), UG(from_subst_char));
}
if (UG(output_encoding_conv)) {
zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode));
zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode));
zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char));
}
if (UG(http_input_encoding_conv)) {
zend_set_converter_error_mode(UG(http_input_encoding_conv), UG(from_u_error_mode));
}
}
/* INI modification handler for "unicode.from_error_mode".
 *
 * A missing value selects ZEND_CONV_ERROR_SUBST. Otherwise the value is read
 * as a decimal integer whose low byte must not exceed
 * ZEND_CONV_ERROR_LAST_ENUM; an illegal value raises a warning and the
 * setting is left untouched. On success the new mode is stored in the
 * Unicode globals and pushed to the already-open converters.
 */
static ZEND_INI_MH(OnUpdateConversionErrorMode)
{
	uint16_t requested_mode = ZEND_CONV_ERROR_SUBST;

	if (new_value) {
		requested_mode = atoi(new_value);

		/* Only the low byte selects the mode; higher bits carry flags. */
		if ((requested_mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
			zend_error(E_WARNING, "Illegal value for conversion error mode");
			return FAILURE;
		}
	}

	UG(from_u_error_mode) = requested_mode;
	zend_update_converters_error_behavior(TSRMLS_C);

	return SUCCESS;
}
static ZEND_INI_MH(OnUpdateConversionSubstChar)
{
uint8_t i = 0;
UChar32 c = 0x3f; /*'?'*/
char *end_ptr;
if (new_value) {
c = (int32_t)strtol(new_value, &end_ptr, 16);
if (end_ptr < new_value + strlen(new_value)) {
zend_error(E_WARNING, "Substitution character string should be a hexadecimal Unicode codepoint value");
return FAILURE;
}
if (c < 0 || c >= UCHAR_MAX_VALUE) {
zend_error(E_WARNING, "Substitution character value U+%06x is out of range 0-10FFFF", c);
return FAILURE;
}
}
U16_APPEND_UNSAFE(UG(subst_char), i, c);
UG(subst_char)[i] = 0;
UG(subst_char_len) = i;
zend_update_converters_error_behavior(TSRMLS_C);
return SUCCESS;
}
@ -223,8 +176,6 @@ ZEND_INI_BEGIN()
STD_ZEND_INI_ENTRY("unicode.runtime_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, runtime_encoding_conv, zend_unicode_globals, unicode_globals)
STD_ZEND_INI_ENTRY("unicode.script_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, script_encoding_conv, zend_unicode_globals, unicode_globals)
STD_ZEND_INI_ENTRY("unicode.http_input_encoding", NULL, ZEND_INI_ALL, OnUpdateEncoding, http_input_encoding_conv, zend_unicode_globals, unicode_globals)
ZEND_INI_ENTRY("unicode.from_error_mode", "2", ZEND_INI_ALL, OnUpdateConversionErrorMode)
ZEND_INI_ENTRY("unicode.from_error_subst_char", "3f", ZEND_INI_ALL, OnUpdateConversionSubstChar)
ZEND_INI_END()
@ -956,9 +907,10 @@ static void unicode_globals_ctor(zend_unicode_globals *unicode_globals TSRMLS_DC
unicode_globals->output_encoding_conv = NULL;
unicode_globals->script_encoding_conv = NULL;
unicode_globals->http_input_encoding_conv = NULL;
unicode_globals->subst_char_len = 0;
zend_set_converter_encoding(&unicode_globals->utf8_conv, "UTF-8");
unicode_globals->from_u_error_mode = ZEND_CONV_ERROR_SUBST;
unicode_globals->from_error_mode = ZEND_CONV_ERROR_SUBST;
memset(unicode_globals->from_subst_char, 0, 3 * sizeof(UChar));
zend_codepoint_to_uchar(0x3f, unicode_globals->from_subst_char);
zend_hash_init_ex(&unicode_globals->flex_compatible, 0, NULL, NULL, 1, 0);
}

View File

@ -126,6 +126,9 @@ void zend_register_standard_constants(TSRMLS_D)
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, CONST_PERSISTENT | CONST_CS);
REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, CONST_PERSISTENT | CONST_CS);
/* true/false constants */
{
zend_constant c;

View File

@ -301,9 +301,10 @@ struct _zend_unicode_globals {
UConverter *http_input_encoding_conv;/* http input encoding converter */
UConverter *utf8_conv; /* all-purpose UTF-8 converter */
uint16_t from_u_error_mode;
UChar subst_char[3];
uint8_t subst_char_len;
uint16_t from_error_mode;
UChar from_subst_char[3];
uint16_t to_error_mode;
UChar to_subst_char[3];
char *default_locale;
UCollator *default_collator;

View File

@ -78,7 +78,7 @@ void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode)
/* }}} */
/* {{{ zend_set_converter_subst_char */
void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len)
void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char)
{
char dest[8];
int8_t dest_len = 8;
@ -86,6 +86,7 @@ void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t s
UErrorCode temp = U_ZERO_ERROR;
const void *old_context;
UConverterFromUCallback old_cb;
int32_t subst_char_len = u_strlen(subst_char);
if (!subst_char_len)
return;
@ -377,10 +378,12 @@ ZEND_API int zval_unicode_to_string(zval *string, UConverter *conv TSRMLS_DC)
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(u, num_conv)-1;
/* XXX needs to be fixed, but a leak is better than invalid memory
if (s) {
efree(s);
}
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
*/
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
retval = FAILURE;
}

View File

@ -42,6 +42,11 @@ enum {
ZEND_CONV_ERROR_EXCEPTION = 0x100
};
/* Direction of a conversion whose error behaviour is being configured.
 * The numeric values are exposed to userland as the FROM_UNICODE and
 * TO_UNICODE constants, so they are spelled out explicitly. */
typedef enum {
	ZEND_FROM_UNICODE = 0,	/* Unicode -> binary string */
	ZEND_TO_UNICODE   = 1	/* binary string -> Unicode */
} zend_conv_direction;
extern ZEND_API zend_class_entry *unicodeConversionException;
@ -49,9 +54,10 @@ extern ZEND_API zend_class_entry *unicodeConversionException;
/* internal functions */
int zend_set_converter_encoding(UConverter **converter, const char *encoding);
void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char, int8_t subst_char_len);
void zend_set_converter_subst_char(UConverter *conv, UChar *subst_char);
void zend_set_converter_error_mode(UConverter *conv, uint8_t error_mode);
void zend_register_unicode_exceptions(TSRMLS_D);
void zend_update_converters_error_behavior(TSRMLS_D);
/* API functions */
@ -84,7 +90,9 @@ static inline UChar32 zend_get_codepoint_at(UChar *str, int length, int n)
int32_t offset = 0;
UChar32 c = 0;
U16_FWD_N(str, offset, length, n);
if (n > 0) {
U16_FWD_N(str, offset, length, n);
}
U16_GET(str, 0, offset, length, c);
return c;

View File

@ -112,6 +112,74 @@ static PHP_FUNCTION(unicode_encode)
}
/* }}} */
PHP_FUNCTION(unicode_set_error_mode)
{
zend_conv_direction direction;
long tmp, mode;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ll", &tmp, &mode) == FAILURE) {
return;
}
direction = (zend_conv_direction) tmp;
if (direction != ZEND_FROM_UNICODE && direction != ZEND_TO_UNICODE) {
php_error(E_WARNING, "Invalid conversion direction value");
RETURN_FALSE;
}
if ((mode & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
php_error(E_WARNING, "Illegal value for conversion error mode");
RETURN_FALSE;
}
if (direction == ZEND_FROM_UNICODE) {
UG(from_error_mode) = mode;
}
zend_update_converters_error_behavior(TSRMLS_C);
RETURN_TRUE;
}
PHP_FUNCTION(unicode_set_subst_char)
{
zend_conv_direction direction;
UChar *subst_char;
UChar32 cp;
int subst_char_len;
long tmp;
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "lu", &tmp, &subst_char, &subst_char_len) == FAILURE) {
return;
}
direction = (zend_conv_direction) tmp;
if (direction != ZEND_FROM_UNICODE && direction != ZEND_TO_UNICODE) {
php_error(E_WARNING, "Invalid conversion direction value");
RETURN_FALSE;
}
if (subst_char_len < 1 ) {
php_error(E_WARNING, "Empty substitution character");
RETURN_FALSE;
}
cp = zend_get_codepoint_at(subst_char, subst_char_len, 0);
if (cp < 0 || cp >= UCHAR_MAX_VALUE) {
zend_error(E_WARNING, "Substitution character value U+%06x is out of range (0 - 0x10FFFF)", cp);
RETURN_FALSE;
}
if (direction == ZEND_FROM_UNICODE) {
int len;
len = zend_codepoint_to_uchar(cp, UG(from_subst_char));
UG(from_subst_char)[len] = 0;
}
zend_update_converters_error_behavior(TSRMLS_C);
RETURN_TRUE;
}
/* {{{ unicode_functions[] */
zend_function_entry unicode_functions[] = {
PHP_FE(i18n_loc_get_default, NULL)
@ -119,6 +187,8 @@ zend_function_entry unicode_functions[] = {
PHP_FE(unicode_decode, NULL)
PHP_FE(unicode_semantics, NULL)
PHP_FE(unicode_encode, NULL)
PHP_FE(unicode_set_error_mode, NULL)
PHP_FE(unicode_set_subst_char, NULL)
{ NULL, NULL, NULL }
};
/* }}} */
@ -144,7 +214,6 @@ zend_module_entry unicode_module_entry = {
ZEND_GET_MODULE(unicode)
#endif
/* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(unicode)
{
@ -158,7 +227,6 @@ PHP_MINIT_FUNCTION(unicode)
}
/* }}} */
/* {{{ PHP_MSHUTDOWN_FUNCTION */
PHP_MSHUTDOWN_FUNCTION(unicode)
{
@ -172,7 +240,6 @@ PHP_MSHUTDOWN_FUNCTION(unicode)
}
/* }}} */
/* {{{ PHP_RINIT_FUNCTION */
PHP_RINIT_FUNCTION(unicode)
{
@ -180,7 +247,6 @@ PHP_RINIT_FUNCTION(unicode)
}
/* }}} */
/* {{{ PHP_RSHUTDOWN_FUNCTION */
PHP_RSHUTDOWN_FUNCTION(unicode)
{
@ -188,7 +254,6 @@ PHP_RSHUTDOWN_FUNCTION(unicode)
}
/* }}} */
/* {{{ PHP_MINFO_FUNCTION */
PHP_MINFO_FUNCTION(unicode)
{

View File

@ -220,8 +220,8 @@ static ZEND_INI_MH(OnUpdateOutputEncoding)
UG(output_encoding_conv) = NULL;
}
if (UG(output_encoding_conv)) {
zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode));
zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_error_mode));
zend_set_converter_subst_char(UG(output_encoding_conv), UG(from_subst_char));
if (stage == ZEND_INI_STAGE_RUNTIME) {
sapi_update_default_charset(TSRMLS_C);
}

View File

@ -712,7 +712,7 @@ PHPAPI int _php_stream_bucket_convert(php_stream_bucket *bucket, unsigned char t
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(bucket->buf.u, num_conv)-1;
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
}
if (bucket->own_buf) {

View File

@ -1263,7 +1263,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu
if (U_FAILURE(status)) {
int32_t offset = u_countChar32(buf.u, num_conv)-1;
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_u_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
}
freeme = buf.s = dest;
buflen = destlen;
@ -2293,8 +2293,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
}
} else {
/* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
zend_set_converter_error_mode(stream->output_encoding, UG(from_u_error_mode));
zend_set_converter_subst_char(stream->output_encoding, UG(subst_char), UG(subst_char_len));
zend_set_converter_error_mode(stream->output_encoding, UG(from_error_mode));
zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char));
}
}
if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {