Merge branch 'pull-request/1094'

* pull-request/1094:
  added php_mb_check_code_point for mb_substitute_character
  news entry for PR #1094
This commit is contained in:
Joe Watkins 2017-01-04 06:55:29 +00:00
commit f9a435a06d
No known key found for this signature in database
GPG Key ID: F9BA0ADA31CBD89E
3 changed files with 90 additions and 3 deletions

1
NEWS
View File

@ -62,6 +62,7 @@ PHP NEWS
handles multibyte replacement char nicely). (Masakielastic, Yasuo)
. Added array input support to mb_convert_encoding(). (Yasuo)
. Added array input support to mb_check_encoding(). (Yasuo)
. Fixed bug #69079 (enhancement for mb_substitute_character). (masakielastic)
- Mcrypt:
. The deprecated mcrypt extension has been moved to PECL. (leigh)

View File

@ -104,6 +104,11 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc);
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
/* }}} */
/* {{{ php_mb_default_identify_list */
@ -1992,6 +1997,73 @@ PHP_FUNCTION(mb_detect_order)
}
/* }}} */
static inline int php_mb_check_code_point(long cp)
{
enum mbfl_no_encoding no_enc;
char* buf;
char buf_len;
no_enc = MBSTRG(current_internal_encoding)->no_encoding;
if (php_mb_is_no_encoding_utf8(no_enc)) {
if ((cp > 0 && 0xd800 > cp) || (cp > 0xdfff && 0x110000 > cp)) {
return 1;
}
return 0;
} else if (php_mb_is_no_encoding_unicode(no_enc)) {
if (0 > cp || cp > 0x10ffff) {
return 0;
}
return 1;
// backward compatibility
} else if (php_mb_is_unsupported_no_encoding(no_enc)) {
return cp < 0xffff && cp > 0x0;
}
if (cp < 0x100) {
buf_len = 1;
buf = (char *) safe_emalloc(buf_len, 1, 1);
buf[0] = cp;
buf[1] = 0;
} else if (cp < 0x10000) {
buf_len = 2;
buf = (char *) safe_emalloc(buf_len, 1, 1);
buf[0] = cp >> 8;
buf[1] = cp & 0xff;
buf[2] = 0;
} else if (cp < 0x1000000) {
buf_len = 3;
buf = (char *) safe_emalloc(buf_len, 1, 1);
buf[0] = cp >> 16;
buf[1] = (cp >> 8) & 0xff;
buf[2] = cp & 0xff;
buf[3] = 0;
} else {
buf_len = 4;
buf = (char *) safe_emalloc(buf_len, 1, 1);
buf[0] = cp >> 24;
buf[1] = (cp >> 16) & 0xff;
buf[2] = (cp >> 8) & 0xff;
buf[3] = cp & 0xff;
buf[4] = 0;
}
if (php_mb_check_encoding(buf, buf_len, NULL)) {
efree(buf);
return 1;
}
efree(buf);
return 0;
}
/* {{{ proto mixed mb_substitute_character([mixed substchar])
Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)
@ -2026,7 +2098,7 @@ PHP_FUNCTION(mb_substitute_character)
} else {
convert_to_long_ex(arg1);
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
} else {
@ -2037,7 +2109,7 @@ PHP_FUNCTION(mb_substitute_character)
break;
default:
convert_to_long_ex(arg1);
if (Z_LVAL_P(arg1) < 0xffff && Z_LVAL_P(arg1) > 0x0) {
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
} else {
@ -3124,7 +3196,7 @@ PHP_FUNCTION(mb_strimwidth)
if (from < 0) {
from += swidth;
}
if (from < 0 || (size_t)from > str_len) {
php_error_docref(NULL, E_WARNING, "Start position is out of range");
RETURN_FALSE;

View File

@ -0,0 +1,14 @@
--TEST--
Bug #69079 (enhancement for mb_substitute_character)
--SKIPIF--
<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
--FILE--
<?php
mb_internal_encoding('UTF-8');
var_dump(mb_substitute_character(0x1f600));
mb_internal_encoding('EUC-JP-2004');
var_dump(mb_substitute_character(0x8fa1ef));
?>
--EXPECT--
bool(true)
bool(true)