mirror of
https://github.com/php/php-src.git
synced 2024-12-15 21:05:51 +08:00
Move utf8_encode and utf8_decode to ext/standard
This commit is contained in:
parent
a5251f78f8
commit
1a512eed44
@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
|
||||
ZEND_ARG_INFO(0, length)
|
||||
ZEND_ARG_INFO(0, case_sensitivity)
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
/* Argument descriptor for utf8_encode(): declares its single "data" parameter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
|
||||
ZEND_ARG_INFO(0, data)
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
/* Argument descriptor for utf8_decode(): declares its single "data" parameter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
|
||||
ZEND_ARG_INFO(0, data)
|
||||
ZEND_END_ARG_INFO()
|
||||
/* }}} */
|
||||
/* {{{ syslog.c */
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */
|
||||
PHP_FE(str_split, arginfo_str_split)
|
||||
PHP_FE(strpbrk, arginfo_strpbrk)
|
||||
PHP_FE(substr_compare, arginfo_substr_compare)
|
||||
PHP_FE(utf8_encode, arginfo_utf8_encode)
|
||||
PHP_FE(utf8_decode, arginfo_utf8_decode)
|
||||
|
||||
#ifdef HAVE_STRCOLL
|
||||
PHP_FE(strcoll, arginfo_strcoll)
|
||||
|
@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count);
|
||||
PHP_FUNCTION(str_split);
|
||||
PHP_FUNCTION(strpbrk);
|
||||
PHP_FUNCTION(substr_compare);
|
||||
PHP_FUNCTION(utf8_encode);
|
||||
PHP_FUNCTION(utf8_decode);
|
||||
#ifdef HAVE_STRCOLL
|
||||
PHP_FUNCTION(strcoll);
|
||||
#endif
|
||||
|
@ -64,6 +64,8 @@
|
||||
|
||||
/* For str_getcsv() support */
|
||||
#include "ext/standard/file.h"
|
||||
/* For php_next_utf8_char() */
|
||||
#include "ext/standard/html.h"
|
||||
|
||||
#define STR_PAD_LEFT 0
|
||||
#define STR_PAD_RIGHT 1
|
||||
@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare)
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ */
|
||||
static zend_string *php_utf8_encode(const char *s, size_t len)
|
||||
{
|
||||
size_t pos = len;
|
||||
zend_string *str;
|
||||
unsigned char c;
|
||||
|
||||
str = zend_string_safe_alloc(len, 2, 0, 0);
|
||||
ZSTR_LEN(str) = 0;
|
||||
while (pos > 0) {
|
||||
/* The lower 256 codepoints of Unicode are identical to Latin-1,
|
||||
* so we don't need to do any mapping here. */
|
||||
c = (unsigned char)(*s);
|
||||
if (c < 0x80) {
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
|
||||
/* We only account for the single-byte and two-byte cases because
|
||||
* we're only dealing with the first 256 Unicode codepoints. */
|
||||
} else {
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6));
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f));
|
||||
}
|
||||
pos--;
|
||||
s++;
|
||||
}
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
|
||||
str = zend_string_truncate(str, ZSTR_LEN(str), 0);
|
||||
return str;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ */
|
||||
static zend_string *php_utf8_decode(const char *s, size_t len)
|
||||
{
|
||||
size_t pos = 0;
|
||||
unsigned int c;
|
||||
zend_string *str;
|
||||
|
||||
str = zend_string_alloc(len, 0);
|
||||
ZSTR_LEN(str) = 0;
|
||||
while (pos < len) {
|
||||
int status = FAILURE;
|
||||
c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
|
||||
|
||||
/* The lower 256 codepoints of Unicode are identical to Latin-1,
|
||||
* so we don't need to do any mapping here beyond replacing non-Latin-1
|
||||
* characters. */
|
||||
if (status == FAILURE || c > 0xFFU) {
|
||||
c = '?';
|
||||
}
|
||||
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)++] = c;
|
||||
}
|
||||
ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0';
|
||||
if (ZSTR_LEN(str) < len) {
|
||||
str = zend_string_truncate(str, ZSTR_LEN(str), 0);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
/* {{{ proto string utf8_encode(string data)
|
||||
Encodes an ISO-8859-1 string to UTF-8 */
|
||||
PHP_FUNCTION(utf8_encode)
|
||||
{
|
||||
char *arg;
|
||||
size_t arg_len;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
RETURN_STR(php_utf8_encode(arg, arg_len));
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto string utf8_decode(string data)
|
||||
Converts a UTF-8 encoded string to ISO-8859-1 */
|
||||
PHP_FUNCTION(utf8_decode)
|
||||
{
|
||||
char *arg;
|
||||
size_t arg_len;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
RETURN_STR(php_utf8_decode(arg, arg_len));
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
|
@ -1,10 +1,5 @@
|
||||
--TEST--
|
||||
Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string)
|
||||
--SKIPIF--
|
||||
<?php
|
||||
require_once("skipif.inc");
|
||||
if (!extension_loaded('xml')) die ("skip xml extension not available");
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
echo utf8_decode('abc'.chr(0xe0));
|
@ -1,10 +1,5 @@
|
||||
--TEST--
|
||||
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
|
||||
--SKIPIF--
|
||||
<?php
|
||||
require_once("skipif.inc");
|
||||
if (!extension_loaded('xml')) die ("skip xml extension not available");
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
|
@ -1,7 +1,5 @@
|
||||
--TEST--
|
||||
UTF-8<->ISO Latin 1 encoding/decoding test
|
||||
--SKIPIF--
|
||||
<?php include("skipif.inc"); ?>
|
||||
--FILE--
|
||||
<?php
|
||||
printf("%s -> %s\n", urlencode("æ"), urlencode(utf8_encode("æ")));
|
@ -1,16 +1,10 @@
|
||||
--TEST--
|
||||
Test utf8_decode() function : error conditions
|
||||
--SKIPIF--
|
||||
<?php
|
||||
if (!extension_loaded("xml")) {
|
||||
print "skip - XML extension not loaded";
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
/* Prototype : proto string utf8_decode(string data)
|
||||
* Description: Converts a UTF-8 encoded string to ISO-8859-1
|
||||
* Source code: ext/xml/xml.c
|
||||
* Source code: ext/standard/string.c
|
||||
* Alias to functions:
|
||||
*/
|
||||
|
@ -1,16 +1,10 @@
|
||||
--TEST--
|
||||
Test utf8_decode() function : usage variations - different types for data
|
||||
--SKIPIF--
|
||||
<?php
|
||||
if (!extension_loaded("xml")) {
|
||||
print "skip - XML extension not loaded";
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
/* Prototype : proto string utf8_decode(string data)
|
||||
* Description: Converts a UTF-8 encoded string to ISO-8859-1
|
||||
* Source code: ext/xml/xml.c
|
||||
* Source code: ext/standard/string.c
|
||||
* Alias to functions:
|
||||
*/
|
||||
|
@ -1,16 +1,10 @@
|
||||
--TEST--
|
||||
Test utf8_encode() function : error conditions
|
||||
--SKIPIF--
|
||||
<?php
|
||||
if (!extension_loaded("xml")) {
|
||||
print "skip - XML extension not loaded";
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
/* Prototype : proto string utf8_encode(string data)
|
||||
* Description: Encodes an ISO-8859-1 string to UTF-8
|
||||
* Source code: ext/xml/xml.c
|
||||
* Source code: ext/standard/string.c
|
||||
* Alias to functions:
|
||||
*/
|
||||
|
@ -1,16 +1,10 @@
|
||||
--TEST--
|
||||
Test utf8_encode() function : usage variations - different types for data
|
||||
--SKIPIF--
|
||||
<?php
|
||||
if (!extension_loaded("xml")) {
|
||||
print "skip - XML extension not loaded";
|
||||
}
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
/* Prototype : proto string utf8_encode(string data)
|
||||
* Description: Encodes an ISO-8859-1 string to UTF-8
|
||||
* Source code: ext/xml/xml.c
|
||||
* Source code: ext/standard/string.c
|
||||
* Alias to functions:
|
||||
*/
|
||||
|
@ -212,14 +212,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2)
|
||||
ZEND_ARG_INFO(0, option)
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
/* Argument descriptor for utf8_encode(): declares its single "data" parameter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
|
||||
ZEND_ARG_INFO(0, data)
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
/* Argument descriptor for utf8_decode(): declares its single "data" parameter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
|
||||
ZEND_ARG_INFO(0, data)
|
||||
ZEND_END_ARG_INFO()
|
||||
|
||||
const zend_function_entry xml_functions[] = {
|
||||
PHP_FE(xml_parser_create, arginfo_xml_parser_create)
|
||||
PHP_FE(xml_parser_create_ns, arginfo_xml_parser_create_ns)
|
||||
@ -243,8 +235,6 @@ const zend_function_entry xml_functions[] = {
|
||||
PHP_FE(xml_parser_free, arginfo_xml_parser_free)
|
||||
PHP_FE(xml_parser_set_option, arginfo_xml_parser_set_option)
|
||||
PHP_FE(xml_parser_get_option, arginfo_xml_parser_get_option)
|
||||
PHP_FE(utf8_encode, arginfo_utf8_encode)
|
||||
PHP_FE(utf8_decode, arginfo_utf8_decode)
|
||||
PHP_FE_END
|
||||
};
|
||||
|
||||
@ -1667,46 +1657,6 @@ PHP_FUNCTION(xml_parser_get_option)
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto string utf8_encode(string data)
|
||||
Encodes an ISO-8859-1 string to UTF-8 */
|
||||
PHP_FUNCTION(utf8_encode)
|
||||
{
|
||||
char *arg;
|
||||
size_t arg_len;
|
||||
zend_string *encoded;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
encoded = xml_utf8_encode(arg, arg_len, (XML_Char*)"ISO-8859-1");
|
||||
if (encoded == NULL) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
RETURN_STR(encoded);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
/* {{{ proto string utf8_decode(string data)
|
||||
Converts a UTF-8 encoded string to ISO-8859-1 */
|
||||
PHP_FUNCTION(utf8_decode)
|
||||
{
|
||||
char *arg;
|
||||
size_t arg_len;
|
||||
zend_string *decoded;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
decoded = xml_utf8_decode((XML_Char*)arg, arg_len, (XML_Char*)"ISO-8859-1");
|
||||
if (decoded == NULL) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
RETURN_STR(decoded);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user