mirror of
https://github.com/php/php-src.git
synced 2024-11-29 12:53:37 +08:00
- Fix strrpos() logic (which was broken even in PHP 5).
- Optimizations for a couple of functions.
This commit is contained in:
parent
640c76969e
commit
0b34032e49
@ -2372,14 +2372,8 @@ PHP_FUNCTION(stristr)
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
if (U_IS_BMP(Z_LVAL_PP(needle))) {
|
||||
u_needle_char[needle_len++] = (UChar)Z_LVAL_PP(needle);
|
||||
u_needle_char[needle_len] = 0;
|
||||
} else {
|
||||
u_needle_char[needle_len++] = (UChar)U16_LEAD(Z_LVAL_PP(needle));
|
||||
u_needle_char[needle_len++] = (UChar)U16_TRAIL(Z_LVAL_PP(needle));
|
||||
u_needle_char[needle_len] = 0;
|
||||
}
|
||||
needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
|
||||
u_needle_char[needle_len] = 0;
|
||||
target.u = u_needle_char;
|
||||
} else {
|
||||
needle_char[needle_len++] = (char)Z_LVAL_PP(needle);
|
||||
@ -2426,7 +2420,7 @@ PHP_FUNCTION(stristr)
|
||||
Finds first occurrence of a string within another */
|
||||
PHP_FUNCTION(strstr)
|
||||
{
|
||||
void *haystack;
|
||||
zstr haystack;
|
||||
int haystack_len;
|
||||
zend_uchar haystack_type;
|
||||
zval **needle;
|
||||
@ -2450,16 +2444,16 @@ PHP_FUNCTION(strstr)
|
||||
/* haystack type determines the needle type */
|
||||
if (haystack_type == IS_UNICODE) {
|
||||
convert_to_unicode_ex(needle);
|
||||
found = zend_u_memnstr((UChar*)haystack,
|
||||
found = zend_u_memnstr(haystack.u,
|
||||
Z_USTRVAL_PP(needle),
|
||||
Z_USTRLEN_PP(needle),
|
||||
(UChar*)haystack + haystack_len);
|
||||
haystack.u + haystack_len);
|
||||
} else {
|
||||
convert_to_string_ex(needle);
|
||||
found = php_memnstr((char*)haystack,
|
||||
found = php_memnstr(haystack.s,
|
||||
Z_STRVAL_PP(needle),
|
||||
Z_STRLEN_PP(needle),
|
||||
(char*)haystack + haystack_len);
|
||||
haystack.s + haystack_len);
|
||||
}
|
||||
} else {
|
||||
convert_to_long_ex(needle);
|
||||
@ -2468,39 +2462,33 @@ PHP_FUNCTION(strstr)
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
/* supplementary codepoint values may require 2 UChar's */
|
||||
if (U_IS_BMP(Z_LVAL_PP(needle))) {
|
||||
u_needle_char[n_len++] = (UChar) Z_LVAL_PP(needle);
|
||||
u_needle_char[n_len] = 0;
|
||||
} else {
|
||||
u_needle_char[n_len++] = (UChar) U16_LEAD(Z_LVAL_PP(needle));
|
||||
u_needle_char[n_len++] = (UChar) U16_TRAIL(Z_LVAL_PP(needle));
|
||||
u_needle_char[n_len] = 0;
|
||||
}
|
||||
|
||||
found = zend_u_memnstr((UChar*)haystack,
|
||||
n_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
|
||||
u_needle_char[n_len] = 0;
|
||||
|
||||
found = zend_u_memnstr(haystack.u,
|
||||
u_needle_char,
|
||||
n_len,
|
||||
(UChar*)haystack + haystack_len);
|
||||
haystack.u + haystack_len);
|
||||
} else {
|
||||
needle_char[0] = (char) Z_LVAL_PP(needle);
|
||||
needle_char[1] = 0;
|
||||
|
||||
found = php_memnstr((char*)haystack,
|
||||
found = php_memnstr(haystack.s,
|
||||
needle_char,
|
||||
1,
|
||||
(char*)haystack + haystack_len);
|
||||
haystack.s + haystack_len);
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
switch (haystack_type) {
|
||||
case IS_UNICODE:
|
||||
found_offset = (UChar*)found - (UChar*)haystack;
|
||||
found_offset = (UChar*)found - haystack.u;
|
||||
if (part) {
|
||||
UChar *ret;
|
||||
ret = eumalloc(found_offset + 1);
|
||||
u_strncpy(ret, haystack, found_offset);
|
||||
u_strncpy(ret, haystack.u, found_offset);
|
||||
ret[found_offset] = '\0';
|
||||
RETURN_UNICODEL(ret , found_offset, 0);
|
||||
} else {
|
||||
@ -2509,11 +2497,11 @@ PHP_FUNCTION(strstr)
|
||||
break;
|
||||
|
||||
case IS_STRING:
|
||||
found_offset = (char *)found - (char *)haystack;
|
||||
found_offset = (char *)found - haystack.s;
|
||||
if (part) {
|
||||
char *ret;
|
||||
ret = emalloc(found_offset + 1);
|
||||
strncpy(ret, haystack, found_offset);
|
||||
strncpy(ret, haystack.s, found_offset);
|
||||
ret[found_offset] = '\0';
|
||||
RETURN_STRINGL(ret , found_offset, 0);
|
||||
} else {
|
||||
@ -2535,7 +2523,7 @@ PHP_FUNCTION(strstr)
|
||||
Finds position of first occurrence of a string within another */
|
||||
PHP_FUNCTION(strpos)
|
||||
{
|
||||
void *haystack;
|
||||
zstr haystack;
|
||||
int haystack_len;
|
||||
zend_uchar haystack_type;
|
||||
zval **needle;
|
||||
@ -2544,6 +2532,7 @@ PHP_FUNCTION(strpos)
|
||||
char needle_char[2];
|
||||
UChar u_needle_char[3];
|
||||
int n_len = 0;
|
||||
int32_t cu_offset = 0;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|l", &haystack,
|
||||
&haystack_len, &haystack_type, &needle, &offset) == FAILURE) {
|
||||
@ -2551,7 +2540,7 @@ PHP_FUNCTION(strpos)
|
||||
}
|
||||
|
||||
/*
|
||||
* Unicode note: it's okay to not convert offset to codepoint offset here.
|
||||
* Unicode note: it's okay to not convert offset to code unit offset here.
|
||||
* We'll just do a rough check that the offset does not exceed length in
|
||||
* code units, and leave the rest to zend_u_memnstr().
|
||||
*/
|
||||
@ -2568,25 +2557,23 @@ PHP_FUNCTION(strpos)
|
||||
|
||||
/* haystack type determines the needle type */
|
||||
if (haystack_type == IS_UNICODE) {
|
||||
int32_t cp_offset = 0;
|
||||
convert_to_unicode_ex(needle);
|
||||
/* locate the codepoint at the specified offset */
|
||||
U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset);
|
||||
found = zend_u_memnstr((UChar*)haystack + cp_offset,
|
||||
/* calculate code unit offset */
|
||||
U16_FWD_N(haystack.u, cu_offset, haystack_len, offset);
|
||||
found = zend_u_memnstr(haystack.u + cu_offset,
|
||||
Z_USTRVAL_PP(needle),
|
||||
Z_USTRLEN_PP(needle),
|
||||
(UChar*)haystack + haystack_len);
|
||||
haystack.u + haystack_len);
|
||||
} else {
|
||||
convert_to_string_ex(needle);
|
||||
found = php_memnstr((char*)haystack + offset,
|
||||
found = php_memnstr(haystack.s + offset,
|
||||
Z_STRVAL_PP(needle),
|
||||
Z_STRLEN_PP(needle),
|
||||
(char*)haystack + haystack_len);
|
||||
haystack.s + haystack_len);
|
||||
}
|
||||
} else {
|
||||
convert_to_long_ex(needle);
|
||||
if (haystack_type == IS_UNICODE) {
|
||||
int32_t cp_offset = 0;
|
||||
if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
@ -2594,30 +2581,33 @@ PHP_FUNCTION(strpos)
|
||||
n_len += zend_codepoint_to_uchar(Z_LVAL_PP(needle), u_needle_char);
|
||||
u_needle_char[n_len] = 0;
|
||||
|
||||
/* locate the codepoint at the specified offset */
|
||||
U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset);
|
||||
found = zend_u_memnstr((UChar*)haystack + cp_offset,
|
||||
/* calculate code unit offset */
|
||||
U16_FWD_N(haystack.u, cu_offset, haystack_len, offset);
|
||||
found = zend_u_memnstr(haystack.u + cu_offset,
|
||||
u_needle_char,
|
||||
n_len,
|
||||
(UChar*)haystack + haystack_len);
|
||||
haystack.u + haystack_len);
|
||||
} else {
|
||||
needle_char[0] = (char) Z_LVAL_PP(needle);
|
||||
needle_char[1] = 0;
|
||||
|
||||
found = php_memnstr((char*)haystack + offset,
|
||||
found = php_memnstr(haystack.s + offset,
|
||||
needle_char,
|
||||
1,
|
||||
(char*)haystack + haystack_len);
|
||||
haystack.s + haystack_len);
|
||||
}
|
||||
}
|
||||
|
||||
if (found) {
|
||||
if (haystack_type == IS_UNICODE) {
|
||||
/* simple subtraction will not suffice, since there may be
|
||||
supplementary codepoints */
|
||||
RETURN_LONG(u_countChar32(haystack, ((char *)found - (char *)haystack)/sizeof(UChar)));
|
||||
/* Simple subtraction will not suffice, since there may be
|
||||
supplementary codepoints. We count how many codepoints there are
|
||||
between the starting offset and the found location and add them
|
||||
to the starting codepoint offset. */
|
||||
RETURN_LONG(offset + u_countChar32(haystack.u + cu_offset,
|
||||
(UChar*)found - (haystack.u + cu_offset)));
|
||||
} else {
|
||||
RETURN_LONG((char *)found - (char *)haystack);
|
||||
RETURN_LONG((char *)found - haystack.s);
|
||||
}
|
||||
} else {
|
||||
RETURN_FALSE;
|
||||
@ -2636,7 +2626,7 @@ PHP_FUNCTION(stripos)
|
||||
void *haystack_dup = NULL, *needle_dup = NULL;
|
||||
char needle_char[2];
|
||||
char c = 0;
|
||||
UChar u_needle_char[8];
|
||||
UChar u_needle_char[3];
|
||||
void *found = NULL;
|
||||
int cu_offset = 0;
|
||||
|
||||
@ -2647,6 +2637,12 @@ PHP_FUNCTION(stripos)
|
||||
if (Z_TYPE_PP(haystack) != IS_UNICODE && Z_TYPE_PP(haystack) != IS_STRING) {
|
||||
convert_to_text_ex(haystack);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unicode note: it's okay to not convert offset to code unit offset here.
|
||||
* We'll just do a rough check that the offset does not exceed length in
|
||||
* code units, and leave the rest to zend_u_memnstr().
|
||||
*/
|
||||
if (offset < 0 || offset > Z_UNILEN_PP(haystack)) {
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string.");
|
||||
RETURN_FALSE;
|
||||
@ -2670,7 +2666,7 @@ PHP_FUNCTION(stripos)
|
||||
}
|
||||
needle_len = Z_UNILEN_PP(needle);
|
||||
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
|
||||
/* calculate codeunit offset */
|
||||
/* calculate code unit offset */
|
||||
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
|
||||
found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset, Z_USTRVAL_PP(needle), haystack_len, needle_len TSRMLS_CC);
|
||||
} else {
|
||||
@ -2683,44 +2679,20 @@ PHP_FUNCTION(stripos)
|
||||
(char *)haystack_dup + haystack_len);
|
||||
}
|
||||
} else {
|
||||
switch (Z_TYPE_PP(needle)) {
|
||||
case IS_LONG:
|
||||
case IS_BOOL:
|
||||
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
|
||||
if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
|
||||
} else {
|
||||
c = tolower((char)Z_LVAL_PP(needle));
|
||||
}
|
||||
break;
|
||||
case IS_DOUBLE:
|
||||
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
|
||||
if ((UChar32)Z_DVAL_PP(needle) < 0 || (UChar32)Z_DVAL_PP(needle) > 0x10FFFF) {
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
needle_len = zend_codepoint_to_uchar((UChar32)Z_DVAL_PP(needle), u_needle_char);
|
||||
} else {
|
||||
c = tolower((char)Z_DVAL_PP(needle));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
php_error_docref(NULL TSRMLS_CC, E_WARNING, "needle is not a string or an integer.");
|
||||
RETURN_FALSE;
|
||||
break;
|
||||
|
||||
}
|
||||
convert_to_long_ex(needle);
|
||||
if (Z_TYPE_PP(haystack) == IS_UNICODE) {
|
||||
/* calculate codeunit offset */
|
||||
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
|
||||
if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) {
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(needle), u_needle_char);
|
||||
u_needle_char[needle_len] = 0;
|
||||
/* calculate code unit offset */
|
||||
U16_FWD_N(Z_USTRVAL_PP(haystack), cu_offset, haystack_len, offset);
|
||||
found = php_u_stristr(Z_USTRVAL_PP(haystack) + cu_offset,
|
||||
u_needle_char, haystack_len, needle_len TSRMLS_CC);
|
||||
|
||||
} else {
|
||||
c = tolower((char)Z_LVAL_PP(needle));
|
||||
needle_char[0] = c;
|
||||
needle_char[1] = '\0';
|
||||
haystack_dup = estrndup(Z_STRVAL_PP(haystack), haystack_len);
|
||||
@ -2767,6 +2739,7 @@ PHP_FUNCTION(strrpos)
|
||||
long offset = 0;
|
||||
char *p, *e, ord_needle[2];
|
||||
UChar *pos, *u_p, *u_e, u_ord_needle[3];
|
||||
int cu_offset = 0;
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZ|l",
|
||||
&zhaystack, &zneedle, &offset) == FAILURE) {
|
||||
@ -2790,14 +2763,8 @@ PHP_FUNCTION(strrpos)
|
||||
php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
if (U_IS_BMP(Z_LVAL_PP(zneedle))) {
|
||||
u_ord_needle[needle_len++] = (UChar)Z_LVAL_PP(zneedle);
|
||||
u_ord_needle[needle_len] = 0;
|
||||
} else {
|
||||
u_ord_needle[needle_len++] = (UChar)U16_LEAD(Z_LVAL_PP(zneedle));
|
||||
u_ord_needle[needle_len++] = (UChar)U16_TRAIL(Z_LVAL_PP(zneedle));
|
||||
u_ord_needle[needle_len] = 0;
|
||||
}
|
||||
needle_len = zend_codepoint_to_uchar((UChar32)Z_LVAL_PP(zneedle), u_ord_needle);
|
||||
u_ord_needle[needle_len] = 0;
|
||||
needle.u = u_ord_needle;
|
||||
} else {
|
||||
convert_to_long_ex(zneedle);
|
||||
@ -2810,40 +2777,60 @@ PHP_FUNCTION(strrpos)
|
||||
haystack = Z_UNIVAL_PP(zhaystack);
|
||||
haystack_len = Z_UNILEN_PP(zhaystack);
|
||||
|
||||
if ((haystack_len == 0) || (needle_len == 0)) {
|
||||
if ((haystack_len == 0) || (needle_len == 0) || needle_len > haystack_len) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
if (Z_TYPE_PP(zhaystack) == IS_UNICODE) {
|
||||
if (offset >= 0) {
|
||||
u_p = haystack.u + offset;
|
||||
U16_FWD_N(haystack.u, cu_offset, haystack_len, offset);
|
||||
if (cu_offset > haystack_len - needle_len) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
u_p = haystack.u + cu_offset;
|
||||
u_e = haystack.u + haystack_len - needle_len;
|
||||
} else {
|
||||
u_p = haystack.u;
|
||||
if (-offset > haystack_len) {
|
||||
u_e = haystack.u - needle_len;
|
||||
} else if (needle_len > -offset) {
|
||||
u_e = haystack.u + haystack_len - needle_len;
|
||||
RETURN_FALSE;
|
||||
} else {
|
||||
u_e = haystack.u + haystack_len + offset;
|
||||
cu_offset = haystack_len;
|
||||
U16_BACK_N(haystack.u, 0, cu_offset, -offset);
|
||||
if (cu_offset == 0) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
if (needle_len > haystack_len - cu_offset) {
|
||||
u_e = haystack.u + haystack_len - needle_len;
|
||||
} else {
|
||||
u_e = haystack.u + cu_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pos = u_strFindLast(u_p, u_e-u_p+needle_len, needle.u, needle_len);
|
||||
if (pos) {
|
||||
RETURN_LONG(pos - haystack.u);
|
||||
if (offset > 0) {
|
||||
RETURN_LONG(offset + u_countChar32(u_p, (UChar*)pos - u_p));
|
||||
} else {
|
||||
RETURN_LONG(u_countChar32(haystack.u, (UChar*)pos - haystack.u));
|
||||
}
|
||||
} else {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
} else {
|
||||
if (offset >= 0) {
|
||||
if (offset > haystack_len) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
p = haystack.s + offset;
|
||||
e = haystack.s + haystack_len - needle_len;
|
||||
} else {
|
||||
p = haystack.s;
|
||||
if (-offset > haystack_len) {
|
||||
e = haystack.s - needle_len;
|
||||
} else if (needle_len > -offset) {
|
||||
RETURN_FALSE;
|
||||
}
|
||||
|
||||
p = haystack.s;
|
||||
if (needle_len > -offset) {
|
||||
e = haystack.s + haystack_len - needle_len;
|
||||
} else {
|
||||
e = haystack.s + haystack_len + offset;
|
||||
|
@ -26,7 +26,6 @@ ext/standard
|
||||
sscanf()
|
||||
Params API. Rest - no idea yet.
|
||||
|
||||
stristr()
|
||||
strripos()
|
||||
str_replace()
|
||||
stri_replace()
|
||||
@ -158,6 +157,7 @@ ext/standard
|
||||
stripcslashes()
|
||||
stripslashes()
|
||||
stripos()
|
||||
stristr()
|
||||
strpbrk()
|
||||
strpos()
|
||||
strrchr()
|
||||
|
Loading…
Reference in New Issue
Block a user