Implement #77726: Allow null character in regex patterns

In 8b3c1a3, null characters in regex patterns were disallowed in order
to fix #55856, a security issue caused by the /e modifier. That fix was
the "Easier fix" described in the original report.

With this change, pattern strings are no longer treated as
null-terminated, so null characters can be placed inside a pattern and
matched against without security problems, and there is no longer a
reason to emit the error. Allowing this is consistent with the behaviour
of many other languages, including JavaScript, and thanks to PCRE2[0], it
does not require manually escaping null characters. Now that the error
can be avoided without the cost of escaping characters, there is no
longer any need to deviate from the conventional behaviour here.

Currently, null characters are still disallowed before the first
delimiter (where the null character would itself be taken as the
delimiter) and in the options section at the end of a regex string, but
the error messages for these two cases have been updated.

[0] Since PCRE2, pattern strings no longer have to be null-terminated,
and raw null characters in a pattern are matched like any other
character.

Closes GH-8114.
This commit is contained in:
tobil4sk 2022-04-24 20:52:53 +02:00 committed by Christoph M. Becker
parent 98a4ab2266
commit 5bb3e233db
No known key found for this signature in database
GPG Key ID: D66C9593118BCCB6
14 changed files with 169 additions and 56 deletions

3
NEWS
View File

@ -24,6 +24,9 @@ GD:
- ODBC:
. Fixed handling of single-key connection strings. (Calvin Buckley)
- PCRE:
. Implemented FR #77726 (Allow null character in regex patterns). (cmb)
- PDO_ODBC:
. Fixed handling of single-key connection strings. (Calvin Buckley)

View File

@ -221,6 +221,9 @@ PHP 8.2 UPGRADE NOTES
- OCI8:
. The minimum Oracle Client library version required is now 11.2.
- PCRE:
. NUL characters (\0) in pattern strings are now supported.
- SQLite3:
. sqlite3.defensive is now PHP_INI_USER.

View File

@ -624,7 +624,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
pcre_cache_entry new_entry;
int rc;
zend_string *key;
pcre_cache_entry *ret;
pcre_cache_entry *ret;
if (locale_aware && BG(ctype_string)) {
key = zend_string_concat2(
@ -645,16 +645,16 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
p = ZSTR_VAL(regex);
const char* end_p = ZSTR_VAL(regex) + ZSTR_LEN(regex);
/* Parse through the leading whitespace, and display a warning if we
get to the end without encountering a delimiter. */
while (isspace((int)*(unsigned char *)p)) p++;
if (*p == 0) {
if (p >= end_p) {
if (key != regex) {
zend_string_release_ex(key, 0);
}
php_error_docref(NULL, E_WARNING,
p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
php_error_docref(NULL, E_WARNING, "Empty regular expression");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@ -662,11 +662,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
/* Get the delimiter and display a warning if it is alphanumeric
or a backslash. */
delimiter = *p++;
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\' || delimiter == '\0') {
if (key != regex) {
zend_string_release_ex(key, 0);
}
php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric, backslash, or NUL");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@ -682,8 +682,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
/* We need to iterate through the pattern, searching for the ending delimiter,
but skipping the backslashed delimiters. If the ending delimiter is not
found, display a warning. */
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
while (pp < end_p) {
if (*pp == '\\' && pp + 1 < end_p) pp++;
else if (*pp == delimiter)
break;
pp++;
@ -695,8 +695,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
* reach the end of the pattern without matching, display a warning.
*/
int brackets = 1; /* brackets nesting level */
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
while (pp < end_p) {
if (*pp == '\\' && pp + 1 < end_p) pp++;
else if (*pp == end_delimiter && --brackets <= 0)
break;
else if (*pp == start_delimiter)
@ -705,13 +705,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
}
if (*pp == 0) {
if (pp >= end_p) {
if (key != regex) {
zend_string_release_ex(key, 0);
}
if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
} else if (start_delimiter == end_delimiter) {
if (start_delimiter == end_delimiter) {
php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
} else {
php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
@ -729,7 +727,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
/* Parse through the options, setting appropriate flags. Display
a warning if we encounter an unknown modifier. */
while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
while (pp < end_p) {
switch (*pp++) {
/* Perl compatible options */
case 'i': coptions |= PCRE2_CASELESS; break;
@ -764,9 +762,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
default:
if (pp[-1]) {
php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
php_error_docref(NULL, E_WARNING, "Unknown modifier '%c'", pp[-1]);
} else {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
php_error_docref(NULL, E_WARNING, "NUL is not a valid modifier");
}
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
@ -2438,12 +2436,6 @@ PHP_FUNCTION(preg_replace_callback_array)
}
ZEND_HASH_FOREACH_STR_KEY_VAL(pattern, str_idx_regex, replace) {
if (!str_idx_regex) {
php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
RETVAL_NULL();
goto error;
}
if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
zend_argument_type_error(1, "must contain only valid callbacks");
goto error;

View File

@ -21,5 +21,5 @@ var_dump(preg_replace_callback_array(
), 'a'));
?>
--EXPECTF--
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric or backslash in %sbug73392.php on line %d
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %sbug73392.php on line %d
NULL

View File

@ -12,6 +12,7 @@ var_dump(preg_match('~a', ''));
var_dump(preg_match('@\@\@@', '@@'));
var_dump(preg_match('//z', '@@'));
var_dump(preg_match('{', ''));
var_dump(preg_match("\0\0", ''));
?>
--EXPECTF--
@ -22,7 +23,7 @@ Warning: preg_match(): Empty regular expression in %sdelimiters.php on line 4
bool(false)
int(1)
Warning: preg_match(): Delimiter must not be alphanumeric or backslash in %sdelimiters.php on line 6
Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %sdelimiters.php on line 6
bool(false)
int(1)
@ -35,3 +36,6 @@ bool(false)
Warning: preg_match(): No ending matching delimiter '}' found in %sdelimiters.php on line 11
bool(false)
Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %sdelimiters.php on line 12
bool(false)

View File

@ -3,40 +3,64 @@ Zero byte test
--FILE--
<?php
preg_match("\0//i", "");
preg_match("/\0/i", "");
preg_match("//\0i", "");
preg_match("//i\0", "");
preg_match("/\\\0/i", "");
var_dump(preg_match("\0//i", ""));
var_dump(preg_match("/\0/i", ""));
var_dump(preg_match("/\0/i", "\0"));
var_dump(preg_match("//\0i", ""));
var_dump(preg_match("//i\0", ""));
var_dump(preg_match("/\\\0/i", ""));
var_dump(preg_match("/\\\0/i", "\\\0"));
preg_match("\0[]i", "");
preg_match("[\0]i", "");
preg_match("[]\0i", "");
preg_match("[]i\0", "");
preg_match("[\\\0]i", "");
var_dump(preg_match("\0[]i", ""));
var_dump(preg_match("[\0]i", ""));
var_dump(preg_match("[\0]i", "\0"));
var_dump(preg_match("[]\0i", ""));
var_dump(preg_match("[]i\0", ""));
var_dump(preg_match("[\\\0]i", ""));
var_dump(preg_match("[\\\0]i", "\\\0"));
var_dump(preg_match("/abc\0def/", "abc"));
var_dump(preg_match("/abc\0def/", "abc\0def"));
var_dump(preg_match("/abc\0def/", "abc\0fed"));
var_dump(preg_match("[abc\0def]", "abc"));
var_dump(preg_match("[abc\0def]", "abc\0def"));
var_dump(preg_match("[abc\0def]", "abc\0fed"));
preg_replace("/foo/e\0/i", "echo('Eek');", "");
?>
--EXPECTF--
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 3
Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %snull_bytes.php on line 3
bool(false)
int(0)
int(1)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 4
Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 6
bool(false)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 5
Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 7
bool(false)
int(0)
int(1)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 6
Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %snull_bytes.php on line 11
bool(false)
int(0)
int(1)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 7
Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 14
bool(false)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 9
Warning: preg_match(): NUL is not a valid modifier in %snull_bytes.php on line 15
bool(false)
int(0)
int(1)
int(0)
int(1)
int(0)
int(0)
int(1)
int(0)
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 10
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 11
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 12
Warning: preg_match(): Null byte in regex in %snull_bytes.php on line 13
Warning: preg_replace(): Null byte in regex in %snull_bytes.php on line 15
Warning: preg_replace(): NUL is not a valid modifier in %snull_bytes.php on line 27

View File

@ -37,7 +37,7 @@ echo "Done"
Arg value is abcdef
Warning: preg_grep(): Delimiter must not be alphanumeric or backslash in %spreg_grep_error1.php on line %d
Warning: preg_grep(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_grep_error1.php on line %d
bool(false)
Arg value is /[a-zA-Z]

View File

@ -38,7 +38,7 @@ var_dump($matches);
Arg value is abcdef
Warning: preg_match_all(): Delimiter must not be alphanumeric or backslash in %spreg_match_all_error1.php on line %d
Warning: preg_match_all(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_match_all_error1.php on line %d
bool(false)
NULL

View File

@ -34,7 +34,7 @@ try {
Arg value is abcdef
Warning: preg_match(): Delimiter must not be alphanumeric or backslash in %spreg_match_error1.php on line %d
Warning: preg_match(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_match_error1.php on line %d
bool(false)
Arg value is /[a-zA-Z]

View File

@ -0,0 +1,66 @@
--TEST--
preg_replace_callback_array() errors
--FILE--
<?php
function b() {
return "b";
}
// empty strings
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"" => function () { return "ok"; }), 'a'));
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
null => function () { return "ok"; }), 'a'));
// backslashes
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"\\b\\" => function () { return "ok"; }), 'a'));
// alphanumeric delimiters
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"aba" => function () { return "ok"; }), 'a'));
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"1b1" => function () { return "ok"; }), 'a'));
// null character delimiter
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"\0b\0" => function () { return "ok"; }), 'a'));
?>
--EXPECTF--
Warning: preg_replace_callback_array(): Empty regular expression in %spreg_replace_callback_array_error.php on line 12
NULL
Warning: preg_replace_callback_array(): Empty regular expression in %spreg_replace_callback_array_error.php on line 17
NULL
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 24
NULL
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 31
NULL
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 36
NULL
Warning: preg_replace_callback_array(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_callback_array_error.php on line 43
NULL

View File

@ -0,0 +1,21 @@
--TEST--
preg_replace_callback_array() invalid callable
--FILE--
<?php
function b() {
return "b";
}
// invalid callable
var_dump(preg_replace_callback_array(
array(
"/a/" => 'b',
"/b/" => 'invalid callable'), 'a'));
--EXPECTF--
Fatal error: Uncaught TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only valid callbacks in %spreg_replace_callback_array_fatal_error.php:11
Stack trace:
#0 %spreg_replace_callback_array_fatal_error.php(11): preg_replace_callback_array(Array, 'a')
#1 {main}
thrown in %spreg_replace_callback_array_fatal_error.php on line 11

View File

@ -30,7 +30,7 @@ foreach($regex_array as $regex_value) {
Arg value is abcdef
Warning: preg_replace_callback(): Delimiter must not be alphanumeric or backslash in %s on line %d
Warning: preg_replace_callback(): Delimiter must not be alphanumeric, backslash, or NUL in %s on line %d
NULL
Arg value is /[a-zA-Z]

View File

@ -33,7 +33,7 @@ try {
Arg value is abcdef
Warning: preg_replace(): Delimiter must not be alphanumeric or backslash in %spreg_replace_error1.php on line %d
Warning: preg_replace(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_replace_error1.php on line %d
NULL
Arg value is /[a-zA-Z]

View File

@ -36,7 +36,7 @@ try {
Arg value is abcdef
Warning: preg_split(): Delimiter must not be alphanumeric or backslash in %spreg_split_error1.php on line %d
Warning: preg_split(): Delimiter must not be alphanumeric, backslash, or NUL in %spreg_split_error1.php on line %d
bool(false)
Arg value is /[a-zA-Z]