Remove string length limit from levenshtein()

As noted in https://bugs.php.net/bug.php?id=80073, I don't think
having this limitation makes sense. The similar_text() function
has much worse asymptotic complexity than levenshtein() and does
not enforce such a limitation. levenshtein() does have fairly high
memory requirements, but they grow only linearly with the string
length (and are subject to the memory limit).
This commit is contained in:
Nikita Popov 2020-10-06 17:12:41 +02:00
parent 240d06118c
commit 6a8c094e2d
2 changed files with 6 additions and 18 deletions

View File

@@ -17,8 +17,6 @@
#include "php.h"
#include "php_string.h"
#define LEVENSHTEIN_MAX_LENGTH 255
/* {{{ reference_levdist
* reference implementation, only optimized for memory usage, not speed */
static zend_long reference_levdist(const zend_string *string1, const zend_string *string2, zend_long cost_ins, zend_long cost_rep, zend_long cost_del )
@@ -75,24 +73,12 @@ PHP_FUNCTION(levenshtein)
zend_long cost_ins = 1;
zend_long cost_rep = 1;
zend_long cost_del = 1;
zend_long distance = 0;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS|lll", &string1, &string2, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
RETURN_THROWS();
}
if (ZSTR_LEN(string1) > LEVENSHTEIN_MAX_LENGTH) {
zend_argument_value_error(1, "must be less than %d characters", LEVENSHTEIN_MAX_LENGTH + 1);
RETURN_THROWS();
}
if (ZSTR_LEN(string2) > LEVENSHTEIN_MAX_LENGTH) {
zend_argument_value_error(2, "must be less than %d characters", LEVENSHTEIN_MAX_LENGTH + 1);
RETURN_THROWS();
}
distance = reference_levdist(string1, string2, cost_ins, cost_rep, cost_del);
RETURN_LONG(distance);
RETURN_LONG(reference_levdist(string1, string2, cost_ins, cost_rep, cost_del));
}
/* }}} */

View File

@@ -1,8 +1,10 @@
--TEST--
levenshtein() error conditions
levenshtein() former error conditions
--FILE--
<?php
// levenshtein no longer has a maximum string length limit.
echo '--- String 1 ---' . \PHP_EOL;
var_dump(levenshtein('AbcdefghijklmnopqrstuvwxyzAbcdefghijklmnopqrstuvwxyzAbcdefghijklmnopqrstuvwxyzAbcdefghijklmnopqrstuvwxyzAbcdefghijklmnopqrtsuvwxyzAbcdefghijklmnopqrtsuvwxyzAbcdefghijklmnopqrtsuvwxyzAbcdefghijklmnopqrtsuvwxyzAbcdefghijklmnopqrtsuvwxyzAbcdefghijklmnopqrtsu', 'A'));
try {
@@ -25,7 +27,7 @@ try {
--EXPECT--
--- String 1 ---
int(254)
levenshtein(): Argument #1 ($string1) must be less than 256 characters
int(255)
--- String 2 ---
int(254)
levenshtein(): Argument #2 ($string2) must be less than 256 characters
int(255)