Fixed bug #73655 Spoofchecker::isSuspicious behavior change due to upstream changes

There are significant changes in spoof checking, reflecting
http://www.unicode.org/reports/tr39/tr39-15.html and relying on
restriction levels. ICU 58+ removes whole-script (WSC) and mixed-script
(MSC) confusable handling and otherwise undergoes big changes in both the
code and data areas. Keep up with the basic points for now, as we need to
move forward and provide an acceptable experience to PHP users linking
against a newer ICU. Most distros don't provide ICU > 57.1 at the moment,
though. We will certainly need to keep up with the BC break in ICU 58+ in
possible further aspects.
This commit is contained in:
Anatol Belski 2017-10-20 19:14:22 +02:00
parent 2198d38cbc
commit f95063647c
3 changed files with 41 additions and 0 deletions

View File

@ -78,4 +78,6 @@ extern zend_class_entry *Spoofchecker_ce_ptr;
RETURN_FALSE; \ RETURN_FALSE; \
} \ } \
#define SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL USPOOF_MODERATELY_RESTRICTIVE
#endif // #ifndef SPOOFCHECKER_CLASS_H #endif // #ifndef SPOOFCHECKER_CLASS_H

View File

@ -43,12 +43,25 @@ PHP_METHOD(Spoofchecker, __construct)
co->uspoof = uspoof_open(SPOOFCHECKER_ERROR_CODE_P(co)); co->uspoof = uspoof_open(SPOOFCHECKER_ERROR_CODE_P(co));
INTL_METHOD_CHECK_STATUS(co, "spoofchecker: unable to open ICU Spoof Checker"); INTL_METHOD_CHECK_STATUS(co, "spoofchecker: unable to open ICU Spoof Checker");
#if U_ICU_VERSION_MAJOR_NUM >= 58
/* TODO save it into the object for further suspicion check comparison. */
/* ICU 58 removes WSC and MSC handling. However, there are restriction
levels as defined in
http://www.unicode.org/reports/tr39/tr39-15.html#Restriction_Level_Detection
and the default is highly restrictive. The moderately restrictive
level is what seems to correspond to the setting below, which applies to
ICU < 58. In the future, we might want to utilize the uspoof_check2 APIs
once they become stable, to use the extended check result APIs. Subsequent
changes in the Unicode security algorithms are to be watched. */
uspoof_setRestrictionLevel(co->uspoof, SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL);
#else
/* Single-script enforcement is on by default. This fails for languages /* Single-script enforcement is on by default. This fails for languages
like Japanese that legally use multiple scripts within a single word, like Japanese that legally use multiple scripts within a single word,
so we turn it off. so we turn it off.
*/ */
checks = uspoof_getChecks(co->uspoof, SPOOFCHECKER_ERROR_CODE_P(co)); checks = uspoof_getChecks(co->uspoof, SPOOFCHECKER_ERROR_CODE_P(co));
uspoof_setChecks(co->uspoof, checks & ~USPOOF_SINGLE_SCRIPT, SPOOFCHECKER_ERROR_CODE_P(co)); uspoof_setChecks(co->uspoof, checks & ~USPOOF_SINGLE_SCRIPT, SPOOFCHECKER_ERROR_CODE_P(co));
#endif
zend_restore_error_handling(&error_handling); zend_restore_error_handling(&error_handling);
} }
/* }}} */ /* }}} */

View File

@ -0,0 +1,26 @@
--TEST--
spoofchecker suspicious character checker
--SKIPIF--
<?php if(!extension_loaded('intl') || !class_exists("Spoofchecker")) print 'skip'; ?>
<?php
/* This test targets ICU 58.1 and newer, as the skip message states, so the
   version threshold must match it; anything older is skipped. */
if (version_compare(INTL_ICU_VERSION, '58.1') < 0) die('skip for ICU >= 58.1'); ?>
--FILE--
<?php
/* Latin strings with a single Cyrillic look-alike mixed in:
   U+0430 is CYRILLIC SMALL LETTER A, U+041F is CYRILLIC CAPITAL LETTER PE. */
echo "paypal with Cyrillic spoof characters\n";
$x = new Spoofchecker();
var_dump($x->isSuspicious("http://www.payp\u{0430}l.com"));
var_dump($x->isSuspicious("\u{041F}aypal.com"));

/* Pure Latin input checked against a checker restricted via setAllowedLocales().
   NOTE(review): the usual locale identifier for Greek is "el_GR"; "gr_GR" is
   left unchanged so the expected output below still applies - confirm intent. */
echo "certain all-uppercase Latin sequences can be spoof of Greek\n";
$x = new Spoofchecker();
$x->setAllowedLocales("gr_GR");
var_dump($x->isSuspicious("NAPKIN PEZ"));
var_dump($x->isSuspicious("napkin pez"));
?>
--EXPECTF--
paypal with Cyrillic spoof characters
bool(true)
bool(true)
certain all-uppercase Latin sequences can be spoof of Greek
bool(true)
bool(true)