mirror of
https://github.com/php/php-src.git
synced 2024-12-02 22:34:55 +08:00
9f8bcf3f51
binary strings). # These semantics seem quite broken, by the way, as it counts words # ending or starting with dashes (-). Since this shouldn't really be # used to count words in Unicode world anyway, supporting the "broken" # behavior is okay if it helps people with migration.
249 lines
5.7 KiB
Plaintext
249 lines
5.7 KiB
Plaintext
Extension Upgrade Tracking
|
|
==========================
|
|
|
|
ext/standard
|
|
------------
|
|
Status: In Progress
|
|
|
|
array.c
|
|
-------
|
|
natsort(), natcasesort()
|
|
Params API
|
|
Either port strnatcmp() to support Unicode or maybe use ICU's
|
|
numeric collation. Update: can't seem to get the right collation
|
|
parameters to duplicate strnatcmp() functionality. Conclusion: port
|
|
to support Unicode.
|
|
|
|
string.c
|
|
--------
|
|
count_chars()
|
|
Params API. Do we really want to go through the whole Unicode table?
|
|
May need to use hashtable instead of array.
|
|
|
|
hebrev(), hebrevc()
|
|
Figure out if this is something we can use ICU for, internally.
|
|
Check with Zeev.
|
|
|
|
localeconv()
|
|
Params API, update to use *_rt_* API.
|
|
|
|
money_format()
|
|
Just IS_UNICODE support with *_rt_* API.
|
|
|
|
nl_langinfo()
|
|
Params API, otherwise leave alone
|
|
|
|
parse_str()
|
|
Params API. How do we deal with encoding of the data?
|
|
|
|
quotemeta()
|
|
Params API, IS_UNICODE upgrade
|
|
|
|
sscanf()
|
|
Params API. Rest - no idea yet.
|
|
|
|
stristr()
|
|
stripos()
|
|
strripos()
|
|
str_replace()
|
|
stri_replace()
|
|
substr_compare()
|
|
These are the problematic ones. There are a few approaches:
|
|
|
|
1. Case-fold both needle and haystack and then do simple search.
|
|
|
|
2. Look at the implementation behind functions like
|
|
u_strcasecmp() and try to adapt it to a string search. The
|
|
implementation case-folds both strings incrementally. For
|
|
a search, one would want to case-fold the pattern beforehand,
|
|
but not the text in which you are searching.
|
|
|
|
3. Take the first character in the pattern and get the set of
|
|
all characters that have the same case folding (see the
|
|
UnicodeSet/USet API). Then search in the string for the
|
|
occurrence of any one of the set items (which include
|
|
strings!). Then do a case-insensitive comparison, allowing
|
|
a match that does not end with the end of the text.
|
|
|
|
The problematic cases are, of course, one-to-many foldings such as ß->ss.
|
|
|
|
All other approaches bite.
|
|
|
|
strnatcmp(), strnatcasecmp()
|
|
Params API. The rest depends on porting of strnatcmp.c
|
|
|
|
strrchr()
|
|
Needs update so that it doesn't try to find half of a surrogate
|
|
pair.
|
|
|
|
strtr()
|
|
Check on Derick's progress.
|
|
|
|
substr_replace()
|
|
Params API, test
|
|
|
|
wordwrap()
|
|
Upgrade, do wordwrapping on codepoint (or glyph?) level, maybe use
|
|
additional whitespace chars instead of just space.
|
|
|
|
|
|
|
|
|
|
Completed
|
|
=========
|
|
|
|
array.c
|
|
-------
|
|
array_change_key_case()
|
|
array_chunk()
|
|
array_combine()
|
|
array_count_values()
|
|
array_fill()
|
|
array_filter()
|
|
array_flip()
|
|
array_key_exists()
|
|
array_keys()
|
|
array_map()
|
|
array_merge()
|
|
array_merge_recursive()
|
|
array_multisort()
|
|
array_product()
|
|
array_push(), array_pop(), array_shift(), array_unshift()
|
|
array_pad()
|
|
array_rand()
|
|
array_reduce()
|
|
array_reverse()
|
|
array_search()
|
|
array_slice()
|
|
array_splice()
|
|
array_sum()
|
|
array_values()
|
|
array_unique()
|
|
array_walk()
|
|
array_walk_recursive()
|
|
compact()
|
|
count()
|
|
extract()
|
|
in_array()
|
|
min()
|
|
max()
|
|
range()
|
|
shuffle()
|
|
|
|
end(), prev(), next(), reset(), current(), key()
|
|
|
|
sort(), rsort()
|
|
asort(), arsort()
|
|
ksort(), krsort()
|
|
usort(), uasort(), uksort()
|
|
|
|
array_diff(), array_udiff()
|
|
array_diff_assoc(), array_diff_uassoc(),
|
|
array_udiff_assoc(), array_udiff_uassoc()
|
|
array_diff_key(), array_diff_ukey()
|
|
|
|
array_intersect(), array_uintersect()
|
|
array_intersect_assoc(), array_uintersect_assoc()
|
|
array_intersect_uassoc(), array_uintersect_uassoc()
|
|
array_intersect_key(), array_intersect_ukey()
|
|
|
|
|
|
string.c
|
|
--------
|
|
addcslashes()
|
|
addslashes()
|
|
basename()
|
|
bin2hex()
|
|
chr()
|
|
chunk_split()
|
|
dirname()
|
|
explode()
|
|
implode()
|
|
levenshtein()
|
|
nl2br()
|
|
ord()
|
|
pathinfo()
|
|
range()
|
|
similar_text()
|
|
str_pad()
|
|
str_repeat()
|
|
str_rot13()
|
|
str_shuffle()
|
|
str_split()
|
|
str_word_count()
|
|
strcoll()
|
|
strcspn()
|
|
strip_tags()
|
|
stripcslashes()
|
|
stripslashes()
|
|
strpbrk()
|
|
strpos()
|
|
strrchr()
|
|
strrev()
|
|
strrpos()
|
|
strspn()
|
|
strstr()
|
|
strtok()
|
|
strtolower()
|
|
strtoupper()
|
|
substr()
|
|
substr_count()
|
|
substr_replace()
|
|
trim()
|
|
ucfirst()
|
|
ucwords()
|
|
|
|
|
|
Zend Engine
|
|
-----------
|
|
Status: In Progress
|
|
|
|
TBD:
|
|
debug_backtrace()
|
|
Completed:
|
|
class_exists()
|
|
create_function()
|
|
debug_print_backtrace()
|
|
define()
|
|
defined()
|
|
each()
|
|
error_reporting()
|
|
extension_loaded()
|
|
func_get_arg()
|
|
func_get_args()
|
|
func_num_args()
|
|
function_exists()
|
|
get_class()
|
|
get_class_methods()
|
|
get_class_vars()
|
|
get_declared_classes()
|
|
get_declared_interfaces()
|
|
get_defined_constants()
|
|
get_defined_functions()
|
|
get_defined_vars()
|
|
get_extension_funcs()
|
|
get_included_files()
|
|
get_loaded_extensions()
|
|
get_object_vars()
|
|
get_parent_class()
|
|
get_resource_type()
|
|
interface_exists()
|
|
is_a()
|
|
is_subclass_of()
|
|
method_exists()
|
|
property_exists()
|
|
restore_error_handler()
|
|
restore_exception_handler()
|
|
set_error_handler()
|
|
set_exception_handler()
|
|
strcasecmp()
|
|
strcmp()
|
|
strlen()
|
|
strncasecmp()
|
|
strncmp()
|
|
trigger_error()
|
|
zend_thread_id()
|
|
zend_version()
|
|
|
|
vim: set et ts=4 sts=4:
|