diff --git a/ext/mbstring/libmbfl/filters/mbfilter_7bit.c b/ext/mbstring/libmbfl/filters/mbfilter_7bit.c index 54744aa4b8e..f4367261c38 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_7bit.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_7bit.c @@ -65,7 +65,8 @@ const mbfl_encoding mbfl_encoding_7bit = { &vtbl_wchar_7bit, mb_7bit_to_wchar, mb_wchar_to_7bit, - NULL + NULL, + NULL, }; #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_base64.c b/ext/mbstring/libmbfl/filters/mbfilter_base64.c index b5a732224f0..a2e51067494 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_base64.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_base64.c @@ -45,7 +45,8 @@ const mbfl_encoding mbfl_encoding_base64 = { NULL, mb_base64_to_wchar, mb_wchar_to_base64, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_8bit_b64 = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c index 13635764326..7c312cb68e6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -4392,7 +4392,8 @@ const mbfl_encoding mbfl_encoding_jis = { &vtbl_wchar_jis, mb_iso2022jp_to_wchar, mb_wchar_to_jis, - mb_check_jis + mb_check_jis, + NULL, }; static const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { @@ -4426,7 +4427,8 @@ const mbfl_encoding mbfl_encoding_2022jp = { &vtbl_wchar_2022jp, mb_iso2022jp_to_wchar, mb_wchar_to_iso2022jp, - mb_check_iso2022jp + mb_check_iso2022jp, + NULL, }; static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; @@ -4462,7 +4464,8 @@ const mbfl_encoding mbfl_encoding_2022jp_kddi = { &vtbl_wchar_2022jp_kddi, mb_iso2022jp_kddi_to_wchar, mb_wchar_to_iso2022jp_kddi, - NULL + NULL, + NULL, }; static const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = { @@ -4496,7 +4499,8 @@ const mbfl_encoding mbfl_encoding_2022jp_2004 = { &vtbl_wchar_2022jp_2004, mb_iso2022jp2004_to_wchar, mb_wchar_to_iso2022jp2004, - NULL + NULL, + NULL, }; /* Previously, a dubious 'encoding' called 'cp50220raw' was supported @@ -4581,7 +4585,8 @@ const mbfl_encoding mbfl_encoding_cp50220 = { &vtbl_wchar_cp50220, mb_cp5022x_to_wchar, mb_wchar_to_cp50220, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_cp50221 = { @@ -4595,7 +4600,8 @@ const mbfl_encoding mbfl_encoding_cp50221 = { &vtbl_wchar_cp50221, mb_cp5022x_to_wchar, mb_wchar_to_cp50221, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_cp50222 = { @@ -4609,7 +4615,8 @@ const mbfl_encoding mbfl_encoding_cp50222 = { &vtbl_wchar_cp50222, mb_cp5022x_to_wchar, mb_wchar_to_cp50222, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; @@ -4645,7 +4652,8 @@ const mbfl_encoding mbfl_encoding_2022jpms = { &vtbl_wchar_2022jpms, mb_iso2022jpms_to_wchar, mb_wchar_to_iso2022jpms, - NULL + NULL, + NULL, }; /* ISO-2022-KR is defined in RFC 1557 @@ -4687,7 +4695,8 @@ const mbfl_encoding mbfl_encoding_2022kr = { &vtbl_wchar_2022kr, mb_iso2022kr_to_wchar, mb_wchar_to_iso2022kr, - NULL + NULL, + NULL, }; /* @@ -7832,7 +7841,8 @@ const mbfl_encoding mbfl_encoding_sjis = { &vtbl_wchar_sjis, mb_sjis_to_wchar, mb_wchar_to_sjis, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL}; @@ -7868,7 +7878,8 @@ const mbfl_encoding mbfl_encoding_sjis_mac = { &vtbl_wchar_sjis_mac, mb_sjismac_to_wchar, mb_wchar_to_sjismac, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; @@ -7906,7 +7917,8 @@ const mbfl_encoding mbfl_encoding_sjis_docomo = { &vtbl_wchar_sjis_docomo, mb_sjis_docomo_to_wchar, mb_wchar_to_sjis_docomo, - NULL + NULL, + NULL, }; static const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = { @@ -7940,7 +7952,8 @@ const mbfl_encoding mbfl_encoding_sjis_kddi = { &vtbl_wchar_sjis_kddi, mb_sjis_kddi_to_wchar, mb_wchar_to_sjis_kddi, - NULL + NULL, + NULL, }; static const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = { @@ -7974,7 +7987,8 @@ const mbfl_encoding mbfl_encoding_sjis_sb = { &vtbl_wchar_sjis_sb, mb_sjis_sb_to_wchar, mb_wchar_to_sjis_sb, - NULL + NULL, + NULL, }; /* Although the specification for Shift-JIS-2004 indicates that 0x5C and @@ -8017,7 +8031,8 @@ const mbfl_encoding mbfl_encoding_sjis2004 = { &vtbl_wchar_sjis2004, mb_sjis2004_to_wchar, mb_wchar_to_sjis2004, - NULL + NULL, + NULL, }; /* CP932 is Microsoft's version of Shift-JIS. @@ -8103,7 +8118,8 @@ const mbfl_encoding mbfl_encoding_cp932 = { &vtbl_wchar_cp932, mb_cp932_to_wchar, mb_wchar_to_cp932, - NULL + NULL, + NULL, }; static const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { @@ -8137,7 +8153,8 @@ const mbfl_encoding mbfl_encoding_sjiswin = { &vtbl_wchar_sjiswin, mb_cp932_to_wchar, mb_wchar_to_sjiswin, - NULL + NULL, + NULL, }; /* @@ -10346,7 +10363,8 @@ const mbfl_encoding mbfl_encoding_euc_jp = { &vtbl_wchar_eucjp, mb_eucjp_to_wchar, mb_wchar_to_eucjp, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; @@ -10382,7 +10400,8 @@ const mbfl_encoding mbfl_encoding_eucjp2004 = { &vtbl_wchar_eucjp2004, mb_eucjp2004_to_wchar, mb_wchar_to_eucjp2004, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; @@ -10418,7 +10437,8 @@ const mbfl_encoding mbfl_encoding_eucjp_win = { &vtbl_wchar_eucjpwin, mb_eucjpwin_to_wchar, mb_wchar_to_eucjpwin, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; @@ -10454,7 +10474,8 @@ const mbfl_encoding mbfl_encoding_cp51932 = { &vtbl_wchar_cp51932, mb_cp51932_to_wchar, mb_wchar_to_cp51932, - NULL + NULL, + NULL, }; static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ @@ -10509,7 +10530,8 @@ const mbfl_encoding mbfl_encoding_euc_cn = { &vtbl_wchar_euccn, mb_euccn_to_wchar, mb_wchar_to_euccn, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; @@ -10545,7 +10567,8 @@ const mbfl_encoding mbfl_encoding_euc_tw = { &vtbl_wchar_euctw, mb_euctw_to_wchar, mb_wchar_to_euctw, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; @@ -10581,7 +10604,8 @@ const mbfl_encoding mbfl_encoding_euc_kr = { &vtbl_wchar_euckr, mb_euckr_to_wchar, mb_wchar_to_euckr, - NULL + NULL, + NULL, }; /* UHC was introduced by MicroSoft in Windows 95, and is also known as CP949. @@ -10640,7 +10664,8 @@ const mbfl_encoding mbfl_encoding_uhc = { &vtbl_wchar_uhc, mb_uhc_to_wchar, mb_wchar_to_uhc, - NULL + NULL, + NULL, }; /* @@ -11555,7 +11580,8 @@ const mbfl_encoding mbfl_encoding_gb18030 = { &vtbl_wchar_gb18030, mb_gb18030_to_wchar, mb_wchar_to_gb18030, - NULL + NULL, + NULL, }; static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL}; @@ -11591,7 +11617,8 @@ const mbfl_encoding mbfl_encoding_cp936 = { &vtbl_wchar_cp936, mb_cp936_to_wchar, mb_wchar_to_cp936, - NULL + NULL, + NULL, }; /* @@ -12160,7 +12187,8 @@ const mbfl_encoding mbfl_encoding_big5 = { &vtbl_wchar_big5, mb_big5_to_wchar, mb_wchar_to_big5, - NULL + NULL, + NULL, }; static const struct mbfl_convert_vtbl vtbl_cp950_wchar = { @@ -12194,7 +12222,8 @@ const mbfl_encoding mbfl_encoding_cp950 = { &vtbl_wchar_cp950, mb_cp950_to_wchar, mb_wchar_to_cp950, - NULL + NULL, + NULL, }; /* @@ -12567,5 +12596,6 @@ const mbfl_encoding mbfl_encoding_hz = { &vtbl_wchar_hz, mb_hz_to_wchar, mb_wchar_to_hz, - NULL + NULL, + NULL, }; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index a75a9c757cb..6bfc48d5f53 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -67,7 +67,8 @@ const mbfl_encoding mbfl_encoding_html_ent = { &vtbl_wchar_html, mb_htmlent_to_wchar, mb_wchar_to_htmlent, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_wchar_html = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c index c743942d0c5..50297de87cc 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c @@ -46,7 +46,8 @@ const mbfl_encoding mbfl_encoding_qprint = { NULL, mb_qprint_to_wchar, mb_wchar_to_qprint, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_8bit_qprint = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c b/ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c index c5872335a85..7ced00fa536 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_singlebyte.c @@ -87,6 +87,7 @@ static int mbfl_conv_reverselookup_table(int c, mbfl_convert_filter *filter, int &vtbl_wchar_##id, \ mb_##id##_to_wchar, \ mb_wchar_to_##id, \ + NULL, \ NULL \ } diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c index e6711d82f8a..01b569482b6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_ucs2.c @@ -57,7 +57,8 @@ const mbfl_encoding mbfl_encoding_ucs2 = { &vtbl_wchar_ucs2, mb_ucs2_to_wchar, mb_wchar_to_ucs2be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_ucs2be = { @@ -71,7 +72,8 @@ const mbfl_encoding mbfl_encoding_ucs2be = { &vtbl_wchar_ucs2be, mb_ucs2be_to_wchar, mb_wchar_to_ucs2be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_ucs2le = { @@ -85,7 +87,8 @@ const mbfl_encoding mbfl_encoding_ucs2le = { &vtbl_wchar_ucs2le, mb_ucs2le_to_wchar, mb_wchar_to_ucs2le, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_ucs2_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_ucs4.c b/ext/mbstring/libmbfl/filters/mbfilter_ucs4.c index 1585cb82e3f..10b57061f7d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_ucs4.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_ucs4.c @@ -57,7 +57,8 @@ const mbfl_encoding mbfl_encoding_ucs4 = { &vtbl_wchar_ucs4, mb_ucs4_to_wchar, mb_wchar_to_ucs4be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_ucs4be = { @@ -71,7 +72,8 @@ const mbfl_encoding mbfl_encoding_ucs4be = { &vtbl_wchar_ucs4be, mb_ucs4be_to_wchar, mb_wchar_to_ucs4be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_ucs4le = { @@ -85,7 +87,8 @@ const mbfl_encoding mbfl_encoding_ucs4le = { &vtbl_wchar_ucs4le, mb_ucs4le_to_wchar, mb_wchar_to_ucs4le, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_ucs4_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf16.c b/ext/mbstring/libmbfl/filters/mbfilter_utf16.c index 6e687c941c2..9957ae11896 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf16.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf16.c @@ -189,7 +189,8 @@ const mbfl_encoding mbfl_encoding_utf16 = { &vtbl_wchar_utf16, mb_utf16_to_wchar, mb_wchar_to_utf16be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf16be = { @@ -203,7 +204,8 @@ const mbfl_encoding mbfl_encoding_utf16be = { &vtbl_wchar_utf16be, mb_utf16be_to_wchar, mb_wchar_to_utf16be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf16le = { @@ -217,7 +219,8 @@ const mbfl_encoding mbfl_encoding_utf16le = { &vtbl_wchar_utf16le, mb_utf16le_to_wchar, mb_wchar_to_utf16le, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_utf16_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c index b49f5df5369..81057d8c6e9 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf32.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf32.c @@ -50,7 +50,8 @@ const mbfl_encoding mbfl_encoding_utf32 = { &vtbl_wchar_utf32, mb_utf32_to_wchar, mb_wchar_to_utf32be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf32be = { @@ -64,7 +65,8 @@ const mbfl_encoding mbfl_encoding_utf32be = { &vtbl_wchar_utf32be, mb_utf32be_to_wchar, mb_wchar_to_utf32be, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf32le = { @@ -78,7 +80,8 @@ const mbfl_encoding mbfl_encoding_utf32le = { &vtbl_wchar_utf32le, mb_utf32le_to_wchar, mb_wchar_to_utf32le, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_utf32_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c index af84602ae18..87bdc590e6d 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7.c @@ -62,7 +62,8 @@ const mbfl_encoding mbfl_encoding_utf7 = { &vtbl_wchar_utf7, mb_utf7_to_wchar, mb_wchar_to_utf7, - mb_check_utf7 + mb_check_utf7, + NULL, }; const struct mbfl_convert_vtbl vtbl_utf7_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c b/ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c index 4a9d47d5cd6..ac49e6a165b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c @@ -98,7 +98,8 @@ const mbfl_encoding mbfl_encoding_utf7imap = { &vtbl_wchar_utf7imap, mb_utf7imap_to_wchar, mb_wchar_to_utf7imap, - mb_check_utf7imap + mb_check_utf7imap, + NULL, }; const struct mbfl_convert_vtbl vtbl_utf7imap_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c index 92d7c389309..d079c2a73b6 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8.c @@ -51,6 +51,7 @@ const unsigned char mblen_table_utf8[] = { static size_t mb_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); static void mb_wchar_to_utf8(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); +static zend_string* mb_cut_utf8(unsigned char *str, size_t from, size_t len, unsigned char *end); static const char *mbfl_encoding_utf8_aliases[] = {"utf8", NULL}; @@ -65,7 +66,8 @@ const mbfl_encoding mbfl_encoding_utf8 = { &vtbl_wchar_utf8, mb_utf8_to_wchar, mb_wchar_to_utf8, - NULL + NULL, + mb_cut_utf8 }; const struct mbfl_convert_vtbl vtbl_utf8_wchar = { @@ -335,3 +337,21 @@ static void mb_wchar_to_utf8(uint32_t *in, size_t len, mb_convert_buf *buf, bool MB_CONVERT_BUF_STORE(buf, out, limit); } + +static zend_string* mb_cut_utf8(unsigned char *str, size_t from, size_t len, unsigned char *end) +{ + unsigned char *start = str + from; + /* Byte values less than -64 are UTF-8 continuation bytes, that is, + * the 2nd, 3rd, or 4th byte of a multi-byte character */ + while (start > str && ((signed char)*start) < -64) { + start--; + } + unsigned char *_end = start + len; + if (_end >= end) { + return zend_string_init_fast((char*)start, end - start); + } + while (_end > start && ((signed char)*_end) < -64) { + _end--; + } + return zend_string_init_fast((char*)start, _end - start); +} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c index dd253cfe689..75bf19ceb6f 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c @@ -124,7 +124,8 @@ const mbfl_encoding mbfl_encoding_utf8_docomo = { &vtbl_wchar_utf8_docomo, mb_utf8_docomo_to_wchar, mb_wchar_to_utf8_docomo, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf8_kddi_a = { @@ -138,7 +139,8 @@ const mbfl_encoding mbfl_encoding_utf8_kddi_a = { &vtbl_wchar_utf8_kddi_a, mb_utf8_kddi_a_to_wchar, mb_wchar_to_utf8_kddi_a, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf8_kddi_b = { @@ -152,7 +154,8 @@ const mbfl_encoding mbfl_encoding_utf8_kddi_b = { &vtbl_wchar_utf8_kddi_b, mb_utf8_kddi_b_to_wchar, mb_wchar_to_utf8_kddi_b, - NULL + NULL, + NULL, }; const mbfl_encoding mbfl_encoding_utf8_sb = { @@ -166,7 +169,8 @@ const mbfl_encoding mbfl_encoding_utf8_sb = { &vtbl_wchar_utf8_sb, mb_utf8_sb_to_wchar, mb_wchar_to_utf8_sb, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_utf8_docomo_wchar = { diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c b/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c index 83a56977d3e..600d019b9d4 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_uuencode.c @@ -44,7 +44,8 @@ const mbfl_encoding mbfl_encoding_uuencode = { NULL, mb_uuencode_to_wchar, mb_wchar_to_uuencode, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_uuencode_8bit = { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c index 43db2f7f5b2..e0f95466ca4 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.c @@ -52,7 +52,8 @@ const mbfl_encoding mbfl_encoding_8bit = { &vtbl_wchar_8bit, mb_8bit_to_wchar, mb_wchar_to_8bit, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_8bit_wchar = { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c index b932603e1c5..45b784b388c 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.c @@ -45,7 +45,8 @@ const mbfl_encoding mbfl_encoding_pass = { NULL, NULL, NULL, - NULL + NULL, + NULL, }; const struct mbfl_convert_vtbl vtbl_pass = { diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c index 93a8d91e7a5..758cfa0f71d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.c @@ -43,5 +43,6 @@ const mbfl_encoding mbfl_encoding_wchar = { NULL, NULL, NULL, - NULL + NULL, + NULL, }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index b25ec71eef9..eee913c600c 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -145,6 +145,7 @@ typedef struct { typedef size_t (*mb_to_wchar_fn)(unsigned char **in, size_t *in_len, uint32_t *out, size_t out_len, unsigned int *state); typedef void (*mb_from_wchar_fn)(uint32_t *in, size_t in_len, mb_convert_buf *out, bool end); typedef bool (*mb_check_fn)(unsigned char *in, size_t in_len); +typedef zend_string* (*mb_cut_fn)(unsigned char *str, size_t from, size_t len, unsigned char *end); /* When converting encoded text to a buffer of wchars (Unicode codepoints) using `mb_to_wchar_fn`, * the buffer must be at least this size (to work with all supported text encodings) */ @@ -251,6 +252,7 @@ typedef struct { mb_to_wchar_fn to_wchar; mb_from_wchar_fn from_wchar; mb_check_fn check; + mb_cut_fn cut; } mbfl_encoding; extern const mbfl_encoding mbfl_encoding_utf8; diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 7bf6ef02ee2..a374539c290 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2403,19 +2403,20 @@ PHP_FUNCTION(mb_strcut) Z_PARAM_STR_OR_NULL(encoding) ZEND_PARSE_PARAMETERS_END(); - string.val = (unsigned char*)string_val; - string.encoding = php_mb_get_encoding(encoding, 4); - if (!string.encoding) { + const mbfl_encoding *enc = php_mb_get_encoding(encoding, 4); + if (!enc) { RETURN_THROWS(); } + string.val = (unsigned char*)string_val; + string.encoding = enc; + if (len_is_null) { len = string.len; } /* if "from" position is negative, count start position from the end - * of the string - */ + * of the string */ if (from < 0) { from = string.len + from; if (from < 0) { @@ -2424,8 +2425,7 @@ PHP_FUNCTION(mb_strcut) } /* if "length" position is negative, set it to the length - * needed to stop that many chars from the end of the string - */ + * needed to stop that many chars from the end of the string */ if (len < 0) { len = (string.len - from) + len; if (len < 0) { @@ -2437,12 +2437,14 @@ PHP_FUNCTION(mb_strcut) RETURN_EMPTY_STRING(); } - ret = mbfl_strcut(&string, &result, from, len); - ZEND_ASSERT(ret != NULL); - - // TODO: avoid reallocation ??? - RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ - efree(ret->val); + if (enc->cut) { + RETURN_STR(enc->cut(string.val, from, len, string.val + string.len)); + } else { + ret = mbfl_strcut(&string, &result, from, len); + ZEND_ASSERT(ret != NULL); + RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ + efree(ret->val); + } } /* }}} */