MFH Fix bug #46944 - UTF-8 characters outside the BMP aren't encoded correctly.

This commit is contained in:
Scott MacVicar 2009-01-02 03:02:22 +00:00
parent 8d62f3dd02
commit 0bdbc4e356
3 changed files with 34 additions and 2 deletions

View File

@ -0,0 +1,32 @@
--TEST--
Bug #46944 (json_encode() doesn't handle 3 byte utf8 correctly)
--SKIPIF--
<?php if (!extension_loaded('json')) print 'skip'; ?>
--FILE--
<?php
// Regression test for bug #46944: json_encode() must emit a UTF-16 surrogate
// pair for UTF-8 sequences that encode code points outside the BMP.
//
// Each iteration builds the 4-byte UTF-8 encoding of U+xFFFD for one of the
// 16 supplementary planes (U+1FFFD, U+2FFFD, ... U+10FFFD):
//   - lead byte:         0xF0..0xF4, carrying the top bits of the plane number
//   - 1st continuation:  0x8F/0x9F/0xAF/0xBF, carrying the low 2 plane bits
//   - trailing bytes:    0xBF 0xBD (fixed)
for ($i = 1; $i <= 16; $i++) {
	// chr() is required here: concatenating the integers directly would
	// append their decimal digits ("240", "159", ...) instead of raw bytes.
	$lead = chr(0xf0 | ($i >> 2));
	$cont = chr(0x8f | (($i & 3) << 4));
	echo json_encode(b"aa" . $lead . $cont . "\xbf\xbdzz") . "\n";
}
echo "Done\n";
?>
--EXPECT--
"aa\ud83f\udffdzz"
"aa\ud87f\udffdzz"
"aa\ud8bf\udffdzz"
"aa\ud8ff\udffdzz"
"aa\ud93f\udffdzz"
"aa\ud97f\udffdzz"
"aa\ud9bf\udffdzz"
"aa\ud9ff\udffdzz"
"aa\uda3f\udffdzz"
"aa\uda7f\udffdzz"
"aa\udabf\udffdzz"
"aa\udaff\udffdzz"
"aa\udb3f\udffdzz"
"aa\udb7f\udffdzz"
"aa\udbbf\udffdzz"
"aa\udbff\udffdzz"
Done

View File

@ -165,7 +165,7 @@ utf8_decode_next(json_utf8_decode *utf8)
/*
Three continuation (65536 to 1114111)
*/
if ((c & 0xF1) == 0xF0) {
if ((c & 0xF8) == 0xF0) {
int c1 = cont(utf8);
int c2 = cont(utf8);
int c3 = cont(utf8);

View File

@ -46,7 +46,7 @@ utf8_to_utf16(unsigned short w[], char p[], int length)
w[the_index] = (unsigned short)c;
the_index += 1;
} else {
c &= 0xFFFF;
c -= 0x10000;
w[the_index] = (unsigned short)(0xD800 | (c >> 10));
the_index += 1;
w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));