mirror of
https://github.com/php/php-src.git
synced 2025-01-08 20:17:28 +08:00
d1e7999d5b
1) replaced multiple htmlentities() calls with a single call to get_html_translation_table(), since they share the same code internally; 2) reduced the upper bound of the "for" loop to 0x2710 (10000), which is sufficient according to http://www.w3.org/TR/html4/sgml/entities.html; 3) added a check to make sure every entity returned by get_html_translation_table() was matched in the test
314 lines
5.3 KiB
PHP
314 lines
5.3 KiB
PHP
--TEST--
|
|
htmlentities() conformance check (HTML 4)
|
|
--FILE--
|
|
<?php
|
|
/**
 * Encode one integer code point as a UTF-8 style byte string.
 *
 * The sequence length is chosen from the magnitude of $k:
 *   below 0x80       -> 1 byte
 *   below 0x800      -> 2 bytes
 *   below 0x10000    -> 3 bytes
 *   below 0x200000   -> 4 bytes
 *   below 0x4000000  -> 5 bytes
 *   anything else    -> 6 bytes
 * The 5- and 6-byte forms are longer than anything current UTF-8 allows;
 * they are produced here purely by following the same bit layout.
 *
 * No validation is performed (surrogates and out-of-range values are
 * encoded like any other number).
 *
 * @param int $k code point to encode
 * @return string the encoded bytes
 */
function utf32_utf8($k) {
    // Values below 0x80 are emitted as a single byte.
    if ($k < 0x80) {
        return pack('C', $k);
    }

    // Pick the number of continuation bytes and the matching lead-byte marker.
    if ($k < 0x800) {
        $tail = 1;
        $marker = 0xc0;
    } elseif ($k < 0x10000) {
        $tail = 2;
        $marker = 0xe0;
    } elseif ($k < 0x200000) {
        $tail = 3;
        $marker = 0xf0;
    } elseif ($k < 0x4000000) {
        $tail = 4;
        $marker = 0xf8;
    } else {
        $tail = 5;
        $marker = 0xfc;
    }

    // Lead byte: marker bits plus whatever remains above the continuation payload.
    $encoded = pack('C', $marker | ($k >> (6 * $tail)));

    // Continuation bytes: six payload bits each, most significant group first.
    for ($shift = 6 * ($tail - 1); $shift >= 0; $shift -= 6) {
        $encoded .= pack('C', 0x80 | (($k >> $shift) & 0x3f));
    }

    return $encoded;
}
|
|
|
|
// Table under test: keys are the raw UTF-8 characters, values are the entity
// strings they translate to. ENT_QUOTES is passed so quote characters are
// part of the table as well.
$table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8');

// Walk code points in ascending order and print every one that has an entry,
// removing it from the table as it is reported. The scan ends at 0x2710,
// far below the surrogate range (U+D800..U+DFFF), so no surrogate skip is
// needed here; add one if this bound is ever raised past 0xD800.
for ($i = 0; $i < 0x2710; $i++) {
    $str = utf32_utf8($i);
    if (isset($table[$str])) {
        printf("%s\tU+%05X\n", $table[$str], $i);
        unset($table[$str]);
    }
}

// Anything still left in the table was never reached by the scan above;
// dump it so the mismatch shows up in the test output.
if (!empty($table)) {
    echo "Not matched entities: ";
    var_dump($table);
}
|
|
|
|
?>
|
|
--EXPECT--
|
|
" U+00022
|
|
& U+00026
|
|
' U+00027
|
|
< U+0003C
|
|
> U+0003E
|
|
U+000A0
|
|
¡ U+000A1
|
|
¢ U+000A2
|
|
£ U+000A3
|
|
¤ U+000A4
|
|
¥ U+000A5
|
|
¦ U+000A6
|
|
§ U+000A7
|
|
¨ U+000A8
|
|
© U+000A9
|
|
ª U+000AA
|
|
« U+000AB
|
|
¬ U+000AC
|
|
­ U+000AD
|
|
® U+000AE
|
|
¯ U+000AF
|
|
° U+000B0
|
|
± U+000B1
|
|
² U+000B2
|
|
³ U+000B3
|
|
´ U+000B4
|
|
µ U+000B5
|
|
¶ U+000B6
|
|
· U+000B7
|
|
¸ U+000B8
|
|
¹ U+000B9
|
|
º U+000BA
|
|
» U+000BB
|
|
¼ U+000BC
|
|
½ U+000BD
|
|
¾ U+000BE
|
|
¿ U+000BF
|
|
À U+000C0
|
|
Á U+000C1
|
|
 U+000C2
|
|
à U+000C3
|
|
Ä U+000C4
|
|
Å U+000C5
|
|
Æ U+000C6
|
|
Ç U+000C7
|
|
È U+000C8
|
|
É U+000C9
|
|
Ê U+000CA
|
|
Ë U+000CB
|
|
Ì U+000CC
|
|
Í U+000CD
|
|
Î U+000CE
|
|
Ï U+000CF
|
|
Ð U+000D0
|
|
Ñ U+000D1
|
|
Ò U+000D2
|
|
Ó U+000D3
|
|
Ô U+000D4
|
|
Õ U+000D5
|
|
Ö U+000D6
|
|
× U+000D7
|
|
Ø U+000D8
|
|
Ù U+000D9
|
|
Ú U+000DA
|
|
Û U+000DB
|
|
Ü U+000DC
|
|
Ý U+000DD
|
|
Þ U+000DE
|
|
ß U+000DF
|
|
à U+000E0
|
|
á U+000E1
|
|
â U+000E2
|
|
ã U+000E3
|
|
ä U+000E4
|
|
å U+000E5
|
|
æ U+000E6
|
|
ç U+000E7
|
|
è U+000E8
|
|
é U+000E9
|
|
ê U+000EA
|
|
ë U+000EB
|
|
ì U+000EC
|
|
í U+000ED
|
|
î U+000EE
|
|
ï U+000EF
|
|
ð U+000F0
|
|
ñ U+000F1
|
|
ò U+000F2
|
|
ó U+000F3
|
|
ô U+000F4
|
|
õ U+000F5
|
|
ö U+000F6
|
|
÷ U+000F7
|
|
ø U+000F8
|
|
ù U+000F9
|
|
ú U+000FA
|
|
û U+000FB
|
|
ü U+000FC
|
|
ý U+000FD
|
|
þ U+000FE
|
|
ÿ U+000FF
|
|
Œ U+00152
|
|
œ U+00153
|
|
Š U+00160
|
|
š U+00161
|
|
Ÿ U+00178
|
|
ƒ U+00192
|
|
ˆ U+002C6
|
|
˜ U+002DC
|
|
Α U+00391
|
|
Β U+00392
|
|
Γ U+00393
|
|
Δ U+00394
|
|
Ε U+00395
|
|
Ζ U+00396
|
|
Η U+00397
|
|
Θ U+00398
|
|
Ι U+00399
|
|
Κ U+0039A
|
|
Λ U+0039B
|
|
Μ U+0039C
|
|
Ν U+0039D
|
|
Ξ U+0039E
|
|
Ο U+0039F
|
|
Π U+003A0
|
|
Ρ U+003A1
|
|
Σ U+003A3
|
|
Τ U+003A4
|
|
Υ U+003A5
|
|
Φ U+003A6
|
|
Χ U+003A7
|
|
Ψ U+003A8
|
|
Ω U+003A9
|
|
α U+003B1
|
|
β U+003B2
|
|
γ U+003B3
|
|
δ U+003B4
|
|
ε U+003B5
|
|
ζ U+003B6
|
|
η U+003B7
|
|
θ U+003B8
|
|
ι U+003B9
|
|
κ U+003BA
|
|
λ U+003BB
|
|
μ U+003BC
|
|
ν U+003BD
|
|
ξ U+003BE
|
|
ο U+003BF
|
|
π U+003C0
|
|
ρ U+003C1
|
|
ς U+003C2
|
|
σ U+003C3
|
|
τ U+003C4
|
|
υ U+003C5
|
|
φ U+003C6
|
|
χ U+003C7
|
|
ψ U+003C8
|
|
ω U+003C9
|
|
ϑ U+003D1
|
|
ϒ U+003D2
|
|
ϖ U+003D6
|
|
  U+02002
|
|
  U+02003
|
|
  U+02009
|
|
‌ U+0200C
|
|
‍ U+0200D
|
|
‎ U+0200E
|
|
‏ U+0200F
|
|
– U+02013
|
|
— U+02014
|
|
‘ U+02018
|
|
’ U+02019
|
|
‚ U+0201A
|
|
“ U+0201C
|
|
” U+0201D
|
|
„ U+0201E
|
|
† U+02020
|
|
‡ U+02021
|
|
• U+02022
|
|
… U+02026
|
|
‰ U+02030
|
|
′ U+02032
|
|
″ U+02033
|
|
‹ U+02039
|
|
› U+0203A
|
|
‾ U+0203E
|
|
⁄ U+02044
|
|
€ U+020AC
|
|
ℑ U+02111
|
|
℘ U+02118
|
|
ℜ U+0211C
|
|
™ U+02122
|
|
ℵ U+02135
|
|
← U+02190
|
|
↑ U+02191
|
|
→ U+02192
|
|
↓ U+02193
|
|
↔ U+02194
|
|
↵ U+021B5
|
|
⇐ U+021D0
|
|
⇑ U+021D1
|
|
⇒ U+021D2
|
|
⇓ U+021D3
|
|
⇔ U+021D4
|
|
∀ U+02200
|
|
∂ U+02202
|
|
∃ U+02203
|
|
∅ U+02205
|
|
∇ U+02207
|
|
∈ U+02208
|
|
∉ U+02209
|
|
∋ U+0220B
|
|
∏ U+0220F
|
|
∑ U+02211
|
|
− U+02212
|
|
∗ U+02217
|
|
√ U+0221A
|
|
∝ U+0221D
|
|
∞ U+0221E
|
|
∠ U+02220
|
|
∧ U+02227
|
|
∨ U+02228
|
|
∩ U+02229
|
|
∪ U+0222A
|
|
∫ U+0222B
|
|
∴ U+02234
|
|
∼ U+0223C
|
|
≅ U+02245
|
|
≈ U+02248
|
|
≠ U+02260
|
|
≡ U+02261
|
|
≤ U+02264
|
|
≥ U+02265
|
|
⊂ U+02282
|
|
⊃ U+02283
|
|
⊄ U+02284
|
|
⊆ U+02286
|
|
⊇ U+02287
|
|
⊕ U+02295
|
|
⊗ U+02297
|
|
⊥ U+022A5
|
|
⋅ U+022C5
|
|
⌈ U+02308
|
|
⌉ U+02309
|
|
⌊ U+0230A
|
|
⌋ U+0230B
|
|
⟨ U+02329
|
|
⟩ U+0232A
|
|
◊ U+025CA
|
|
♠ U+02660
|
|
♣ U+02663
|
|
♥ U+02665
|
|
♦ U+02666
|