mirror of
https://github.com/php/php-src.git
synced 2024-12-23 00:40:33 +08:00
- Removed out-of-date comments and one enum constant from html_tables.h and its generator.
- Other minor aesthetic improvements in the generator.
This commit is contained in:
parent
3f804701b5
commit
03de44f23e
@ -1,4 +1,4 @@
|
|||||||
/*
|
/*
|
||||||
+----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
| PHP Version 5 |
|
| PHP Version 5 |
|
||||||
+----------------------------------------------------------------------+
|
+----------------------------------------------------------------------+
|
||||||
@ -28,17 +28,9 @@
|
|||||||
***************************************************************************
|
***************************************************************************
|
||||||
**************************************************************************/
|
**************************************************************************/
|
||||||
|
|
||||||
/* cs_terminator is overloaded in the following fashion:
|
enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
|
||||||
* - It terminates the list entity maps.
|
cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
|
||||||
* - In BG(inverse_ent_maps), it's the key of the inverse map that stores
|
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
||||||
* only the basic entities.
|
|
||||||
* - When passed to traverse_for_entities (or via php_unescape_entities with !all),
|
|
||||||
* we don't care about the encoding (UTF-8 is chosen, but it should be used
|
|
||||||
* when it doesn't matter).
|
|
||||||
*/
|
|
||||||
enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
|
|
||||||
cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
|
|
||||||
cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
|
||||||
cs_numelems /* used to count the number of charsets */
|
cs_numelems /* used to count the number of charsets */
|
||||||
};
|
};
|
||||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
||||||
@ -49,36 +41,36 @@ static const struct {
|
|||||||
const char *codeset;
|
const char *codeset;
|
||||||
enum entity_charset charset;
|
enum entity_charset charset;
|
||||||
} charset_map[] = {
|
} charset_map[] = {
|
||||||
{ "ISO-8859-1", cs_8859_1 },
|
{ "ISO-8859-1", cs_8859_1 },
|
||||||
{ "ISO8859-1", cs_8859_1 },
|
{ "ISO8859-1", cs_8859_1 },
|
||||||
{ "ISO-8859-15", cs_8859_15 },
|
{ "ISO-8859-15", cs_8859_15 },
|
||||||
{ "ISO8859-15", cs_8859_15 },
|
{ "ISO8859-15", cs_8859_15 },
|
||||||
{ "utf-8", cs_utf_8 },
|
{ "utf-8", cs_utf_8 },
|
||||||
{ "cp1252", cs_cp1252 },
|
{ "cp1252", cs_cp1252 },
|
||||||
{ "Windows-1252", cs_cp1252 },
|
{ "Windows-1252", cs_cp1252 },
|
||||||
{ "1252", cs_cp1252 },
|
{ "1252", cs_cp1252 },
|
||||||
{ "BIG5", cs_big5 },
|
{ "BIG5", cs_big5 },
|
||||||
{ "950", cs_big5 },
|
{ "950", cs_big5 },
|
||||||
{ "GB2312", cs_gb2312 },
|
{ "GB2312", cs_gb2312 },
|
||||||
{ "936", cs_gb2312 },
|
{ "936", cs_gb2312 },
|
||||||
{ "BIG5-HKSCS", cs_big5hkscs },
|
{ "BIG5-HKSCS", cs_big5hkscs },
|
||||||
{ "Shift_JIS", cs_sjis },
|
{ "Shift_JIS", cs_sjis },
|
||||||
{ "SJIS", cs_sjis },
|
{ "SJIS", cs_sjis },
|
||||||
{ "932", cs_sjis },
|
{ "932", cs_sjis },
|
||||||
{ "EUCJP", cs_eucjp },
|
{ "EUCJP", cs_eucjp },
|
||||||
{ "EUC-JP", cs_eucjp },
|
{ "EUC-JP", cs_eucjp },
|
||||||
{ "KOI8-R", cs_koi8r },
|
{ "KOI8-R", cs_koi8r },
|
||||||
{ "koi8-ru", cs_koi8r },
|
{ "koi8-ru", cs_koi8r },
|
||||||
{ "koi8r", cs_koi8r },
|
{ "koi8r", cs_koi8r },
|
||||||
{ "cp1251", cs_cp1251 },
|
{ "cp1251", cs_cp1251 },
|
||||||
{ "Windows-1251", cs_cp1251 },
|
{ "Windows-1251", cs_cp1251 },
|
||||||
{ "win-1251", cs_cp1251 },
|
{ "win-1251", cs_cp1251 },
|
||||||
{ "iso8859-5", cs_8859_5 },
|
{ "iso8859-5", cs_8859_5 },
|
||||||
{ "iso-8859-5", cs_8859_5 },
|
{ "iso-8859-5", cs_8859_5 },
|
||||||
{ "cp866", cs_cp866 },
|
{ "cp866", cs_cp866 },
|
||||||
{ "866", cs_cp866 },
|
{ "866", cs_cp866 },
|
||||||
{ "ibm866", cs_cp866 },
|
{ "ibm866", cs_cp866 },
|
||||||
{ "MacRoman", cs_macroman },
|
{ "MacRoman", cs_macroman },
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -475,7 +467,6 @@ static const enc_to_uni enc_to_uni_macroman = {
|
|||||||
|
|
||||||
/* {{{ Index of tables for encoding conversion */
|
/* {{{ Index of tables for encoding conversion */
|
||||||
static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {
|
static const enc_to_uni *const enc_to_uni_index[cs_numelems] = {
|
||||||
NULL,
|
|
||||||
NULL,
|
NULL,
|
||||||
&enc_to_uni_iso88591,
|
&enc_to_uni_iso88591,
|
||||||
&enc_to_uni_win1252,
|
&enc_to_uni_win1252,
|
||||||
@ -1144,7 +1135,7 @@ typedef struct {
|
|||||||
const entity_stage3_row *table;
|
const entity_stage3_row *table;
|
||||||
} entity_table_opt;
|
} entity_table_opt;
|
||||||
|
|
||||||
/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
|
/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
|
||||||
|
|
||||||
/* {{{ Start of HTML5 multi-stage table for codepoint -> entity */
|
/* {{{ Start of HTML5 multi-stage table for codepoint -> entity */
|
||||||
|
|
||||||
|
@ -51,17 +51,9 @@ $t = <<<CODE
|
|||||||
***************************************************************************
|
***************************************************************************
|
||||||
**************************************************************************/
|
**************************************************************************/
|
||||||
|
|
||||||
/* cs_terminator is overloaded in the following fashion:
|
enum entity_charset { cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15, cs_cp1251,
|
||||||
* - It terminates the list entity maps.
|
cs_8859_5, cs_cp866, cs_macroman, cs_koi8r, cs_big5,
|
||||||
* - In BG(inverse_ent_maps), it's the key of the inverse map that stores
|
cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
||||||
* only the basic entities.
|
|
||||||
* - When passed to traverse_for_entities (or via php_unescape_entities with !all),
|
|
||||||
* we don't care about the encoding (UTF-8 is chosen, but it should be used
|
|
||||||
* when it doesn't matter).
|
|
||||||
*/
|
|
||||||
enum entity_charset { cs_terminator, cs_utf_8, cs_8859_1, cs_cp1252, cs_8859_15,
|
|
||||||
cs_cp1251, cs_8859_5, cs_cp866, cs_macroman, cs_koi8r,
|
|
||||||
cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp,
|
|
||||||
cs_numelems /* used to count the number of charsets */
|
cs_numelems /* used to count the number of charsets */
|
||||||
};
|
};
|
||||||
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8)
|
||||||
@ -72,36 +64,36 @@ static const struct {
|
|||||||
const char *codeset;
|
const char *codeset;
|
||||||
enum entity_charset charset;
|
enum entity_charset charset;
|
||||||
} charset_map[] = {
|
} charset_map[] = {
|
||||||
{ "ISO-8859-1", cs_8859_1 },
|
{ "ISO-8859-1", cs_8859_1 },
|
||||||
{ "ISO8859-1", cs_8859_1 },
|
{ "ISO8859-1", cs_8859_1 },
|
||||||
{ "ISO-8859-15", cs_8859_15 },
|
{ "ISO-8859-15", cs_8859_15 },
|
||||||
{ "ISO8859-15", cs_8859_15 },
|
{ "ISO8859-15", cs_8859_15 },
|
||||||
{ "utf-8", cs_utf_8 },
|
{ "utf-8", cs_utf_8 },
|
||||||
{ "cp1252", cs_cp1252 },
|
{ "cp1252", cs_cp1252 },
|
||||||
{ "Windows-1252", cs_cp1252 },
|
{ "Windows-1252", cs_cp1252 },
|
||||||
{ "1252", cs_cp1252 },
|
{ "1252", cs_cp1252 },
|
||||||
{ "BIG5", cs_big5 },
|
{ "BIG5", cs_big5 },
|
||||||
{ "950", cs_big5 },
|
{ "950", cs_big5 },
|
||||||
{ "GB2312", cs_gb2312 },
|
{ "GB2312", cs_gb2312 },
|
||||||
{ "936", cs_gb2312 },
|
{ "936", cs_gb2312 },
|
||||||
{ "BIG5-HKSCS", cs_big5hkscs },
|
{ "BIG5-HKSCS", cs_big5hkscs },
|
||||||
{ "Shift_JIS", cs_sjis },
|
{ "Shift_JIS", cs_sjis },
|
||||||
{ "SJIS", cs_sjis },
|
{ "SJIS", cs_sjis },
|
||||||
{ "932", cs_sjis },
|
{ "932", cs_sjis },
|
||||||
{ "EUCJP", cs_eucjp },
|
{ "EUCJP", cs_eucjp },
|
||||||
{ "EUC-JP", cs_eucjp },
|
{ "EUC-JP", cs_eucjp },
|
||||||
{ "KOI8-R", cs_koi8r },
|
{ "KOI8-R", cs_koi8r },
|
||||||
{ "koi8-ru", cs_koi8r },
|
{ "koi8-ru", cs_koi8r },
|
||||||
{ "koi8r", cs_koi8r },
|
{ "koi8r", cs_koi8r },
|
||||||
{ "cp1251", cs_cp1251 },
|
{ "cp1251", cs_cp1251 },
|
||||||
{ "Windows-1251", cs_cp1251 },
|
{ "Windows-1251", cs_cp1251 },
|
||||||
{ "win-1251", cs_cp1251 },
|
{ "win-1251", cs_cp1251 },
|
||||||
{ "iso8859-5", cs_8859_5 },
|
{ "iso8859-5", cs_8859_5 },
|
||||||
{ "iso-8859-5", cs_8859_5 },
|
{ "iso-8859-5", cs_8859_5 },
|
||||||
{ "cp866", cs_cp866 },
|
{ "cp866", cs_cp866 },
|
||||||
{ "866", cs_cp866 },
|
{ "866", cs_cp866 },
|
||||||
{ "ibm866", cs_cp866 },
|
{ "ibm866", cs_cp866 },
|
||||||
{ "MacRoman", cs_macroman },
|
{ "MacRoman", cs_macroman },
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -132,51 +124,51 @@ echo $t;
|
|||||||
$encodings = array(
|
$encodings = array(
|
||||||
array(
|
array(
|
||||||
"ident" => "iso88591",
|
"ident" => "iso88591",
|
||||||
"enumid" => 2,
|
"enumid" => 1,
|
||||||
"name" => "ISO-8859-1",
|
"name" => "ISO-8859-1",
|
||||||
"file" => "mappings/8859-1.TXT",
|
"file" => "mappings/8859-1.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "iso88595",
|
"ident" => "iso88595",
|
||||||
"enumid" => 6,
|
"enumid" => 5,
|
||||||
"name" => "ISO-8859-5",
|
"name" => "ISO-8859-5",
|
||||||
"file" => "mappings/8859-5.TXT",
|
"file" => "mappings/8859-5.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "iso885915",
|
"ident" => "iso885915",
|
||||||
"enumid" => 4,
|
"enumid" => 3,
|
||||||
"name" => "ISO-8859-15",
|
"name" => "ISO-8859-15",
|
||||||
"file" => "mappings/8859-15.TXT",
|
"file" => "mappings/8859-15.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "win1252",
|
"ident" => "win1252",
|
||||||
"enumid" => 3,
|
"enumid" => 2,
|
||||||
"enumident" => "cp1252",
|
"enumident" => "cp1252",
|
||||||
"name" => "Windows-1252",
|
"name" => "Windows-1252",
|
||||||
"file" => "mappings/CP1252.TXT",
|
"file" => "mappings/CP1252.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "win1251",
|
"ident" => "win1251",
|
||||||
"enumid" => 5,
|
"enumid" => 4,
|
||||||
"enumident" => "cp1252",
|
"enumident" => "cp1252",
|
||||||
"name" => "Windows-1251",
|
"name" => "Windows-1251",
|
||||||
"file" => "mappings/CP1251.TXT",
|
"file" => "mappings/CP1251.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "koi8r",
|
"ident" => "koi8r",
|
||||||
"enumid" => 9,
|
"enumid" => 8,
|
||||||
"name" => "KOI8-R",
|
"name" => "KOI8-R",
|
||||||
"file" => "mappings/KOI8-R.TXT",
|
"file" => "mappings/KOI8-R.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "cp866",
|
"ident" => "cp866",
|
||||||
"enumid" => 7,
|
"enumid" => 6,
|
||||||
"name" => "CP-866",
|
"name" => "CP-866",
|
||||||
"file" => "mappings/CP866.TXT",
|
"file" => "mappings/CP866.TXT",
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
"ident" => "macroman",
|
"ident" => "macroman",
|
||||||
"enumid" => 8,
|
"enumid" => 7,
|
||||||
"name" => "MacRoman",
|
"name" => "MacRoman",
|
||||||
"file" => "mappings/ROMAN.TXT",
|
"file" => "mappings/ROMAN.TXT",
|
||||||
),
|
),
|
||||||
@ -336,7 +328,7 @@ foreach ($encodings as $e) {
|
|||||||
$lines = explode("\n", file_get_contents($e{'file'}));
|
$lines = explode("\n", file_get_contents($e{'file'}));
|
||||||
foreach ($lines as $l) {
|
foreach ($lines as $l) {
|
||||||
if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
|
if (preg_match("/^0x([0-9A-Z]{2})\t0x([0-9A-Z]{2,})\s+#\s*(.*)$/i", $l, $matches))
|
||||||
$map[] = array($matches[1], $matches[2], $matches[3]);
|
$map[] = array($matches[1], $matches[2], rtrim($matches[3]));
|
||||||
}
|
}
|
||||||
|
|
||||||
$mappy = array();
|
$mappy = array();
|
||||||
@ -420,7 +412,7 @@ typedef struct {
|
|||||||
const entity_stage3_row *table;
|
const entity_stage3_row *table;
|
||||||
} entity_table_opt;
|
} entity_table_opt;
|
||||||
|
|
||||||
/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistentcy's sake. */
|
/* Replaced "GT" > "gt" and "QUOT" > "quot" for consistency's sake. */
|
||||||
|
|
||||||
|
|
||||||
CODE;
|
CODE;
|
||||||
|
Loading…
Reference in New Issue
Block a user