mirror of
https://github.com/php/php-src.git
synced 2024-11-24 18:34:21 +08:00
Combine control into one character group
As with punctuation, we are currently not interested in distinguishing between Cc (Other, Control) and Cf (Other, Format), so only their union is stored, as a single merged category C.
This commit is contained in:
parent
d0897b3602
commit
425c2e3ba1
@ -40,39 +40,38 @@
|
||||
#define UC_ZS 6 /* Separator, Space */
|
||||
#define UC_ZL 7 /* Separator, Line */
|
||||
#define UC_ZP 8 /* Separator, Paragraph */
|
||||
#define UC_CC 9 /* Other, Control */
|
||||
#define UC_CF 10 /* Other, Format */
|
||||
#define UC_OS 11 /* Other, Surrogate */
|
||||
#define UC_CO 12 /* Other, Private Use */
|
||||
#define UC_CN 13 /* Other, Not Assigned */
|
||||
#define UC_LU 14 /* Letter, Uppercase */
|
||||
#define UC_LL 15 /* Letter, Lowercase */
|
||||
#define UC_LT 16 /* Letter, Titlecase */
|
||||
#define UC_LM 17 /* Letter, Modifier */
|
||||
#define UC_LO 18 /* Letter, Other */
|
||||
#define UC_SM 19 /* Symbol, Math */
|
||||
#define UC_SC 20 /* Symbol, Currency */
|
||||
#define UC_SK 21 /* Symbol, Modifier */
|
||||
#define UC_SO 22 /* Symbol, Other */
|
||||
#define UC_L 23 /* Left-To-Right */
|
||||
#define UC_R 24 /* Right-To-Left */
|
||||
#define UC_EN 25 /* European Number */
|
||||
#define UC_ES 26 /* European Number Separator */
|
||||
#define UC_ET 27 /* European Number Terminator */
|
||||
#define UC_AN 28 /* Arabic Number */
|
||||
#define UC_CS 29 /* Common Number Separator */
|
||||
#define UC_B 30 /* Block Separator */
|
||||
#define UC_S 31 /* Segment Separator */
|
||||
#define UC_WS 32 /* Whitespace */
|
||||
#define UC_ON 33 /* Other Neutrals */
|
||||
#define UC_AL 34 /* Arabic Letter */
|
||||
#define UC_OS 9 /* Other, Surrogate */
|
||||
#define UC_CO 10 /* Other, Private Use */
|
||||
#define UC_CN 11 /* Other, Not Assigned */
|
||||
#define UC_LU 12 /* Letter, Uppercase */
|
||||
#define UC_LL 13 /* Letter, Lowercase */
|
||||
#define UC_LT 14 /* Letter, Titlecase */
|
||||
#define UC_LM 15 /* Letter, Modifier */
|
||||
#define UC_LO 16 /* Letter, Other */
|
||||
#define UC_SM 17 /* Symbol, Math */
|
||||
#define UC_SC 18 /* Symbol, Currency */
|
||||
#define UC_SK 19 /* Symbol, Modifier */
|
||||
#define UC_SO 20 /* Symbol, Other */
|
||||
#define UC_L 21 /* Left-To-Right */
|
||||
#define UC_R 22 /* Right-To-Left */
|
||||
#define UC_EN 23 /* European Number */
|
||||
#define UC_ES 24 /* European Number Separator */
|
||||
#define UC_ET 25 /* European Number Terminator */
|
||||
#define UC_AN 26 /* Arabic Number */
|
||||
#define UC_CS 27 /* Common Number Separator */
|
||||
#define UC_B 28 /* Block Separator */
|
||||
#define UC_S 29 /* Segment Separator */
|
||||
#define UC_WS 30 /* Whitespace */
|
||||
#define UC_ON 31 /* Other Neutrals */
|
||||
#define UC_AL 32 /* Arabic Letter */
|
||||
|
||||
/* Merged property categories */
|
||||
#define UC_P 35
|
||||
#define UC_C 33
|
||||
#define UC_P 34
|
||||
|
||||
/* Derived properties from DerivedCoreProperties.txt */
|
||||
#define UC_CASED 36
|
||||
#define UC_CASE_IGNORABLE 37
|
||||
#define UC_CASED 35
|
||||
#define UC_CASE_IGNORABLE 36
|
||||
|
||||
|
||||
MBSTRING_API bool php_unicode_is_prop(unsigned long code, ...);
|
||||
@ -113,7 +112,7 @@ static inline int php_unicode_is_upper(unsigned long code) {
|
||||
#define php_unicode_is_alpha(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, -1)
|
||||
#define php_unicode_is_digit(cc) php_unicode_is_prop1(cc, UC_ND)
|
||||
#define php_unicode_is_alnum(cc) php_unicode_is_prop(cc, UC_LU, UC_LL, UC_LM, UC_LO, UC_LT, UC_ND, -1)
|
||||
#define php_unicode_is_cntrl(cc) php_unicode_is_prop(cc, UC_CC, UC_CF, -1)
|
||||
#define php_unicode_is_cntrl(cc) php_unicode_is_prop1(cc, UC_C)
|
||||
#define php_unicode_is_blank(cc) php_unicode_is_prop1(cc, UC_ZS)
|
||||
#define php_unicode_is_punct(cc) php_unicode_is_prop1(cc, UC_P)
|
||||
#define php_unicode_is_graph(cc) php_unicode_is_prop(cc, \
|
||||
@ -126,9 +125,6 @@ static inline int php_unicode_is_upper(unsigned long code) {
|
||||
UC_SM, UC_SM, UC_SC, UC_SK, UC_SO, UC_ZS, -1)
|
||||
#define php_unicode_is_title(cc) php_unicode_is_prop1(cc, UC_LT)
|
||||
|
||||
#define php_unicode_is_isocntrl(cc) php_unicode_is_prop1(cc, UC_CC)
|
||||
#define php_unicode_is_fmtcntrl(cc) php_unicode_is_prop1(cc, UC_CF)
|
||||
|
||||
#define php_unicode_is_symbol(cc) php_unicode_is_prop(cc, UC_SM, UC_SC, UC_SO, UC_SK, -1)
|
||||
#define php_unicode_is_number(cc) php_unicode_is_prop(cc, UC_ND, UC_NO, UC_NL, -1)
|
||||
#define php_unicode_is_nonspacing(cc) php_unicode_is_prop1(cc, UC_MN)
|
||||
|
@ -101,12 +101,12 @@ class UnicodeData {
|
||||
*/
|
||||
$this->propIndexes = array_flip([
|
||||
"Mn", "Mc", "Me", "Nd", "Nl", "No",
|
||||
"Zs", "Zl", "Zp", "Cc", "Cf", "Cs",
|
||||
"Co", "Cn", "Lu", "Ll", "Lt", "Lm",
|
||||
"Lo", "Sm", "Sc", "Sk", "So", "L",
|
||||
"R", "EN", "ES", "ET", "AN", "CS",
|
||||
"B", "S", "WS", "ON", "AL",
|
||||
"P", "Cased", "Case_Ignorable"
|
||||
"Zs", "Zl", "Zp", "Cs", "Co", "Cn",
|
||||
"Lu", "Ll", "Lt", "Lm", "Lo", "Sm",
|
||||
"Sc", "Sk", "So", "L", "R", "EN",
|
||||
"ES", "ET", "AN", "CS", "B", "S",
|
||||
"WS", "ON", "AL",
|
||||
"C", "P", "Cased", "Case_Ignorable"
|
||||
]);
|
||||
$this->numProps = count($this->propIndexes);
|
||||
|
||||
@ -135,6 +135,10 @@ class UnicodeData {
|
||||
if (in_array($prop, ["Pc", "Pd", "Ps", "Pe", "Po", "Pi", "Pf"])) {
|
||||
$prop = "P";
|
||||
}
|
||||
/* Same for control. */
|
||||
if (in_array($prop, ["Cc", "Cf"])) {
|
||||
$prop = "C";
|
||||
}
|
||||
|
||||
if (!isset($this->propIndexes[$prop])) {
|
||||
throw new Exception("Unknown property $prop");
|
||||
|
@ -10,14 +10,14 @@
|
||||
* the project's page doesn't seem to be live anymore, so you can use
|
||||
* OpenLDAP's modified copy (look in libraries/liblunicode/ucdata) */
|
||||
|
||||
static const unsigned short _ucprop_size = 38;
|
||||
static const unsigned short _ucprop_size = 37;
|
||||
|
||||
static const unsigned short _ucprop_offsets[] = {
|
||||
0x0000, 0x028e, 0x03ec, 0x03f6, 0x0470, 0x0488, 0x0516, 0x0524,
|
||||
0x0526, 0x0528, 0x052c, 0x0554, 0x0556, 0x055c, 0x055c, 0x0a58,
|
||||
0x0f62, 0x0f76, 0x0ff0, 0x13c2, 0x1442, 0x146c, 0x14a8, 0x1614,
|
||||
0x1b90, 0x1c1e, 0x1c38, 0x1c4a, 0x1c7a, 0x1c88, 0x1ca2, 0x1cac,
|
||||
0x1cb2, 0x1cc0, 0x20ae, 0x212a, 0x229c, 0x23b6, 0x26ea, 0x0000
|
||||
0x0526, 0x0528, 0x052a, 0x0530, 0x0530, 0x0a2c, 0x0f36, 0x0f4a,
|
||||
0x0fc4, 0x1396, 0x1416, 0x1440, 0x147c, 0x15e8, 0x1b64, 0x1bf2,
|
||||
0x1c0c, 0x1c1e, 0x1c4e, 0x1c5c, 0x1c76, 0x1c80, 0x1c86, 0x1c94,
|
||||
0x2082, 0x20fe, 0x212a, 0x229c, 0x23b6, 0x26ea, 0x0000, 0x0000
|
||||
};
|
||||
|
||||
static const unsigned int _ucprop_ranges[] = {
|
||||
@ -351,17 +351,6 @@ static const unsigned int _ucprop_ranges[] = {
|
||||
0x00002000, 0x0000200a, 0x0000202f, 0x0000202f,
|
||||
0x0000205f, 0x0000205f, 0x00003000, 0x00003000,
|
||||
0x00002028, 0x00002028, 0x00002029, 0x00002029,
|
||||
0x00000000, 0x0000001f, 0x0000007f, 0x0000009f,
|
||||
0x000000ad, 0x000000ad, 0x00000600, 0x00000605,
|
||||
0x0000061c, 0x0000061c, 0x000006dd, 0x000006dd,
|
||||
0x0000070f, 0x0000070f, 0x000008e2, 0x000008e2,
|
||||
0x0000180e, 0x0000180e, 0x0000200b, 0x0000200f,
|
||||
0x0000202a, 0x0000202e, 0x00002060, 0x00002064,
|
||||
0x00002066, 0x0000206f, 0x0000feff, 0x0000feff,
|
||||
0x0000fff9, 0x0000fffb, 0x000110bd, 0x000110bd,
|
||||
0x000110cd, 0x000110cd, 0x00013430, 0x00013438,
|
||||
0x0001bca0, 0x0001bca3, 0x0001d173, 0x0001d17a,
|
||||
0x000e0001, 0x000e0001, 0x000e0020, 0x000e007f,
|
||||
0x0000d800, 0x0000dfff, 0x0000e000, 0x0000f8ff,
|
||||
0x000f0000, 0x000ffffd, 0x00100000, 0x0010fffd,
|
||||
0x00000041, 0x0000005a, 0x000000c0, 0x000000d6,
|
||||
@ -2143,7 +2132,18 @@ static const unsigned int _ucprop_ranges[] = {
|
||||
0x0001ee79, 0x0001ee7c, 0x0001ee7e, 0x0001ee7e,
|
||||
0x0001ee80, 0x0001ee89, 0x0001ee8b, 0x0001ee9b,
|
||||
0x0001eea1, 0x0001eea3, 0x0001eea5, 0x0001eea9,
|
||||
0x0001eeab, 0x0001eebb, 0x00000021, 0x00000023,
|
||||
0x0001eeab, 0x0001eebb, 0x00000000, 0x0000001f,
|
||||
0x0000007f, 0x0000009f, 0x000000ad, 0x000000ad,
|
||||
0x00000600, 0x00000605, 0x0000061c, 0x0000061c,
|
||||
0x000006dd, 0x000006dd, 0x0000070f, 0x0000070f,
|
||||
0x000008e2, 0x000008e2, 0x0000180e, 0x0000180e,
|
||||
0x0000200b, 0x0000200f, 0x0000202a, 0x0000202e,
|
||||
0x00002060, 0x00002064, 0x00002066, 0x0000206f,
|
||||
0x0000feff, 0x0000feff, 0x0000fff9, 0x0000fffb,
|
||||
0x000110bd, 0x000110bd, 0x000110cd, 0x000110cd,
|
||||
0x00013430, 0x00013438, 0x0001bca0, 0x0001bca3,
|
||||
0x0001d173, 0x0001d17a, 0x000e0001, 0x000e0001,
|
||||
0x000e0020, 0x000e007f, 0x00000021, 0x00000023,
|
||||
0x00000025, 0x0000002a, 0x0000002c, 0x0000002f,
|
||||
0x0000003a, 0x0000003b, 0x0000003f, 0x00000040,
|
||||
0x0000005b, 0x0000005d, 0x0000005f, 0x0000005f,
|
||||
|
Loading…
Reference in New Issue
Block a user