mirror of
https://github.com/php/php-src.git
synced 2024-11-24 10:24:11 +08:00
Upgraded bundled PCRE to version 8.02.
This commit is contained in:
parent
71ec12cc78
commit
6e92347ddf
2
NEWS
2
NEWS
@ -1,6 +1,8 @@
|
||||
PHP NEWS
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
?? ??? 201?, PHP 5.3.99
|
||||
- Upgraded bundled PCRE to version 8.02. (Ilia)
|
||||
|
||||
- Added Tokyo Cabinet abstract DB support to ext/dba. (Michael Maclean)
|
||||
- Added Jenkins's one-at-a-time hash support to ext/hash. (Martin Jansen)
|
||||
- Added FNV-1 hash support to ext/hash. (Michael Maclean)
|
||||
|
@ -1,6 +1,71 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 8.02 19-Mar-2010
|
||||
------------------------
|
||||
|
||||
1. The Unicode data tables have been updated to Unicode 5.2.0.
|
||||
|
||||
2. Added the option --libs-cpp to pcre-config, but only when C++ support is
|
||||
configured.
|
||||
|
||||
3. Updated the licensing terms in the pcregexp.pas file, as agreed with the
|
||||
original author of that file, following a query about its status.
|
||||
|
||||
4. On systems that do not have stdint.h (e.g. Solaris), check for and include
|
||||
inttypes.h instead. This fixes a bug that was introduced by change 8.01/8.
|
||||
|
||||
5. A pattern such as (?&t)*+(?(DEFINE)(?<t>.)) which has a possessive
|
||||
quantifier applied to a forward-referencing subroutine call, could compile
|
||||
incorrect code or give the error "internal error: previously-checked
|
||||
referenced subpattern not found".
|
||||
|
||||
6. Both MS Visual Studio and Symbian OS have problems with initializing
|
||||
variables to point to external functions. For these systems, therefore,
|
||||
pcre_malloc etc. are now initialized to local functions that call the
|
||||
relevant global functions.
|
||||
|
||||
7. There were two entries missing in the vectors called coptable and poptable
|
||||
in pcre_dfa_exec.c. This could lead to memory accesses outside the vectors.
|
||||
I've fixed the data, and added a kludgy way of testing at compile time that
|
||||
the lengths are correct (equal to the number of opcodes).
|
||||
|
||||
8. Following on from 7, I added a similar kludge to check the length of the
|
||||
eint vector in pcreposix.c.
|
||||
|
||||
9. Error texts for pcre_compile() are held as one long string to avoid too
|
||||
much relocation at load time. To find a text, the string is searched,
|
||||
counting zeros. There was no check for running off the end of the string,
|
||||
which could happen if a new error number was added without updating the
|
||||
string.
|
||||
|
||||
10. \K gave a compile-time error if it appeared in a lookbehind assertion.
|
||||
|
||||
11. \K was not working if it appeared in an atomic group or in a group that
|
||||
was called as a "subroutine", or in an assertion. Perl 5.11 documents that
|
||||
\K is "not well defined" if used in an assertion. PCRE now accepts it if
|
||||
the assertion is positive, but not if it is negative.
|
||||
|
||||
12. Change 11 fortuitously reduced the size of the stack frame used in the
|
||||
"match()" function of pcre_exec.c by one pointer. Forthcoming
|
||||
implementation of support for (*MARK) will need an extra pointer on the
|
||||
stack; I have reserved it now, so that the stack frame size does not
|
||||
decrease.
|
||||
|
||||
13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other
|
||||
item in a branch that calls a recursion is a subroutine call - as in the
|
||||
second branch in the above example - was incorrectly given the compile-
|
||||
time error "recursive call could loop indefinitely" because pcre_compile()
|
||||
was not correctly checking the subroutine for matching a non-empty string.
|
||||
|
||||
14. The checks for overrunning compiling workspace could trigger after an
|
||||
overrun had occurred. This is a "should never occur" error, but it can be
|
||||
triggered by pathological patterns such as hundreds of nested parentheses.
|
||||
The checks now trigger 100 bytes before the end of the workspace.
|
||||
|
||||
15. Fix typo in configure.ac: "srtoq" should be "strtoq".
|
||||
|
||||
|
||||
Version 8.01 19-Jan-2010
|
||||
------------------------
|
||||
|
||||
|
@ -1,6 +1,12 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 8.02 19-Mar-2010
|
||||
------------------------
|
||||
|
||||
Another bug-fix release.
|
||||
|
||||
|
||||
Release 8.01 19-Jan-2010
|
||||
------------------------
|
||||
|
||||
|
@ -148,12 +148,12 @@ them both to 0; an emulation function will be used. */
|
||||
/* #undef HAVE_STRTOIMAX */
|
||||
|
||||
/* Define to 1 if you have `strtoll'. */
|
||||
#ifndef HAVE_STRTOLL
|
||||
#define HAVE_STRTOLL 1
|
||||
#endif
|
||||
/* #undef HAVE_STRTOLL */
|
||||
|
||||
/* Define to 1 if you have `strtoq'. */
|
||||
/* #undef HAVE_STRTOQ */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
@ -271,13 +271,13 @@ them both to 0; an emulation function will be used. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 8.01"
|
||||
#define PACKAGE_STRING "PCRE 8.02"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "8.01"
|
||||
#define PACKAGE_VERSION "8.02"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
@ -333,7 +333,7 @@ them both to 0; an emulation function will be used. */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "8.01"
|
||||
#define VERSION "8.02"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
|
@ -29,7 +29,7 @@ INTRODUCTION
|
||||
5.10, including support for UTF-8 encoded strings and Unicode general
|
||||
category properties. However, UTF-8 and Unicode support has to be
|
||||
explicitly enabled; it is not the default. The Unicode tables corre-
|
||||
spond to Unicode release 5.1.
|
||||
spond to Unicode release 5.2.0.
|
||||
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
alternative function that matches the same compiled patterns in a dif-
|
||||
@ -263,8 +263,8 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 28 September 2009
|
||||
Copyright (c) 1997-2009 University of Cambridge.
|
||||
Last updated: 01 March 2010
|
||||
Copyright (c) 1997-2010 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -3488,15 +3488,20 @@ BACKSLASH
|
||||
Those that are not part of an identified script are lumped together as
|
||||
"Common". The current list of scripts is:
|
||||
|
||||
Arabic, Armenian, Balinese, Bengali, Bopomofo, Braille, Buginese,
|
||||
Buhid, Canadian_Aboriginal, Cherokee, Common, Coptic, Cuneiform,
|
||||
Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian, Glagolitic,
|
||||
Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hira-
|
||||
gana, Inherited, Kannada, Katakana, Kharoshthi, Khmer, Lao, Latin,
|
||||
Limbu, Linear_B, Malayalam, Mongolian, Myanmar, New_Tai_Lue, Nko,
|
||||
Ogham, Old_Italic, Old_Persian, Oriya, Osmanya, Phags_Pa, Phoenician,
|
||||
Runic, Shavian, Sinhala, Syloti_Nagri, Syriac, Tagalog, Tagbanwa,
|
||||
Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh, Ugaritic, Yi.
|
||||
Arabic, Armenian, Avestan, Balinese, Bamum, Bengali, Bopomofo, Braille,
|
||||
Buginese, Buhid, Canadian_Aboriginal, Carian, Cham, Cherokee, Common,
|
||||
Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, Devanagari, Egyp-
|
||||
tian_Hieroglyphs, Ethiopic, Georgian, Glagolitic, Gothic, Greek,
|
||||
Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hiragana, Impe-
|
||||
rial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian,
|
||||
Javanese, Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao,
|
||||
Latin, Lepcha, Limbu, Linear_B, Lisu, Lycian, Lydian, Malayalam,
|
||||
Meetei_Mayek, Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Old_Italic,
|
||||
Old_Persian, Old_South_Arabian, Old_Turkic, Ol_Chiki, Oriya, Osmanya,
|
||||
Phags_Pa, Phoenician, Rejang, Runic, Samaritan, Saurashtra, Shavian,
|
||||
Sinhala, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le,
|
||||
Tai_Tham, Tai_Viet, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh,
|
||||
Ugaritic, Vai, Yi.
|
||||
|
||||
Each character has exactly one general category property, specified by
|
||||
a two-letter abbreviation. For compatibility with Perl, negation can be
|
||||
@ -3614,6 +3619,10 @@ BACKSLASH
|
||||
|
||||
matches "foobar", the first substring is still set to "foo".
|
||||
|
||||
Perl documents that the use of \K within assertions is "not well
|
||||
defined". In PCRE, \K is acted upon when it occurs inside positive
|
||||
assertions, but is ignored in negative assertions.
|
||||
|
||||
Simple assertions
|
||||
|
||||
The final use of backslash is for certain simple assertions. An asser-
|
||||
@ -5251,7 +5260,7 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 11 January 2010
|
||||
Last updated: 06 March 2010
|
||||
Copyright (c) 1997-2010 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
@ -5363,16 +5372,19 @@ GENERAL CATEGORY PROPERTY CODES FOR \p and \P
|
||||
|
||||
SCRIPT NAMES FOR \p AND \P
|
||||
|
||||
Arabic, Armenian, Balinese, Bengali, Bopomofo, Braille, Buginese,
|
||||
Buhid, Canadian_Aboriginal, Carian, Cham, Cherokee, Common, Coptic, Cu-
|
||||
neiform, Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian,
|
||||
Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul, Hanunoo,
|
||||
Hebrew, Hiragana, Inherited, Kannada, Katakana, Kayah_Li, Kharoshthi,
|
||||
Khmer, Lao, Latin, Lepcha, Limbu, Linear_B, Lycian, Lydian, Malayalam,
|
||||
Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Old_Italic, Old_Persian,
|
||||
Ol_Chiki, Oriya, Osmanya, Phags_Pa, Phoenician, Rejang, Runic, Saurash-
|
||||
tra, Shavian, Sinhala, Sudanese, Syloti_Nagri, Syriac, Tagalog, Tag-
|
||||
banwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh,
|
||||
Arabic, Armenian, Avestan, Balinese, Bamum, Bengali, Bopomofo, Braille,
|
||||
Buginese, Buhid, Canadian_Aboriginal, Carian, Cham, Cherokee, Common,
|
||||
Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, Devanagari, Egyp-
|
||||
tian_Hieroglyphs, Ethiopic, Georgian, Glagolitic, Gothic, Greek,
|
||||
Gujarati, Gurmukhi, Han, Hangul, Hanunoo, Hebrew, Hiragana, Impe-
|
||||
rial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian,
|
||||
Javanese, Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao,
|
||||
Latin, Lepcha, Limbu, Linear_B, Lisu, Lycian, Lydian, Malayalam,
|
||||
Meetei_Mayek, Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Old_Italic,
|
||||
Old_Persian, Old_South_Arabian, Old_Turkic, Ol_Chiki, Oriya, Osmanya,
|
||||
Phags_Pa, Phoenician, Rejang, Runic, Samaritan, Saurashtra, Shavian,
|
||||
Sinhala, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le,
|
||||
Tai_Tham, Tai_Viet, Tamil, Telugu, Thaana, Thai, Tibetan, Tifinagh,
|
||||
Ugaritic, Vai, Yi.
|
||||
|
||||
|
||||
@ -5604,8 +5616,8 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 11 April 2009
|
||||
Copyright (c) 1997-2009 University of Cambridge.
|
||||
Last updated: 01 March 2010
|
||||
Copyright (c) 1997-2010 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -6129,14 +6141,14 @@ PCRE PERFORMANCE
|
||||
can affect both of them.
|
||||
|
||||
|
||||
MEMORY USAGE
|
||||
COMPILED PATTERN MEMORY USAGE
|
||||
|
||||
Patterns are compiled by PCRE into a reasonably efficient byte code, so
|
||||
that most simple patterns do not use much memory. However, there is one
|
||||
case where memory usage can be unexpectedly large. When a parenthesized
|
||||
subpattern has a quantifier with a minimum greater than 1 and/or a lim-
|
||||
ited maximum, the whole subpattern is repeated in the compiled code.
|
||||
For example, the pattern
|
||||
case where the memory usage of a compiled pattern can be unexpectedly
|
||||
large. If a parenthesized subpattern has a quantifier with a minimum
|
||||
greater than 1 and/or a limited maximum, the whole subpattern is
|
||||
repeated in the compiled code. For example, the pattern
|
||||
|
||||
(abc|def){2,4}
|
||||
|
||||
@ -6178,6 +6190,16 @@ MEMORY USAGE
|
||||
otherwise handle.
|
||||
|
||||
|
||||
STACK USAGE AT RUN TIME
|
||||
|
||||
When pcre_exec() is used for matching, certain kinds of pattern can
|
||||
cause it to use large amounts of the process stack. In some environ-
|
||||
ments the default process stack is quite small, and if it runs out the
|
||||
result is often SIGSEGV. This issue is probably the most frequently
|
||||
raised problem with PCRE. Rewriting your pattern can often help. The
|
||||
pcrestack documentation discusses this issue in detail.
|
||||
|
||||
|
||||
PROCESSING TIME
|
||||
|
||||
Certain items in regular expression patterns are processed more effi-
|
||||
@ -6260,8 +6282,8 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 06 March 2007
|
||||
Copyright (c) 1997-2007 University of Cambridge.
|
||||
Last updated: 07 March 2010
|
||||
Copyright (c) 1997-2010 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 01
|
||||
#define PCRE_MINOR 02
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2010-01-19
|
||||
#define PCRE_DATE 2010-03-19
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
|
@ -90,6 +90,11 @@ is 4 there is plenty of room. */
|
||||
|
||||
#define COMPILE_WORK_SIZE (4096)
|
||||
|
||||
/* The overrun tests check for a slightly smaller size so that they detect the
|
||||
overrun before it actually does run off the end of the data block. */
|
||||
|
||||
#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
|
||||
|
||||
|
||||
/* Table for handling escaped characters in the range '0'-'z'. Positive returns
|
||||
are simple data values; negative values are for special things like \d and so
|
||||
@ -261,7 +266,11 @@ the number of relocations needed when a shared library is loaded dynamically,
|
||||
it is now one long string. We cannot use a table of offsets, because the
|
||||
lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
|
||||
simply count through to the one we want - this isn't a performance issue
|
||||
because these strings are used only when there is a compilation error. */
|
||||
because these strings are used only when there is a compilation error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const char error_texts[] =
|
||||
"no error\0"
|
||||
@ -342,8 +351,7 @@ static const char error_texts[] =
|
||||
"digit expected after (?+\0"
|
||||
"] is an invalid data character in JavaScript compatibility mode\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed";
|
||||
|
||||
"different names for subpatterns of the same number are not allowed\0";
|
||||
|
||||
/* Table to identify digits and hex digits. This is used when compiling
|
||||
patterns. Note that the tables in chartables are dependent on the locale, and
|
||||
@ -501,7 +509,11 @@ static const char *
|
||||
find_error_text(int n)
|
||||
{
|
||||
const char *s = error_texts;
|
||||
for (; n > 0; n--) while (*s++ != 0) {};
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*s++ != 0) {};
|
||||
if (*s == 0) return "Error text not found (please report)";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1441,6 +1453,7 @@ for (;;)
|
||||
case OP_CALLOUT:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_SET_SOM:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_CIRC:
|
||||
@ -1775,12 +1788,14 @@ Arguments:
|
||||
code points to start of search
|
||||
endcode points to where to stop
|
||||
utf8 TRUE if in UTF8 mode
|
||||
cd contains pointers to tables etc.
|
||||
|
||||
Returns: TRUE if what is matched could be empty
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
|
||||
could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
|
||||
compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
|
||||
@ -1811,6 +1826,28 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For a recursion/subroutine call, if its end has been reached, which
|
||||
implies a subroutine call, we can scan it. */
|
||||
|
||||
if (c == OP_RECURSE)
|
||||
{
|
||||
BOOL empty_branch = FALSE;
|
||||
const uschar *scode = cd->start_code + GET(code, 1);
|
||||
if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
|
||||
do
|
||||
{
|
||||
if (could_be_empty_branch(scode, endcode, utf8, cd))
|
||||
{
|
||||
empty_branch = TRUE;
|
||||
break;
|
||||
}
|
||||
scode += GET(scode, 1);
|
||||
}
|
||||
while (*scode == OP_ALT);
|
||||
if (!empty_branch) return FALSE; /* All branches are non-empty */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other groups, scan the branches. */
|
||||
|
||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
|
||||
@ -1829,7 +1866,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
empty_branch = FALSE;
|
||||
do
|
||||
{
|
||||
if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
|
||||
if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
|
||||
empty_branch = TRUE;
|
||||
code += GET(code, 1);
|
||||
}
|
||||
@ -1963,6 +2000,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* None of the remaining opcodes are required to match a character. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1985,17 +2027,18 @@ Arguments:
|
||||
endcode points to where to stop (current RECURSE item)
|
||||
bcptr points to the chain of current (unclosed) branch starts
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd pointers to tables etc
|
||||
|
||||
Returns: TRUE if what is matched could be empty
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
|
||||
BOOL utf8)
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
while (bcptr != NULL && bcptr->current_branch >= code)
|
||||
{
|
||||
if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
|
||||
if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
|
||||
return FALSE;
|
||||
bcptr = bcptr->outer;
|
||||
}
|
||||
@ -2720,7 +2763,7 @@ for (;; ptr++)
|
||||
#ifdef PCRE_DEBUG
|
||||
if (code > cd->hwm) cd->hwm = code; /* High water info */
|
||||
#endif
|
||||
if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
|
||||
if (code > cd->start_workspace + WORK_SIZE_CHECK) /* Check for overrun */
|
||||
{
|
||||
*errorcodeptr = ERR52;
|
||||
goto FAILED;
|
||||
@ -2769,7 +2812,7 @@ for (;; ptr++)
|
||||
/* In the real compile phase, just check the workspace used by the forward
|
||||
reference list. */
|
||||
|
||||
else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)
|
||||
else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)
|
||||
{
|
||||
*errorcodeptr = ERR52;
|
||||
goto FAILED;
|
||||
@ -4353,7 +4396,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
uschar *scode = bracode;
|
||||
do
|
||||
{
|
||||
if (could_be_empty_branch(scode, ketcode, utf8))
|
||||
if (could_be_empty_branch(scode, ketcode, utf8, cd))
|
||||
{
|
||||
*bracode += OP_SBRA - OP_BRA;
|
||||
break;
|
||||
@ -4428,7 +4471,12 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
|
||||
case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break;
|
||||
|
||||
/* Because we are moving code along, we must ensure that any
|
||||
pending recursive references are updated. */
|
||||
|
||||
default:
|
||||
*code = OP_END;
|
||||
adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
|
||||
memmove(tempcode + 1+LINK_SIZE, tempcode, len);
|
||||
code += 1 + LINK_SIZE;
|
||||
len += 1 + LINK_SIZE;
|
||||
@ -5147,6 +5195,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* Fudge the value of "called" so that when it is inserted as an
|
||||
offset below, what is actually inserted is the reference number
|
||||
of the group. */
|
||||
|
||||
called = cd->start_code + recno;
|
||||
PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
|
||||
}
|
||||
@ -5156,7 +5209,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
recursion that could loop for ever, and diagnose that case. */
|
||||
|
||||
else if (GET(called, 1) == 0 &&
|
||||
could_be_empty(called, code, bcptr, utf8))
|
||||
could_be_empty(called, code, bcptr, utf8, cd))
|
||||
{
|
||||
*errorcodeptr = ERR40;
|
||||
goto FAILED;
|
||||
@ -6802,7 +6855,6 @@ if (reqbyte >= 0 &&
|
||||
case when building a production library. */
|
||||
|
||||
#ifdef PCRE_DEBUG
|
||||
|
||||
printf("Length = %d top_bracket = %d top_backref = %d\n",
|
||||
length, re->top_bracket, re->top_backref);
|
||||
|
||||
|
@ -247,7 +247,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
||||
|
||||
/* These versions of the macros use the stack, as normal. There are debugging
|
||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||
actuall used in this definition. */
|
||||
actually used in this definition. */
|
||||
|
||||
#ifndef NO_RECURSE
|
||||
#define REGISTER register
|
||||
@ -256,7 +256,7 @@ actuall used in this definition. */
|
||||
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
|
||||
{ \
|
||||
printf("match() called in line %d\n", __LINE__); \
|
||||
rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
|
||||
rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
|
||||
printf("to line %d\n", __LINE__); \
|
||||
}
|
||||
#define RRETURN(ra) \
|
||||
@ -266,7 +266,7 @@ actuall used in this definition. */
|
||||
}
|
||||
#else
|
||||
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
|
||||
rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
|
||||
rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
|
||||
#define RRETURN(ra) return ra
|
||||
#endif
|
||||
|
||||
@ -286,6 +286,7 @@ argument of match(), which never changes. */
|
||||
newframe->Xeptr = ra;\
|
||||
newframe->Xecode = rb;\
|
||||
newframe->Xmstart = mstart;\
|
||||
newframe->Xmarkptr = markptr;\
|
||||
newframe->Xoffset_top = rc;\
|
||||
newframe->Xims = re;\
|
||||
newframe->Xeptrb = rf;\
|
||||
@ -323,6 +324,7 @@ typedef struct heapframe {
|
||||
USPTR Xeptr;
|
||||
const uschar *Xecode;
|
||||
USPTR Xmstart;
|
||||
USPTR Xmarkptr;
|
||||
int Xoffset_top;
|
||||
long int Xims;
|
||||
eptrblock *Xeptrb;
|
||||
@ -430,6 +432,7 @@ Arguments:
|
||||
ecode pointer to current position in compiled code
|
||||
mstart pointer to the current match start position (can be modified
|
||||
by encountering \K)
|
||||
markptr pointer to the most recent MARK name, or NULL
|
||||
offset_top current top pointer
|
||||
md pointer to "static" info for the match
|
||||
ims current /i, /m, and /s options
|
||||
@ -448,9 +451,9 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
|
||||
*/
|
||||
|
||||
static int
|
||||
match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
|
||||
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
|
||||
int flags, unsigned int rdepth)
|
||||
match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
|
||||
markptr, int offset_top, match_data *md, unsigned long int ims,
|
||||
eptrblock *eptrb, int flags, unsigned int rdepth)
|
||||
{
|
||||
/* These variables do not need to be preserved over recursion in this function,
|
||||
so they can be ordinary variables in all cases. Mark some of them with
|
||||
@ -478,6 +481,7 @@ frame->Xprevframe = NULL; /* Marks the top level */
|
||||
frame->Xeptr = eptr;
|
||||
frame->Xecode = ecode;
|
||||
frame->Xmstart = mstart;
|
||||
frame->Xmarkptr = markptr;
|
||||
frame->Xoffset_top = offset_top;
|
||||
frame->Xims = ims;
|
||||
frame->Xeptrb = eptrb;
|
||||
@ -493,6 +497,7 @@ HEAP_RECURSE:
|
||||
#define eptr frame->Xeptr
|
||||
#define ecode frame->Xecode
|
||||
#define mstart frame->Xmstart
|
||||
#define markptr frame->Xmarkptr
|
||||
#define offset_top frame->Xoffset_top
|
||||
#define ims frame->Xims
|
||||
#define eptrb frame->Xeptrb
|
||||
@ -1068,7 +1073,6 @@ for (;;)
|
||||
memmove(md->offset_vector, rec->offset_save,
|
||||
rec->saved_max * sizeof(int));
|
||||
offset_top = rec->save_offset_top;
|
||||
mstart = rec->save_start;
|
||||
ims = original_ims;
|
||||
ecode = rec->after_call;
|
||||
break;
|
||||
@ -1112,7 +1116,11 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
|
||||
RM4);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc == MATCH_MATCH)
|
||||
{
|
||||
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||
break;
|
||||
}
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
@ -1265,9 +1273,7 @@ for (;;)
|
||||
|
||||
memcpy(new_recursive.offset_save, md->offset_vector,
|
||||
new_recursive.saved_max * sizeof(int));
|
||||
new_recursive.save_start = mstart;
|
||||
new_recursive.save_offset_top = offset_top;
|
||||
mstart = eptr;
|
||||
|
||||
/* OK, now we can do the recursion. For each top-level alternative we
|
||||
restore the offset and recursion data. */
|
||||
@ -1314,7 +1320,8 @@ for (;;)
|
||||
a move back into the brackets. Friedl calls these "atomic" subpatterns.
|
||||
Check the alternative branches in turn - the matching won't pass the KET
|
||||
for this kind of subpattern. If any one branch matches, we carry on as at
|
||||
the end of a normal bracket, leaving the subject pointer. */
|
||||
the end of a normal bracket, leaving the subject pointer, but resetting
|
||||
the start-of-match value in case it was changed by \K. */
|
||||
|
||||
case OP_ONCE:
|
||||
prev = ecode;
|
||||
@ -1323,7 +1330,11 @@ for (;;)
|
||||
do
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc == MATCH_MATCH)
|
||||
{
|
||||
mstart = md->start_match_ptr;
|
||||
break;
|
||||
}
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
@ -1442,9 +1453,10 @@ for (;;)
|
||||
}
|
||||
else saved_eptr = NULL;
|
||||
|
||||
/* If we are at the end of an assertion group, stop matching and return
|
||||
MATCH_MATCH, but record the current high water mark for use by positive
|
||||
assertions. Do this also for the "once" (atomic) groups. */
|
||||
/* If we are at the end of an assertion group or an atomic group, stop
|
||||
matching and return MATCH_MATCH, but record the current high water mark for
|
||||
use by positive assertions. We also need to record the match start in case
|
||||
it was changed by \K. */
|
||||
|
||||
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
|
||||
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
|
||||
@ -1452,6 +1464,7 @@ for (;;)
|
||||
{
|
||||
md->end_match_ptr = eptr; /* For ONCE */
|
||||
md->end_offset_top = offset_top;
|
||||
md->start_match_ptr = mstart;
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
|
||||
@ -1488,7 +1501,6 @@ for (;;)
|
||||
recursion_info *rec = md->recursive;
|
||||
DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
|
||||
md->recursive = rec->prevrec;
|
||||
mstart = rec->save_start;
|
||||
memcpy(md->offset_vector, rec->offset_save,
|
||||
rec->saved_max * sizeof(int));
|
||||
offset_top = rec->save_offset_top;
|
||||
@ -5649,7 +5661,8 @@ for(;;)
|
||||
md->start_match_ptr = start_match;
|
||||
md->start_used_ptr = start_match;
|
||||
md->match_call_count = 0;
|
||||
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
|
||||
rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
|
||||
0, 0);
|
||||
if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
|
||||
|
||||
switch(rc)
|
||||
|
@ -43,14 +43,35 @@ PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
all threads.
|
||||
|
||||
For MS Visual Studio and Symbian OS, there are problems in initializing these
|
||||
variables to non-local functions. In these cases, therefore, an indirection via
|
||||
a local function is used.
|
||||
|
||||
Also, when compiling for Virtual Pascal, things are done differently, and
|
||||
global variables are not used. */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
return malloc(aSize);
|
||||
}
|
||||
static void LocalPcreFree(void* aPtr)
|
||||
{
|
||||
free(aPtr);
|
||||
}
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
|
@ -188,15 +188,14 @@ preprocessor time in standard C environments. */
|
||||
large integers. If a 64-bit integer type is available, we can use that.
|
||||
Otherwise we have to cast to double, which of course requires floating point
|
||||
arithmetic. Handle this by defining a macro for the appropriate type. If
|
||||
stdint.h is available, include it; it may define INT64_MAX. The macro int64_t
|
||||
may be set by "configure". */
|
||||
stdint.h is available, include it; it may define INT64_MAX. Systems that do not
|
||||
have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
|
||||
by "configure". */
|
||||
|
||||
#if HAVE_STDINT_H
|
||||
# ifdef PHP_WIN32
|
||||
# include "win32/php_stdint.h"
|
||||
# else
|
||||
# include <stdint.h>
|
||||
# endif
|
||||
#include <stdint.h>
|
||||
#elif HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#if defined INT64_MAX || defined int64_t
|
||||
@ -1392,7 +1391,13 @@ enum {
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO /* 114 */
|
||||
OP_SKIPZERO, /* 114 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
some in the past. */
|
||||
|
||||
OP_TABLE_LENGTH
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
@ -1440,8 +1445,9 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, /* Any, AllAny, Anybyte */ \
|
||||
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||
3, 3, /* \P, \p */ \
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||
1, /* \X */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Charnc - the minimum length */ \
|
||||
@ -1496,8 +1502,9 @@ condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
||||
/* Error code numbers. They are given names so that they can more easily be
|
||||
tracked. */
|
||||
/* Compile time error code numbers. They are given names so that they can more
|
||||
easily be tracked. When a new number is added, the table called eint in
|
||||
pcreposix.c must be updated. */
|
||||
|
||||
enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
|
||||
@ -1505,7 +1512,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -1610,7 +1617,6 @@ typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
USPTR save_start; /* Old value of mstart */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
int save_offset_top; /* Current value of offset_top */
|
||||
|
@ -190,6 +190,25 @@ for(;;)
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* These cases are never executed. This is a fudge that causes a compile-
|
||||
time error if the vectors OP_names or _pcre_OP_lengths, which are indexed
|
||||
by opcode, are not the correct length. It seems to be the only way to do
|
||||
such a check at compile time, as the sizeof() operator does not work in
|
||||
the C preprocessor. We do this while compiling pcretest, because that
|
||||
#includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this
|
||||
when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
|
||||
know the size of _pcre_OP_lengths. */
|
||||
|
||||
#ifdef COMPILING_PCRETEST
|
||||
case OP_TABLE_LENGTH:
|
||||
case OP_TABLE_LENGTH +
|
||||
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
|
||||
(sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)):
|
||||
break;
|
||||
#endif
|
||||
/* ========================================================================== */
|
||||
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
|
@ -118,7 +118,9 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||
@ -141,6 +143,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
@ -153,7 +156,12 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
|
||||
#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
|
||||
#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
|
||||
#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
|
||||
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
@ -166,6 +174,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||
#define STRING_Ll0 STR_L STR_l "\0"
|
||||
#define STRING_Lm0 STR_L STR_m "\0"
|
||||
#define STRING_Lo0 STR_L STR_o "\0"
|
||||
@ -177,6 +186,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Mn0 STR_M STR_n "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
@ -190,6 +200,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
@ -205,6 +217,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_S0 STR_S "\0"
|
||||
#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
|
||||
#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
|
||||
#define STRING_Sc0 STR_S STR_c "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
@ -218,6 +231,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
|
||||
#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
|
||||
#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
|
||||
#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
|
||||
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
|
||||
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
|
||||
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
|
||||
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
|
||||
@ -236,7 +251,9 @@ const char _pcre_utt_names[] =
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
STRING_Braille0
|
||||
@ -259,6 +276,7 @@ const char _pcre_utt_names[] =
|
||||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
@ -271,7 +289,12 @@ const char _pcre_utt_names[] =
|
||||
STRING_Hanunoo0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
STRING_Inherited0
|
||||
STRING_Inscriptional_Pahlavi0
|
||||
STRING_Inscriptional_Parthian0
|
||||
STRING_Javanese0
|
||||
STRING_Kaithi0
|
||||
STRING_Kannada0
|
||||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
@ -284,6 +307,7 @@ const char _pcre_utt_names[] =
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
STRING_Lm0
|
||||
STRING_Lo0
|
||||
@ -295,6 +319,7 @@ const char _pcre_utt_names[] =
|
||||
STRING_Malayalam0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Mn0
|
||||
STRING_Mongolian0
|
||||
STRING_Myanmar0
|
||||
@ -308,6 +333,8 @@ const char _pcre_utt_names[] =
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
@ -323,6 +350,7 @@ const char _pcre_utt_names[] =
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
STRING_Samaritan0
|
||||
STRING_Saurashtra0
|
||||
STRING_Sc0
|
||||
STRING_Shavian0
|
||||
@ -336,6 +364,8 @@ const char _pcre_utt_names[] =
|
||||
STRING_Tagalog0
|
||||
STRING_Tagbanwa0
|
||||
STRING_Tai_Le0
|
||||
STRING_Tai_Tham0
|
||||
STRING_Tai_Viet0
|
||||
STRING_Tamil0
|
||||
STRING_Telugu0
|
||||
STRING_Thaana0
|
||||
@ -354,119 +384,134 @@ const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_SC, ucp_Carian },
|
||||
{ 98, PT_PC, ucp_Cc },
|
||||
{ 101, PT_PC, ucp_Cf },
|
||||
{ 104, PT_SC, ucp_Cham },
|
||||
{ 109, PT_SC, ucp_Cherokee },
|
||||
{ 118, PT_PC, ucp_Cn },
|
||||
{ 121, PT_PC, ucp_Co },
|
||||
{ 124, PT_SC, ucp_Common },
|
||||
{ 131, PT_SC, ucp_Coptic },
|
||||
{ 138, PT_PC, ucp_Cs },
|
||||
{ 141, PT_SC, ucp_Cuneiform },
|
||||
{ 151, PT_SC, ucp_Cypriot },
|
||||
{ 159, PT_SC, ucp_Cyrillic },
|
||||
{ 168, PT_SC, ucp_Deseret },
|
||||
{ 176, PT_SC, ucp_Devanagari },
|
||||
{ 187, PT_SC, ucp_Ethiopic },
|
||||
{ 196, PT_SC, ucp_Georgian },
|
||||
{ 205, PT_SC, ucp_Glagolitic },
|
||||
{ 216, PT_SC, ucp_Gothic },
|
||||
{ 223, PT_SC, ucp_Greek },
|
||||
{ 229, PT_SC, ucp_Gujarati },
|
||||
{ 238, PT_SC, ucp_Gurmukhi },
|
||||
{ 247, PT_SC, ucp_Han },
|
||||
{ 251, PT_SC, ucp_Hangul },
|
||||
{ 258, PT_SC, ucp_Hanunoo },
|
||||
{ 266, PT_SC, ucp_Hebrew },
|
||||
{ 273, PT_SC, ucp_Hiragana },
|
||||
{ 282, PT_SC, ucp_Inherited },
|
||||
{ 292, PT_SC, ucp_Kannada },
|
||||
{ 300, PT_SC, ucp_Katakana },
|
||||
{ 309, PT_SC, ucp_Kayah_Li },
|
||||
{ 318, PT_SC, ucp_Kharoshthi },
|
||||
{ 329, PT_SC, ucp_Khmer },
|
||||
{ 335, PT_GC, ucp_L },
|
||||
{ 337, PT_LAMP, 0 },
|
||||
{ 340, PT_SC, ucp_Lao },
|
||||
{ 344, PT_SC, ucp_Latin },
|
||||
{ 350, PT_SC, ucp_Lepcha },
|
||||
{ 357, PT_SC, ucp_Limbu },
|
||||
{ 363, PT_SC, ucp_Linear_B },
|
||||
{ 372, PT_PC, ucp_Ll },
|
||||
{ 375, PT_PC, ucp_Lm },
|
||||
{ 378, PT_PC, ucp_Lo },
|
||||
{ 381, PT_PC, ucp_Lt },
|
||||
{ 384, PT_PC, ucp_Lu },
|
||||
{ 387, PT_SC, ucp_Lycian },
|
||||
{ 394, PT_SC, ucp_Lydian },
|
||||
{ 401, PT_GC, ucp_M },
|
||||
{ 403, PT_SC, ucp_Malayalam },
|
||||
{ 413, PT_PC, ucp_Mc },
|
||||
{ 416, PT_PC, ucp_Me },
|
||||
{ 419, PT_PC, ucp_Mn },
|
||||
{ 422, PT_SC, ucp_Mongolian },
|
||||
{ 432, PT_SC, ucp_Myanmar },
|
||||
{ 440, PT_GC, ucp_N },
|
||||
{ 442, PT_PC, ucp_Nd },
|
||||
{ 445, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 457, PT_SC, ucp_Nko },
|
||||
{ 461, PT_PC, ucp_Nl },
|
||||
{ 464, PT_PC, ucp_No },
|
||||
{ 467, PT_SC, ucp_Ogham },
|
||||
{ 473, PT_SC, ucp_Ol_Chiki },
|
||||
{ 482, PT_SC, ucp_Old_Italic },
|
||||
{ 493, PT_SC, ucp_Old_Persian },
|
||||
{ 505, PT_SC, ucp_Oriya },
|
||||
{ 511, PT_SC, ucp_Osmanya },
|
||||
{ 519, PT_GC, ucp_P },
|
||||
{ 521, PT_PC, ucp_Pc },
|
||||
{ 524, PT_PC, ucp_Pd },
|
||||
{ 527, PT_PC, ucp_Pe },
|
||||
{ 530, PT_PC, ucp_Pf },
|
||||
{ 533, PT_SC, ucp_Phags_Pa },
|
||||
{ 542, PT_SC, ucp_Phoenician },
|
||||
{ 553, PT_PC, ucp_Pi },
|
||||
{ 556, PT_PC, ucp_Po },
|
||||
{ 559, PT_PC, ucp_Ps },
|
||||
{ 562, PT_SC, ucp_Rejang },
|
||||
{ 569, PT_SC, ucp_Runic },
|
||||
{ 575, PT_GC, ucp_S },
|
||||
{ 577, PT_SC, ucp_Saurashtra },
|
||||
{ 588, PT_PC, ucp_Sc },
|
||||
{ 591, PT_SC, ucp_Shavian },
|
||||
{ 599, PT_SC, ucp_Sinhala },
|
||||
{ 607, PT_PC, ucp_Sk },
|
||||
{ 610, PT_PC, ucp_Sm },
|
||||
{ 613, PT_PC, ucp_So },
|
||||
{ 616, PT_SC, ucp_Sundanese },
|
||||
{ 626, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 639, PT_SC, ucp_Syriac },
|
||||
{ 646, PT_SC, ucp_Tagalog },
|
||||
{ 654, PT_SC, ucp_Tagbanwa },
|
||||
{ 663, PT_SC, ucp_Tai_Le },
|
||||
{ 670, PT_SC, ucp_Tamil },
|
||||
{ 676, PT_SC, ucp_Telugu },
|
||||
{ 683, PT_SC, ucp_Thaana },
|
||||
{ 690, PT_SC, ucp_Thai },
|
||||
{ 695, PT_SC, ucp_Tibetan },
|
||||
{ 703, PT_SC, ucp_Tifinagh },
|
||||
{ 712, PT_SC, ucp_Ugaritic },
|
||||
{ 721, PT_SC, ucp_Vai },
|
||||
{ 725, PT_SC, ucp_Yi },
|
||||
{ 728, PT_GC, ucp_Z },
|
||||
{ 730, PT_PC, ucp_Zl },
|
||||
{ 733, PT_PC, ucp_Zp },
|
||||
{ 736, PT_PC, ucp_Zs }
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Bengali },
|
||||
{ 51, PT_SC, ucp_Bopomofo },
|
||||
{ 60, PT_SC, ucp_Braille },
|
||||
{ 68, PT_SC, ucp_Buginese },
|
||||
{ 77, PT_SC, ucp_Buhid },
|
||||
{ 83, PT_GC, ucp_C },
|
||||
{ 85, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 105, PT_SC, ucp_Carian },
|
||||
{ 112, PT_PC, ucp_Cc },
|
||||
{ 115, PT_PC, ucp_Cf },
|
||||
{ 118, PT_SC, ucp_Cham },
|
||||
{ 123, PT_SC, ucp_Cherokee },
|
||||
{ 132, PT_PC, ucp_Cn },
|
||||
{ 135, PT_PC, ucp_Co },
|
||||
{ 138, PT_SC, ucp_Common },
|
||||
{ 145, PT_SC, ucp_Coptic },
|
||||
{ 152, PT_PC, ucp_Cs },
|
||||
{ 155, PT_SC, ucp_Cuneiform },
|
||||
{ 165, PT_SC, ucp_Cypriot },
|
||||
{ 173, PT_SC, ucp_Cyrillic },
|
||||
{ 182, PT_SC, ucp_Deseret },
|
||||
{ 190, PT_SC, ucp_Devanagari },
|
||||
{ 201, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 222, PT_SC, ucp_Ethiopic },
|
||||
{ 231, PT_SC, ucp_Georgian },
|
||||
{ 240, PT_SC, ucp_Glagolitic },
|
||||
{ 251, PT_SC, ucp_Gothic },
|
||||
{ 258, PT_SC, ucp_Greek },
|
||||
{ 264, PT_SC, ucp_Gujarati },
|
||||
{ 273, PT_SC, ucp_Gurmukhi },
|
||||
{ 282, PT_SC, ucp_Han },
|
||||
{ 286, PT_SC, ucp_Hangul },
|
||||
{ 293, PT_SC, ucp_Hanunoo },
|
||||
{ 301, PT_SC, ucp_Hebrew },
|
||||
{ 308, PT_SC, ucp_Hiragana },
|
||||
{ 317, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 334, PT_SC, ucp_Inherited },
|
||||
{ 344, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 366, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 389, PT_SC, ucp_Javanese },
|
||||
{ 398, PT_SC, ucp_Kaithi },
|
||||
{ 405, PT_SC, ucp_Kannada },
|
||||
{ 413, PT_SC, ucp_Katakana },
|
||||
{ 422, PT_SC, ucp_Kayah_Li },
|
||||
{ 431, PT_SC, ucp_Kharoshthi },
|
||||
{ 442, PT_SC, ucp_Khmer },
|
||||
{ 448, PT_GC, ucp_L },
|
||||
{ 450, PT_LAMP, 0 },
|
||||
{ 453, PT_SC, ucp_Lao },
|
||||
{ 457, PT_SC, ucp_Latin },
|
||||
{ 463, PT_SC, ucp_Lepcha },
|
||||
{ 470, PT_SC, ucp_Limbu },
|
||||
{ 476, PT_SC, ucp_Linear_B },
|
||||
{ 485, PT_SC, ucp_Lisu },
|
||||
{ 490, PT_PC, ucp_Ll },
|
||||
{ 493, PT_PC, ucp_Lm },
|
||||
{ 496, PT_PC, ucp_Lo },
|
||||
{ 499, PT_PC, ucp_Lt },
|
||||
{ 502, PT_PC, ucp_Lu },
|
||||
{ 505, PT_SC, ucp_Lycian },
|
||||
{ 512, PT_SC, ucp_Lydian },
|
||||
{ 519, PT_GC, ucp_M },
|
||||
{ 521, PT_SC, ucp_Malayalam },
|
||||
{ 531, PT_PC, ucp_Mc },
|
||||
{ 534, PT_PC, ucp_Me },
|
||||
{ 537, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 550, PT_PC, ucp_Mn },
|
||||
{ 553, PT_SC, ucp_Mongolian },
|
||||
{ 563, PT_SC, ucp_Myanmar },
|
||||
{ 571, PT_GC, ucp_N },
|
||||
{ 573, PT_PC, ucp_Nd },
|
||||
{ 576, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 588, PT_SC, ucp_Nko },
|
||||
{ 592, PT_PC, ucp_Nl },
|
||||
{ 595, PT_PC, ucp_No },
|
||||
{ 598, PT_SC, ucp_Ogham },
|
||||
{ 604, PT_SC, ucp_Ol_Chiki },
|
||||
{ 613, PT_SC, ucp_Old_Italic },
|
||||
{ 624, PT_SC, ucp_Old_Persian },
|
||||
{ 636, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 654, PT_SC, ucp_Old_Turkic },
|
||||
{ 665, PT_SC, ucp_Oriya },
|
||||
{ 671, PT_SC, ucp_Osmanya },
|
||||
{ 679, PT_GC, ucp_P },
|
||||
{ 681, PT_PC, ucp_Pc },
|
||||
{ 684, PT_PC, ucp_Pd },
|
||||
{ 687, PT_PC, ucp_Pe },
|
||||
{ 690, PT_PC, ucp_Pf },
|
||||
{ 693, PT_SC, ucp_Phags_Pa },
|
||||
{ 702, PT_SC, ucp_Phoenician },
|
||||
{ 713, PT_PC, ucp_Pi },
|
||||
{ 716, PT_PC, ucp_Po },
|
||||
{ 719, PT_PC, ucp_Ps },
|
||||
{ 722, PT_SC, ucp_Rejang },
|
||||
{ 729, PT_SC, ucp_Runic },
|
||||
{ 735, PT_GC, ucp_S },
|
||||
{ 737, PT_SC, ucp_Samaritan },
|
||||
{ 747, PT_SC, ucp_Saurashtra },
|
||||
{ 758, PT_PC, ucp_Sc },
|
||||
{ 761, PT_SC, ucp_Shavian },
|
||||
{ 769, PT_SC, ucp_Sinhala },
|
||||
{ 777, PT_PC, ucp_Sk },
|
||||
{ 780, PT_PC, ucp_Sm },
|
||||
{ 783, PT_PC, ucp_So },
|
||||
{ 786, PT_SC, ucp_Sundanese },
|
||||
{ 796, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 809, PT_SC, ucp_Syriac },
|
||||
{ 816, PT_SC, ucp_Tagalog },
|
||||
{ 824, PT_SC, ucp_Tagbanwa },
|
||||
{ 833, PT_SC, ucp_Tai_Le },
|
||||
{ 840, PT_SC, ucp_Tai_Tham },
|
||||
{ 849, PT_SC, ucp_Tai_Viet },
|
||||
{ 858, PT_SC, ucp_Tamil },
|
||||
{ 864, PT_SC, ucp_Telugu },
|
||||
{ 871, PT_SC, ucp_Thaana },
|
||||
{ 878, PT_SC, ucp_Thai },
|
||||
{ 883, PT_SC, ucp_Tibetan },
|
||||
{ 891, PT_SC, ucp_Tifinagh },
|
||||
{ 900, PT_SC, ucp_Ugaritic },
|
||||
{ 909, PT_SC, ucp_Vai },
|
||||
{ 913, PT_SC, ucp_Yi },
|
||||
{ 916, PT_GC, ucp_Z },
|
||||
{ 918, PT_PC, ucp_Zl },
|
||||
{ 921, PT_PC, ucp_Zp },
|
||||
{ 924, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -342,6 +342,8 @@ rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
|
||||
|
||||
if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
||||
|
||||
/* Successful match */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
@ -358,11 +360,23 @@ if (rc >= 0)
|
||||
return 0;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (allocated_ovector) free(ovector);
|
||||
switch(rc)
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (allocated_ovector) free(ovector);
|
||||
switch(rc)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* These cases are never executed. This is a fudge that causes a compile-time
|
||||
error if the vector eint, which is indexed by compile-time error number, is
|
||||
not the correct length. It seems to be the only way to do such a check at
|
||||
compile time, as the sizeof() operator does not work in the C preprocessor.
|
||||
As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
|
||||
|
||||
case 0:
|
||||
case (sizeof(eint)/sizeof(int) == ERRCOUNT):
|
||||
return REG_ASSERT;
|
||||
/* ========================================================================== */
|
||||
|
||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
case PCRE_ERROR_NULL: return REG_INVARG;
|
||||
case PCRE_ERROR_BADOPTION: return REG_INVARG;
|
||||
@ -374,7 +388,6 @@ else
|
||||
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
|
||||
default: return REG_ASSERT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcreposix.c */
|
||||
|
28
ext/pcre/pcrelib/testdata/testinput2
vendored
28
ext/pcre/pcrelib/testdata/testinput2
vendored
@ -3204,4 +3204,32 @@ a random value. /Ix
|
||||
/^(ab(c\1)d|x){2}$/BZ
|
||||
xabcxd
|
||||
|
||||
/^(?&t)*+(?(DEFINE)(?<t>.))$/BZ
|
||||
|
||||
/^(?&t)*(?(DEFINE)(?<t>.))$/BZ
|
||||
|
||||
/ -- The first four of these are not in the Perl 5.10 test because Perl
|
||||
documents that the use of \K in assertions is "not well defined". The
|
||||
last is here because Perl gives the match as "b" rather than "ab". I
|
||||
believe this to be a Perl bug. --/
|
||||
|
||||
/(?=a\Kb)ab/
|
||||
ab
|
||||
|
||||
/(?!a\Kb)ac/
|
||||
ac
|
||||
|
||||
/^abc(?<=b\Kc)d/
|
||||
abcd
|
||||
|
||||
/^abc(?<!b\Kq)d/
|
||||
abcd
|
||||
|
||||
/(?>a\Kb)z|(ab)/
|
||||
ab
|
||||
|
||||
/----------------------/
|
||||
|
||||
/(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/
|
||||
|
||||
/-- End of testinput2 --/
|
||||
|
10
ext/pcre/pcrelib/testdata/testinput6
vendored
10
ext/pcre/pcrelib/testdata/testinput6
vendored
@ -370,13 +370,6 @@
|
||||
\x{3b1}
|
||||
\x{ff5a}
|
||||
|
||||
/^\X/8
|
||||
A
|
||||
A\x{300}BC
|
||||
A\x{300}\x{301}\x{302}BC
|
||||
*** Failers
|
||||
\x{300}
|
||||
|
||||
/^[\X]/8
|
||||
X123
|
||||
*** Failers
|
||||
@ -756,4 +749,7 @@
|
||||
/[\p{Lu}\x20]+/
|
||||
\x41\x20\x50\xC2\x54\xC9\x20\x54\x4F\x44\x41\x59
|
||||
|
||||
/\p{Avestan}\p{Bamum}\p{Egyptian_Hieroglyphs}\p{Imperial_Aramaic}\p{Inscriptional_Pahlavi}\p{Inscriptional_Parthian}\p{Javanese}\p{Kaithi}\p{Lisu}\p{Meetei_Mayek}\p{Old_South_Arabian}\p{Old_Turkic}\p{Samaritan}\p{Tai_Tham}\p{Tai_Viet}/8
|
||||
\x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{0800}\x{1aad}\x{aac0}
|
||||
|
||||
/-- End of testinput6 --/
|
||||
|
71
ext/pcre/pcrelib/testdata/testoutput2
vendored
71
ext/pcre/pcrelib/testdata/testoutput2
vendored
@ -10597,4 +10597,75 @@ No match
|
||||
1: abcxd
|
||||
2: cx
|
||||
|
||||
/^(?&t)*+(?(DEFINE)(?<t>.))$/BZ
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
Once
|
||||
Brazero
|
||||
Once
|
||||
Recurse
|
||||
KetRmax
|
||||
Ket
|
||||
Cond
|
||||
Cond def
|
||||
CBra 1
|
||||
Any
|
||||
Ket
|
||||
Ket
|
||||
$
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^(?&t)*(?(DEFINE)(?<t>.))$/BZ
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
Brazero
|
||||
Once
|
||||
Recurse
|
||||
KetRmax
|
||||
Cond
|
||||
Cond def
|
||||
CBra 1
|
||||
Any
|
||||
Ket
|
||||
Ket
|
||||
$
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/ -- The first four of these are not in the Perl 5.10 test because Perl
|
||||
documents that the use of \K in assertions is "not well defined". The
|
||||
last is here because Perl gives the match as "b" rather than "ab". I
|
||||
believe this to be a Perl bug. --/
|
||||
|
||||
/(?=a\Kb)ab/
|
||||
ab
|
||||
0: b
|
||||
|
||||
/(?!a\Kb)ac/
|
||||
ac
|
||||
0: ac
|
||||
|
||||
/^abc(?<=b\Kc)d/
|
||||
abcd
|
||||
0: cd
|
||||
|
||||
/^abc(?<!b\Kq)d/
|
||||
abcd
|
||||
0: abcd
|
||||
|
||||
/(?>a\Kb)z|(ab)/
|
||||
ab
|
||||
0: ab
|
||||
1: ab
|
||||
|
||||
/----------------------/
|
||||
|
||||
/(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/
|
||||
Failed: recursive call could loop indefinitely at offset 31
|
||||
|
||||
/-- End of testinput2 --/
|
||||
|
16
ext/pcre/pcrelib/testdata/testoutput6
vendored
16
ext/pcre/pcrelib/testdata/testoutput6
vendored
@ -618,18 +618,6 @@ No match
|
||||
\x{ff5a}
|
||||
0: \x{ff5a}
|
||||
|
||||
/^\X/8
|
||||
A
|
||||
0: A
|
||||
A\x{300}BC
|
||||
0: A\x{300}
|
||||
A\x{300}\x{301}\x{302}BC
|
||||
0: A\x{300}\x{301}\x{302}
|
||||
*** Failers
|
||||
0: *
|
||||
\x{300}
|
||||
No match
|
||||
|
||||
/^[\X]/8
|
||||
X123
|
||||
0: X
|
||||
@ -1293,4 +1281,8 @@ No match
|
||||
\x41\x20\x50\xC2\x54\xC9\x20\x54\x4F\x44\x41\x59
|
||||
0: A P\xc2T\xc9 TODAY
|
||||
|
||||
/\p{Avestan}\p{Bamum}\p{Egyptian_Hieroglyphs}\p{Imperial_Aramaic}\p{Inscriptional_Pahlavi}\p{Inscriptional_Parthian}\p{Javanese}\p{Kaithi}\p{Lisu}\p{Meetei_Mayek}\p{Old_South_Arabian}\p{Old_Turkic}\p{Samaritan}\p{Tai_Tham}\p{Tai_Viet}/8
|
||||
\x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{0800}\x{1aad}\x{aac0}
|
||||
0: \x{10b00}\x{a6ef}\x{13007}\x{10857}\x{10b78}\x{10b58}\x{a980}\x{110c1}\x{a4ff}\x{abc0}\x{10a7d}\x{10c48}\x{800}\x{1aad}\x{aac0}
|
||||
|
||||
/-- End of testinput6 --/
|
||||
|
@ -137,7 +137,23 @@ enum {
|
||||
ucp_Rejang,
|
||||
ucp_Saurashtra,
|
||||
ucp_Sundanese,
|
||||
ucp_Vai
|
||||
ucp_Vai,
|
||||
/* New for Unicode 5.2: */
|
||||
ucp_Avestan,
|
||||
ucp_Bamum,
|
||||
ucp_Egyptian_Hieroglyphs,
|
||||
ucp_Imperial_Aramaic,
|
||||
ucp_Inscriptional_Pahlavi,
|
||||
ucp_Inscriptional_Parthian,
|
||||
ucp_Javanese,
|
||||
ucp_Kaithi,
|
||||
ucp_Lisu,
|
||||
ucp_Meetei_Mayek,
|
||||
ucp_Old_South_Arabian,
|
||||
ucp_Old_Turkic,
|
||||
ucp_Samaritan,
|
||||
ucp_Tai_Tham,
|
||||
ucp_Tai_Viet
|
||||
};
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user