mirror of
https://github.com/php/php-src.git
synced 2024-11-28 04:14:26 +08:00
upgrade PCRE to version 7.2 RC3
# I'll update to the final version early next week when it's released
This commit is contained in:
parent
17db5db759
commit
4e51d2ec73
1
NEWS
1
NEWS
@ -1,6 +1,7 @@
|
||||
PHP NEWS
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
?? ??? 2007, PHP 5.2.4
|
||||
- Upgraded PCRE to version 7.2 (Nuno)
|
||||
- HTTP 500 is sent to browser in case of PHP error instead of blank page.
|
||||
(Dmitry, Andrei Nigmatulin)
|
||||
- Improved fix for MOPB-03-2007. (Ilia)
|
||||
|
@ -5,7 +5,7 @@ ARG_WITH("pcre-regex", "Perl Compatible Regular Expressions", "yes");
|
||||
|
||||
if (PHP_PCRE_REGEX == "yes") {
|
||||
EXTENSION("pcre", "php_pcre.c", PHP_PCRE_REGEX_SHARED,
|
||||
"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DEBCDIC=0 -DNO_RECURSE -Iext/pcre/pcrelib");
|
||||
"-DNO_RECURSE -Iext/pcre/pcrelib");
|
||||
ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
|
||||
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
|
||||
|
||||
|
@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
if test "$PHP_PCRE_REGEX" = "yes"; then
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DEBCDIC=0 -I@ext_srcdir@/pcrelib)
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-I@ext_srcdir@/pcrelib)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
|
||||
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
|
||||
@ -51,7 +51,7 @@ if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
|
||||
AC_DEFINE(HAVE_PCRE, 1, [ ])
|
||||
PHP_ADD_INCLUDE($PCRE_INCDIR)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h])
|
||||
PHP_SUBST(PCRE_SHARED_LIBADD)
|
||||
fi
|
||||
|
@ -6,9 +6,9 @@ Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ THE C++ WRAPPER LIBRARY
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2006 Google Inc
|
||||
Copyright (c) 2007 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
||||
|
@ -1,68 +1,5 @@
|
||||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
||||
|
@ -1,6 +1,226 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 7.2 13-June-07
|
||||
---------------------
|
||||
|
||||
1. If the fr_FR locale cannot be found for test 3, try the "french" locale,
|
||||
which is apparently normally available under Windows.
|
||||
|
||||
2. Re-jig the pcregrep tests with different newline settings in an attempt
|
||||
to make them independent of the local environment's newline setting.
|
||||
|
||||
3. Add code to configure.ac to remove -g from the CFLAGS default settings.
|
||||
|
||||
4. Some of the "internals" tests were previously cut out when the link size
|
||||
was not 2, because the output contained actual offsets. The recent new
|
||||
"Z" feature of pcretest means that these can be cut out, making the tests
|
||||
usable with all link sizes.
|
||||
|
||||
5. Implemented Stan Switzer's goto replacement for longjmp() when not using
|
||||
stack recursion. This gives a massive performance boost under BSD, but just
|
||||
a small improvement under Linux. However, it saves one field in the frame
|
||||
in all cases.
|
||||
|
||||
6. Added more features from the forthcoming Perl 5.10:
|
||||
|
||||
(a) (?-n) (where n is a string of digits) is a relative subroutine or
|
||||
recursion call. It refers to the nth most recently opened parentheses.
|
||||
|
||||
(b) (?+n) is also a relative subroutine call; it refers to the nth next
|
||||
to be opened parentheses.
|
||||
|
||||
(c) Conditions that refer to capturing parentheses can be specified
|
||||
relatively, for example, (?(-2)... or (?(+3)...
|
||||
|
||||
(d) \K resets the start of the current match so that everything before
|
||||
is not part of it.
|
||||
|
||||
(e) \k{name} is synonymous with \k<name> and \k'name' (.NET compatible).
|
||||
|
||||
(f) \g{name} is another synonym - part of Perl 5.10's unification of
|
||||
reference syntax.
|
||||
|
||||
(g) (?| introduces a group in which the numbering of parentheses in each
|
||||
alternative starts with the same number.
|
||||
|
||||
(h) \h, \H, \v, and \V match horizontal and vertical whitespace.
|
||||
|
||||
7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and
|
||||
PCRE_INFO_JCHANGED.
|
||||
|
||||
8. A pattern such as (.*(.)?)* caused pcre_exec() to fail by either not
|
||||
terminating or by crashing. Diagnosed by Viktor Griph; it was in the code
|
||||
for detecting groups that can match an empty string.
|
||||
|
||||
9. A pattern with a very large number of alternatives (more than several
|
||||
hundred) was running out of internal workspace during the pre-compile
|
||||
phase, where pcre_compile() figures out how much memory will be needed. A
|
||||
bit of new cunning has reduced the workspace needed for groups with
|
||||
alternatives. The 1000-alternative test pattern now uses 12 bytes of
|
||||
workspace instead of running out of the 4096 that are available.
|
||||
|
||||
10. Inserted some missing (unsigned int) casts to get rid of compiler warnings.
|
||||
|
||||
11. Applied patch from Google to remove an optimization that didn't quite work.
|
||||
The report of the bug said:
|
||||
|
||||
pcrecpp::RE("a*").FullMatch("aaa") matches, while
|
||||
pcrecpp::RE("a*?").FullMatch("aaa") does not, and
|
||||
pcrecpp::RE("a*?\\z").FullMatch("aaa") does again.
|
||||
|
||||
|
||||
Version 7.1 24-Apr-07
|
||||
---------------------
|
||||
|
||||
1. Applied Bob Rossi and Daniel G's patches to convert the build system to one
|
||||
that is more "standard", making use of automake and other Autotools. There
|
||||
is some re-arrangement of the files and adjustment of comments consequent
|
||||
on this.
|
||||
|
||||
2. Part of the patch fixed a problem with the pcregrep tests. The test of -r
|
||||
for recursive directory scanning broke on some systems because the files
|
||||
are not scanned in any specific order and on different systems the order
|
||||
was different. A call to "sort" has been inserted into RunGrepTest for the
|
||||
appropriate test as a short-term fix. In the longer term there may be an
|
||||
alternative.
|
||||
|
||||
3. I had an email from Eric Raymond about problems translating some of PCRE's
|
||||
man pages to HTML (despite the fact that I distribute HTML pages, some
|
||||
people do their own conversions for various reasons). The problems
|
||||
concerned the use of low-level troff macros .br and .in. I have therefore
|
||||
removed all such uses from the man pages (some were redundant, some could
|
||||
be replaced by .nf/.fi pairs). The 132html script that I use to generate
|
||||
HTML has been updated to handle .nf/.fi and to complain if it encounters
|
||||
.br or .in.
|
||||
|
||||
4. Updated comments in configure.ac that get placed in config.h.in and also
|
||||
arranged for config.h to be included in the distribution, with the name
|
||||
config.h.generic, for the benefit of those who have to compile without
|
||||
Autotools (compare pcre.h, which is now distributed as pcre.h.generic).
|
||||
|
||||
5. Updated the support (such as it is) for Virtual Pascal, thanks to Stefan
|
||||
Weber: (1) pcre_internal.h was missing some function renames; (2) updated
|
||||
makevp.bat for the current PCRE, using the additional files
|
||||
makevp_c.txt, makevp_l.txt, and pcregexp.pas.
|
||||
|
||||
6. A Windows user reported a minor discrepancy with test 2, which turned out
|
||||
to be caused by a trailing space on an input line that had got lost in his
|
||||
copy. The trailing space was an accident, so I've just removed it.
|
||||
|
||||
7. Add -Wl,-R... flags in pcre-config.in for *BSD* systems, as I'm told
|
||||
that is needed.
|
||||
|
||||
8. Mark ucp_table (in ucptable.h) and ucp_gentype (in pcre_ucp_searchfuncs.c)
|
||||
as "const" (a) because they are and (b) because it helps the PHP
|
||||
maintainers who have recently made a script to detect big data structures
|
||||
in the php code that should be moved to the .rodata section. I remembered
|
||||
to update Builducptable as well, so it won't revert if ucptable.h is ever
|
||||
re-created.
|
||||
|
||||
9. Added some extra #ifdef SUPPORT_UTF8 conditionals into pcretest.c,
|
||||
pcre_printint.src, pcre_compile.c, pcre_study.c, and pcre_tables.c, in
|
||||
order to be able to cut out the UTF-8 tables in the latter when UTF-8
|
||||
support is not required. This saves 1.5-2K of code, which is important in
|
||||
some applications.
|
||||
|
||||
Later: more #ifdefs are needed in pcre_ord2utf8.c and pcre_valid_utf8.c
|
||||
so as not to refer to the tables, even though these functions will never be
|
||||
called when UTF-8 support is disabled. Otherwise there are problems with a
|
||||
shared library.
|
||||
|
||||
10. Fixed two bugs in the emulated memmove() function in pcre_internal.h:
|
||||
|
||||
(a) It was defining its arguments as char * instead of void *.
|
||||
|
||||
(b) It was assuming that all moves were upwards in memory; this was true
|
||||
a long time ago when I wrote it, but is no longer the case.
|
||||
|
||||
The emulated memmove() is provided for those environments that have neither
|
||||
memmove() nor bcopy(). I didn't think anyone used it these days, but that
|
||||
is clearly not the case, as these two bugs were recently reported.
|
||||
|
||||
11. The script PrepareRelease is now distributed: it calls 132html, CleanTxt,
|
||||
and Detrail to create the HTML documentation, the .txt form of the man
|
||||
pages, and it removes trailing spaces from listed files. It also creates
|
||||
pcre.h.generic and config.h.generic from pcre.h and config.h. In the latter
|
||||
case, it wraps all the #defines with #ifndefs. This script should be run
|
||||
before "make dist".
|
||||
|
||||
12. Fixed two fairly obscure bugs concerned with quantified caseless matching
|
||||
with Unicode property support.
|
||||
|
||||
(a) For a maximizing quantifier, if the two different cases of the
|
||||
character were of different lengths in their UTF-8 codings (there are
|
||||
some cases like this - I found 11), and the matching function had to
|
||||
back up over a mixture of the two cases, it incorrectly assumed they
|
||||
were both the same length.
|
||||
|
||||
(b) When PCRE was configured to use the heap rather than the stack for
|
||||
recursion during matching, it was not correctly preserving the data for
|
||||
the other case of a UTF-8 character when checking ahead for a match
|
||||
while processing a minimizing repeat. If the check also involved
|
||||
matching a wide character, but failed, corruption could cause an
|
||||
erroneous result when trying to check for a repeat of the original
|
||||
character.
|
||||
|
||||
13. Some tidying changes to the testing mechanism:
|
||||
|
||||
(a) The RunTest script now detects the internal link size and whether there
|
||||
is UTF-8 and UCP support by running ./pcretest -C instead of relying on
|
||||
values substituted by "configure". (The RunGrepTest script already did
|
||||
this for UTF-8.) The configure.ac script no longer substitutes the
|
||||
relevant variables.
|
||||
|
||||
(b) The debugging options /B and /D in pcretest show the compiled bytecode
|
||||
with length and offset values. This means that the output is different
|
||||
for different internal link sizes. Test 2 is skipped for link sizes
|
||||
other than 2 because of this, bypassing the problem. Unfortunately,
|
||||
there was also a test in test 3 (the locale tests) that used /B and
|
||||
failed for link sizes other than 2. Rather than cut the whole test out,
|
||||
I have added a new /Z option to pcretest that replaces the length and
|
||||
offset values with spaces. This is now used to make test 3 independent
|
||||
of link size. (Test 2 will be tidied up later.)
|
||||
|
||||
14. If erroroffset was passed as NULL to pcre_compile, it provoked a
|
||||
segmentation fault instead of returning the appropriate error message.
|
||||
|
||||
15. In multiline mode when the newline sequence was set to "any", the pattern
|
||||
^$ would give a match between the \r and \n of a subject such as "A\r\nB".
|
||||
This doesn't seem right; it now treats the CRLF combination as the line
|
||||
ending, and so does not match in that case. It's only a pattern such as ^$
|
||||
that would hit this one: something like ^ABC$ would have failed after \r
|
||||
and then tried again after \r\n.
|
||||
|
||||
16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub"
|
||||
in an attempt to make files that differ only in their line terminators
|
||||
compare equal. This works on Linux.
|
||||
|
||||
17. Under certain error circumstances pcregrep might try to free random memory
|
||||
as it exited. This is now fixed, thanks to valgrind.
|
||||
|
||||
19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
|
||||
"abc\r\n\r\n", it found an unwanted second match after the second \r. This
|
||||
was because its rules for how to advance for /g after matching an empty
|
||||
string at the end of a line did not allow for this case. They now check for
|
||||
it specially.
|
||||
|
||||
20. pcretest is supposed to handle patterns and data of any length, by
|
||||
extending its buffers when necessary. It was getting this wrong when the
|
||||
buffer for a data line had to be extended.
|
||||
|
||||
21. Added PCRE_NEWLINE_ANYCRLF which is like ANY, but matches only CR, LF, or
|
||||
CRLF as a newline sequence.
|
||||
|
||||
22. Code for handling Unicode properties in pcre_dfa_exec() wasn't being cut
|
||||
out by #ifdef SUPPORT_UCP. This did no harm, as it could never be used, but
|
||||
I have nevertheless tidied it up.
|
||||
|
||||
23. Added some casts to kill warnings from HP-UX ia64 compiler.
|
||||
|
||||
24. Added a man page for pcre-config.
|
||||
|
||||
|
||||
Version 7.0 19-Dec-06
|
||||
---------------------
|
||||
|
||||
|
@ -129,13 +129,18 @@ These items are all just one byte long
|
||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
OP_CIRC ^ (start of data, or after \n in multiline)
|
||||
OP_NOT_WORD_BOUNDARY \B
|
||||
OP_WORD_BOUNDARY \b
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or \n at end: \Z
|
||||
@ -399,4 +404,4 @@ at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
November 2006
|
||||
June 2007
|
@ -20,9 +20,9 @@ Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2006, Google Inc.
|
||||
Copyright (c) 2007, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -1,7 +1,68 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 7.0 23-Nov-06
|
||||
|
||||
Release 7.2 13-Jun-07
|
||||
---------------------
|
||||
|
||||
WARNING: saved patterns that were compiled by earlier versions of PCRE must be
|
||||
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
|
||||
and \V).
|
||||
|
||||
Correction to the notes for 7.1: the note about shared libraries for Windows is
|
||||
wrong. Previously, three libraries were built, but each could function
|
||||
independently. For example, the pcreposix library also included all the
|
||||
functions from the basic pcre library. The change is that the three libraries
|
||||
are no longer independent. They are like the Unix libraries. To use the
|
||||
pcreposix functions, for example, you need to link with both the pcreposix and
|
||||
the basic pcre library.
|
||||
|
||||
Some more features from Perl 5.10 have been added:
|
||||
|
||||
(?-n) and (?+n) relative references for recursion and subroutines.
|
||||
|
||||
(?(-n) and (?(+n) relative references as conditions.
|
||||
|
||||
\k{name} and \g{name} are synonyms for \k<name>.
|
||||
|
||||
\K to reset the start of the matched string; for example, (foo)\Kbar
|
||||
matches bar preceded by foo, but only sets bar as the matched string.
|
||||
|
||||
(?| introduces a group where the capturing parentheses in each alternative
|
||||
start from the same number; for example, (?|(abc)|(xyz)) sets capturing
|
||||
parentheses number 1 in both cases.
|
||||
|
||||
\h, \H, \v, \V match horizontal and vertical whitespace, respectively.
|
||||
|
||||
|
||||
Release 7.1 24-Apr-07
|
||||
---------------------
|
||||
|
||||
There is only one new feature in this release: a linebreak setting of
|
||||
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
|
||||
recognizes only CRLF, CR, and LF as linebreaks.
|
||||
|
||||
A few bugs are fixed (see ChangeLog for details), but the major change is a
|
||||
complete re-implementation of the build system. This now has full Autotools
|
||||
support and so is now "standard" in some sense. It should help with compiling
|
||||
PCRE in a wide variety of environments.
|
||||
|
||||
NOTE: when building shared libraries for Windows, three dlls are now built,
|
||||
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
|
||||
included in a single dll.
|
||||
|
||||
Another important change is that the dftables auxiliary program is no longer
|
||||
compiled and run at "make" time by default. Instead, a default set of character
|
||||
tables (assuming ASCII coding) is used. If you want to use dftables to generate
|
||||
the character tables as previously, add --enable-rebuild-chartables to the
|
||||
"configure" command. You must do this if you are compiling PCRE to run on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
There is a discussion about character tables in the README file. The default is
|
||||
not to use dftables so that there is no problem when cross-compiling.
|
||||
|
||||
|
||||
Release 7.0 19-Dec-06
|
||||
---------------------
|
||||
|
||||
This release has a new major number because there have been some internal
|
||||
|
@ -1,128 +1,121 @@
|
||||
Compiling PCRE on non-Unix systems
|
||||
----------------------------------
|
||||
|
||||
See below for comments on Cygwin or MinGW and OpenVMS usage. I (Philip Hazel)
|
||||
have no knowledge of Windows or VMS systems and how their libraries work. The
|
||||
items in the PCRE Makefile that relate to anything other than Unix-like systems
|
||||
have been contributed by PCRE users. There are some other comments and files in
|
||||
the Contrib directory on the ftp site that you may find useful. See
|
||||
This document contains the following sections:
|
||||
|
||||
General
|
||||
Generic instructions for the PCRE C library
|
||||
The C++ wrapper functions
|
||||
Building for virtual Pascal
|
||||
Comments about Win32 builds
|
||||
Building under Windows with BCC5.5
|
||||
Building PCRE on OpenVMS
|
||||
|
||||
|
||||
GENERAL
|
||||
|
||||
I (Philip Hazel) have no knowledge of Windows or VMS systems and how their
|
||||
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||
anything other than Unix-like systems are untested by me.
|
||||
|
||||
There are some other comments and files in the Contrib directory on the ftp
|
||||
site that you may find useful. See
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
If you want to compile PCRE for a non-Unix system (or perhaps, more strictly,
|
||||
for a system that does not support "configure" and "make" files), note that
|
||||
the basic PCRE library consists entirely of code written in Standard C, and so
|
||||
should compile successfully on any system that has a Standard C compiler and
|
||||
library. The C++ wrapper functions are a separate issue (see below).
|
||||
If you want to compile PCRE for a non-Unix system (especially for a system that
|
||||
does not support "configure" and "make" files), note that the basic PCRE
|
||||
library consists entirely of code written in Standard C, and so should compile
|
||||
successfully on any system that has a Standard C compiler and library. The C++
|
||||
wrapper functions are a separate issue (see below).
|
||||
|
||||
The PCRE distribution contains some experimental support for "cmake", but this
|
||||
is incomplete and not documented. However if you are a "cmake" user you might
|
||||
like to try building with "cmake".
|
||||
|
||||
|
||||
GENERIC INSTRUCTIONS FOR THE C LIBRARY
|
||||
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||
|
||||
The following are generic comments about building PCRE. The interspersed
|
||||
indented commands are suggestions from Mark Tetrode as to which commands you
|
||||
might use on a Windows system to build a static library.
|
||||
The following are generic comments about building the PCRE C library "by hand".
|
||||
|
||||
(1) Copy or rename the file config.h.in as config.h, and change the macros that
|
||||
define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0.
|
||||
Unfortunately, because of the way Unix autoconf works, the default setting has
|
||||
to be 0. You may also want to make changes to other macros in config.h. In
|
||||
particular, if you want to force a specific value for newline, you can define
|
||||
the NEWLINE macro. The default is to use '\n', thereby using whatever value
|
||||
your compiler gives to '\n'.
|
||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||
settings that it contains to whatever is appropriate for your environment.
|
||||
In particular, if you want to force a specific value for newline, you can
|
||||
define the NEWLINE macro.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
copy config.h.in config.h
|
||||
rem Use write, because notepad cannot handle UNIX files. Change values.
|
||||
write config.h
|
||||
An alternative approach is not to edit config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need.
|
||||
|
||||
(2) Compile dftables.c as a stand-alone program, and then run it with
|
||||
the single argument "pcre_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file.
|
||||
NOTE: There have been occasions when the way in which certain parameters in
|
||||
config.h are used has changed between releases. (In the configure/make
|
||||
world, this is handled automatically.) When upgrading to a new release, you
|
||||
are strongly advised to review config.h.generic before re-using what you
|
||||
had previously.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
rem Compile & run
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP dftables.c
|
||||
dftables.exe pcre_chartables.c
|
||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||
|
||||
(3) Compile the following source files:
|
||||
(3) EITHER:
|
||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucp_searchfuncs.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
OR:
|
||||
Compile dftables.c as a stand-alone program, and then run it with the
|
||||
single argument "pcre_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on
|
||||
a system that uses EBCDIC code.
|
||||
|
||||
and link them all together into an object library in whichever form your system
|
||||
keeps such libraries. This is the pcre C library. If your system has static and
|
||||
shared libraries, you may have to do this once for each type.
|
||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||
specify alternative tables at run time.
|
||||
|
||||
rem These comments are out-of-date, referring to a previous release which
|
||||
rem had fewer source files. Replace with the file names from above.
|
||||
rem Mark Tetrode's commands, for a static library
|
||||
rem Compile & lib
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c maketables.c get.c study.c pcre.c
|
||||
lib /OUT:pcre.lib maketables.obj get.obj study.obj pcre.obj
|
||||
(4) Compile the following source files:
|
||||
|
||||
(4) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
|
||||
library.
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucp_searchfuncs.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
|
||||
rem Mark Tetrode's commands, for a static library
|
||||
rem Compile & lib
|
||||
cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c pcreposix.c
|
||||
lib /OUT:pcreposix.lib pcreposix.obj
|
||||
Now link them all together into an object library in whichever form your
|
||||
system keeps such libraries. This is the basic PCRE C library. If your
|
||||
system has static and shared libraries, you may have to do this once for
|
||||
each type.
|
||||
|
||||
(5) Compile the test program pcretest.c. This needs the functions in the
|
||||
pcre and pcreposix libraries when linking.
|
||||
(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
|
||||
library.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
rem compile & link
|
||||
cl /F0x400000 pcretest.c pcre.lib pcreposix.lib
|
||||
(6) Compile the test program pcretest.c. This needs the functions in the
|
||||
pcre and pcreposix libraries when linking.
|
||||
|
||||
(6) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. Note that the
|
||||
supplied files are in Unix format, with just LF characters as line terminators.
|
||||
You may need to edit them to change this if your system uses a different
|
||||
convention.
|
||||
(7) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. Note that the
|
||||
supplied files are in Unix format, with just LF characters as line
|
||||
terminators. You may need to edit them to change this if your system uses a
|
||||
different convention.
|
||||
|
||||
rem Mark Tetrode's commands
|
||||
pcretest testdata\testinput1 testdata\myoutput1
|
||||
windiff testdata\testoutput1 testdata\myoutput1
|
||||
pcretest -i testdata\testinput2 testdata\myoutput2
|
||||
windiff testdata\testoutput2 testdata\myoutput2
|
||||
pcretest testdata\testinput3 testdata\myoutput3
|
||||
windiff testdata\testoutput3 testdata\myoutput3
|
||||
pcretest testdata\testinput4 testdata\myoutput4
|
||||
windiff testdata\testoutput4 testdata\myoutput4
|
||||
pcretest testdata\testinput5 testdata\myoutput5
|
||||
windiff testdata\testoutput5 testdata\myoutput5
|
||||
pcretest testdata\testinput6 testdata\myoutput6
|
||||
windiff testdata\testoutput6 testdata\myoutput6
|
||||
|
||||
Note that there are now three more tests (7, 8, 9) that did not exist when Mark
|
||||
wrote those comments. They test the new pcre_dfa_exec() function.
|
||||
|
||||
(7) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library.
|
||||
(8) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library (it does not need the pcreposix library).
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
The PCRE distribution now contains some C++ wrapper functions and tests,
|
||||
The PCRE distribution also contains some C++ wrapper functions and tests,
|
||||
contributed by Google Inc. On a system that can use "configure" and "make",
|
||||
the functions are automatically built into a library called pcrecpp. It should
|
||||
be straightforward to compile the .cc files manually on other systems. The
|
||||
@ -130,11 +123,90 @@ files called xxx_unittest.cc are test programs for each of the corresponding
|
||||
xxx.cc files.
|
||||
|
||||
|
||||
FURTHER REMARKS
|
||||
BUILDING FOR VIRTUAL PASCAL
|
||||
|
||||
If you have a system without "configure" but where you can use a Makefile, edit
|
||||
Makefile.in to create Makefile, substituting suitable values for the variables
|
||||
at the head of the file.
|
||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
|
||||
additional files. The following files in the distribution are for building PCRE
|
||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS
|
||||
|
||||
There are two ways of building PCRE using the "configure, make, make install"
|
||||
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
|
||||
the same thing; they are completely different from each other. There is also
|
||||
some experimental, undocumented support for building using "cmake", which you
|
||||
might like to try if you are familiar with "cmake". However, at the present
|
||||
time, the "cmake" process builds only a static library (not a dll), and the
|
||||
tests are not automatically run.
|
||||
|
||||
The MinGW home page (http://www.mingw.org/) says this:
|
||||
|
||||
MinGW: A collection of freely available and freely distributable Windows
|
||||
specific header files and import libraries combined with GNU toolsets that
|
||||
allow one to produce native Windows programs that do not rely on any
|
||||
3rd-party C runtime DLLs.
|
||||
|
||||
The Cygwin home page (http://www.cygwin.com/) says this:
|
||||
|
||||
Cygwin is a Linux-like environment for Windows. It consists of two parts:
|
||||
|
||||
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
|
||||
substantial Linux API functionality
|
||||
|
||||
. A collection of tools which provide Linux look and feel.
|
||||
|
||||
The Cygwin DLL currently works with all recent, commercially released x86 32
|
||||
bit and 64 bit versions of Windows, with the exception of Windows CE.
|
||||
|
||||
On both MinGW and Cygwin, PCRE should build correctly using:
|
||||
|
||||
./configure && make && make install
|
||||
|
||||
This should create two libraries called libpcre and libpcreposix, and, if you
|
||||
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
|
||||
independent libraries: when you link with libpcreposix or libpcrecpp you must
|
||||
also link with libpcre, which contains the basic functions. (Some earlier
|
||||
releases of PCRE included the basic libpcre functions in libpcreposix. This no
|
||||
longer happens.)
|
||||
|
||||
If you want to statically link your program against a non-dll .a file, you must
|
||||
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
|
||||
pcre_free() exported functions will be declared __declspec(dllimport), with
|
||||
unwanted results.
|
||||
|
||||
Using Cygwin's compiler generates libraries and executables that depend on
|
||||
cygwin1.dll. If a library that is generated this way is distributed,
|
||||
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
|
||||
licence, this forces not only PCRE to be under the GPL, but also the entire
|
||||
application. A distributor who wants to keep their own code proprietary must
|
||||
purchase an appropriate Cygwin licence.
|
||||
|
||||
MinGW has no such restrictions. The MinGW compiler generates a library or
|
||||
executable that can run standalone on Windows without any third party dll or
|
||||
licensing issues.
|
||||
|
||||
But there is more complication:
|
||||
|
||||
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
|
||||
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
|
||||
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
|
||||
gcc and MinGW's gcc). So, a user can:
|
||||
|
||||
. Build native binaries by using MinGW or by getting Cygwin and using
|
||||
-mno-cygwin.
|
||||
|
||||
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
|
||||
compiler flags.
|
||||
|
||||
The test files that are supplied with PCRE are in Unix format, with LF
|
||||
characters as line terminators. It may be necessary to change the line
|
||||
terminators in order to get some of the tests to work. We hope to improve
|
||||
things in this area in future.
|
||||
|
||||
|
||||
BUILDING UNDER WINDOWS WITH BCC5.5
|
||||
|
||||
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
|
||||
@ -147,72 +219,6 @@ Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
|
||||
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
|
||||
line.
|
||||
|
||||
Some help in building a Win32 DLL of PCRE in GnuWin32 environments was
|
||||
contributed by Paul Sokolovsky. These environments are Mingw32
|
||||
(http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and CygWin
|
||||
(http://sourceware.cygnus.com/cygwin/). Paul comments:
|
||||
|
||||
For CygWin, set CFLAGS=-mno-cygwin, and do 'make dll'. You'll get
|
||||
pcre.dll (containing pcreposix also), libpcre.dll.a, and dynamically
|
||||
linked pgrep and pcretest. If you have /bin/sh, run RunTest (three
|
||||
main test go ok, locale not supported).
|
||||
|
||||
Changes to do MinGW with autoconf 2.50 were supplied by Fred Cox
|
||||
<sailorFred@yahoo.com>, who comments as follows:
|
||||
|
||||
If you are using the PCRE DLL, the normal Unix style configure && make &&
|
||||
make check && make install should just work[*]. If you want to statically
|
||||
link against the .a file, you must define PCRE_STATIC before including
|
||||
pcre.h, otherwise the pcre_malloc and pcre_free exported functions will be
|
||||
declared __declspec(dllimport), with hilarious results. See the configure.in
|
||||
and pcretest.c for how it is done for the static test.
|
||||
|
||||
Also, there will only be a libpcre.la, not a libpcreposix.la, as you
|
||||
would expect from the Unix version. The single DLL includes the pcreposix
|
||||
interface.
|
||||
|
||||
[*] But note that the supplied test files are in Unix format, with just LF
|
||||
characters as line terminators. You will have to edit them to change to CR LF
|
||||
terminators.
|
||||
|
||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||
was contributed by Alexander Tokarev. It is called makevp.bat.
|
||||
|
||||
These are some further comments about Win32 builds from Mark Evans. They
|
||||
were contributed before Fred Cox's changes were made, so it is possible that
|
||||
they may no longer be relevant.
|
||||
|
||||
"The documentation for Win32 builds is a bit shy. Under MSVC6 I
|
||||
followed their instructions to the letter, but there were still
|
||||
some things missing.
|
||||
|
||||
(1) Must #define STATIC for entire project if linking statically.
|
||||
(I see no reason to use DLLs for code this compact.) This of
|
||||
course is a project setting in MSVC under Preprocessor.
|
||||
|
||||
(2) Missing some #ifdefs relating to the function pointers
|
||||
pcre_malloc and pcre_free. See my solution below. (The stubs
|
||||
may not be mandatory but they made me feel better.)"
|
||||
|
||||
=========================
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
|
||||
void* malloc_stub(size_t N)
|
||||
{ return malloc(N); }
|
||||
void free_stub(void* p)
|
||||
{ free(p); }
|
||||
void *(*pcre_malloc)(size_t) = &malloc_stub;
|
||||
void (*pcre_free)(void *) = &free_stub;
|
||||
|
||||
#else
|
||||
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
|
||||
#endif
|
||||
=========================
|
||||
|
||||
|
||||
BUILDING PCRE ON OPENVMS
|
||||
|
||||
@ -278,4 +284,5 @@ $! Locale could not be set to fr
|
||||
$!
|
||||
=========================
|
||||
|
||||
Last Updated: 13 June 2007
|
||||
****
|
||||
|
@ -5,51 +5,82 @@ The latest release of PCRE is always available from
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
|
||||
pcre-dev@exim.org
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
|
||||
The PCRE APIs
|
||||
Documentation for PCRE
|
||||
Contributions by users of PCRE
|
||||
Building PCRE on non-Unix systems
|
||||
Building PCRE on Unix-like systems
|
||||
Retrieving configuration information on Unix-like systems
|
||||
Shared libraries on Unix-like systems
|
||||
Cross-compiling on Unix-like systems
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
Making new tarballs
|
||||
Testing PCRE
|
||||
Character tables
|
||||
File manifest
|
||||
|
||||
|
||||
The PCRE APIs
|
||||
-------------
|
||||
|
||||
PCRE is written in C, and it has its own API. The distribution now includes a
|
||||
set of C++ wrapper functions, courtesy of Google Inc. (see the pcrecpp man page
|
||||
for details).
|
||||
PCRE is written in C, and it has its own API. The distribution also includes a
|
||||
set of C++ wrapper functions (see the pcrecpp man page for details), courtesy
|
||||
of Google Inc.
|
||||
|
||||
Also included are a set of C wrapper functions that are based on the POSIX
|
||||
API. These end up in the library called libpcreposix. Note that this just
|
||||
provides a POSIX calling interface to PCRE: the regular expressions themselves
|
||||
still follow Perl syntax and semantics. The header file for the POSIX-style
|
||||
functions is called pcreposix.h. The official POSIX name is regex.h, but I
|
||||
didn't want to risk possible problems with existing files of that name by
|
||||
distributing it that way. To use it with an existing program that uses the
|
||||
POSIX API, it will have to be renamed or pointed at by a link.
|
||||
In addition, there is a set of C wrapper functions that are based on the POSIX
|
||||
regular expression API (see the pcreposix man page). These end up in the
|
||||
library called libpcreposix. Note that this just provides a POSIX calling
|
||||
interface to PCRE; the regular expressions themselves still follow Perl syntax
|
||||
and semantics. The POSIX API is restricted, and does not give full access to
|
||||
all of PCRE's facilities.
|
||||
|
||||
The header file for the POSIX-style functions is called pcreposix.h. The
|
||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE with
|
||||
an existing program that uses the POSIX API, pcreposix.h will have to be
|
||||
renamed or pointed at by a link.
|
||||
|
||||
If you are using the POSIX interface to PCRE and there is already a POSIX regex
|
||||
library installed on your system, you must take care when linking programs to
|
||||
library installed on your system, as well as worrying about the regex.h header
|
||||
file (as mentioned above), you must also take care when linking programs to
|
||||
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
|
||||
up the "real" POSIX functions of the same name.
|
||||
up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE with the addition of
|
||||
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
|
||||
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
|
||||
effect of renaming the functions so that the names no longer clash. Of course,
|
||||
you have to do the same thing for your applications, or write them using the
|
||||
new names.
|
||||
|
||||
|
||||
Documentation for PCRE
|
||||
----------------------
|
||||
|
||||
If you install PCRE in the normal way, you will end up with an installed set of
|
||||
man pages whose names all start with "pcre". The one that is just called "pcre"
|
||||
lists all the others. In addition to these man pages, the PCRE documentation is
|
||||
supplied in two other forms; however, as there is no standard place to install
|
||||
them, they are left in the doc directory of the unpacked source distribution.
|
||||
These forms are:
|
||||
If you install PCRE in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre". The one that is just
|
||||
called "pcre" lists all the others. In addition to these man pages, the PCRE
|
||||
documentation is supplied in two other forms:
|
||||
|
||||
1. Files called doc/pcre.txt, doc/pcregrep.txt, and doc/pcretest.txt. The
|
||||
first of these is a concatenation of the text forms of all the section 3
|
||||
man pages except those that summarize individual functions. The other two
|
||||
are the text forms of the section 1 man pages for the pcregrep and
|
||||
pcretest commands. Text forms are provided for ease of scanning with text
|
||||
editors or similar tools.
|
||||
1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
|
||||
doc/pcretest.txt in the source distribution. The first of these is a
|
||||
concatenation of the text forms of all the section 3 man pages except
|
||||
those that summarize individual functions. The other two are the text
|
||||
forms of the section 1 man pages for the pcregrep and pcretest commands.
|
||||
These text forms are provided for ease of scanning with text editors or
|
||||
similar tools. They are installed in <prefix>/share/doc/pcre, where
|
||||
<prefix> is the installation prefix (defaulting to /usr/local).
|
||||
|
||||
2. A subdirectory called doc/html contains all the documentation in HTML
|
||||
form, hyperlinked in various ways, and rooted in a file called
|
||||
doc/index.html.
|
||||
2. A set of files containing all the documentation in HTML form, hyperlinked
|
||||
in various ways, and rooted in a file called index.html, is distributed in
|
||||
doc/html and installed in <prefix>/share/doc/pcre/html.
|
||||
|
||||
|
||||
Contributions by users of PCRE
|
||||
@ -59,27 +90,46 @@ You can find contributions from PCRE users in the directory
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
|
||||
|
||||
where there is also a README file giving brief descriptions of what they are.
|
||||
Several of them provide support for compiling PCRE on various flavours of
|
||||
Windows systems (I myself do not use Windows). Some are complete in themselves;
|
||||
others are pointers to URLs containing relevant files.
|
||||
There is a README file giving brief descriptions of what they are. Some are
|
||||
complete in themselves; others are pointers to URLs containing relevant files.
|
||||
Some of this material is likely to be well out-of-date. Several of the earlier
|
||||
contributions provided support for compiling PCRE on various flavours of
|
||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
|
||||
Building PCRE on a Unix-like system
|
||||
-----------------------------------
|
||||
Building PCRE on non-Unix systems
|
||||
---------------------------------
|
||||
|
||||
For a non-Unix system, please read the comments in the file NON-UNIX-USE,
|
||||
though if your system supports the use of "configure" and "make" you may be
|
||||
able to build PCRE in the same way as for Unix-like systems.
|
||||
|
||||
PCRE has been compiled on many different operating systems. It should be
|
||||
straightforward to build PCRE on any system that has a Standard C compiler and
|
||||
library, because it uses only Standard C functions.
|
||||
|
||||
|
||||
Building PCRE on Unix-like systems
|
||||
----------------------------------
|
||||
|
||||
If you are using HP's ANSI C++ compiler (aCC), please see the special note
|
||||
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
|
||||
|
||||
The following instructions assume the use of the widely used "configure, make,
|
||||
make install" process. There is also some experimental support for "cmake" in
|
||||
the PCRE distribution, but it is incomplete and not documented. However, if you
|
||||
are a "cmake" user, you might want to try it.
|
||||
|
||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
||||
PCRE distribution directory, with your current directory set to the directory
|
||||
where you want the files to be created. This command is a standard GNU
|
||||
"autoconf" configuration script, for which generic instructions are supplied in
|
||||
INSTALL.
|
||||
the file INSTALL.
|
||||
|
||||
Most commonly, people build PCRE within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient, but the
|
||||
usual methods of changing standard defaults are available. For example:
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
||||
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
|
||||
|
||||
@ -103,8 +153,8 @@ library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
. If you want to suppress the building of the C++ wrapper library, you can add
|
||||
--disable-cpp to the "configure" command. Otherwise, when "configure" is run,
|
||||
will try to find a C++ compiler and C++ header files, and if it succeeds, it
|
||||
will try to build the C++ wrapper.
|
||||
it will try to find a C++ compiler and C++ header files, and if it succeeds,
|
||||
it will try to build the C++ wrapper.
|
||||
|
||||
. If you want to make use of the support for UTF-8 character strings in PCRE,
|
||||
you must add --enable-utf8 to the "configure" command. Without it, the code
|
||||
@ -119,16 +169,24 @@ library. You can read more about them in the pcrebuild man page.
|
||||
supported.
|
||||
|
||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
|
||||
of the Unicode newline sequences as indicating the end of a line. Whatever
|
||||
you specify at build time is the default; the caller of PCRE can change the
|
||||
selection at run time. The default newline indicator is a single LF character
|
||||
(the Unix standard). You can specify the default newline indicator by adding
|
||||
--newline-is-cr or --newline-is-lf or --newline-is-crlf or --newline-is-any
|
||||
to the "configure" command, respectively.
|
||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
them. You can increase this threshold by setting, for example,
|
||||
them in a pattern. You can increase this threshold by setting, for example,
|
||||
|
||||
--with-posix-malloc-threshold=20
|
||||
|
||||
@ -141,8 +199,8 @@ library. You can read more about them in the pcrebuild man page.
|
||||
--with-match-limit=500000
|
||||
|
||||
on the "configure" command. This is just the default; individual calls to
|
||||
pcre_exec() can supply their own value. There is discussion on the pcreapi
|
||||
man page.
|
||||
pcre_exec() can supply their own value. There is more discussion on the
|
||||
pcreapi man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
@ -157,37 +215,61 @@ library. You can read more about them in the pcrebuild man page.
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. You can
|
||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
||||
ever to be necessary. If you build PCRE with an increased link size, test 2
|
||||
(and 5 if you are using UTF-8) will fail. Part of the output of these tests
|
||||
is a representation of the compiled pattern, and this changes with the link
|
||||
size.
|
||||
ever to be necessary. Increasing the internal link size will reduce
|
||||
performance.
|
||||
|
||||
. You can build PCRE so that its internal match() function that is called from
|
||||
pcre_exec() does not call itself recursively. Instead, it uses blocks of data
|
||||
from the heap via special functions pcre_stack_malloc() and pcre_stack_free()
|
||||
to save data that would otherwise be saved on the stack. To build PCRE like
|
||||
this, use
|
||||
pcre_exec() does not call itself recursively. Instead, it uses memory blocks
|
||||
obtained from the heap via the special functions pcre_stack_malloc() and
|
||||
pcre_stack_free() to save data that would otherwise be saved on the stack. To
|
||||
build PCRE like this, use
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
on the "configure" command. PCRE runs more slowly in this mode, but it may be
|
||||
necessary in environments with limited stack sizes. This applies only to the
|
||||
pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not
|
||||
use deeply nested recursion.
|
||||
use deeply nested recursion. There is a discussion about stack sizes in the
|
||||
pcrestack man page.
|
||||
|
||||
The "configure" script builds eight files for the basic C library:
|
||||
. For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. By default, it uses a set of
|
||||
tables for ASCII encoding that is part of the distribution. If you specify
|
||||
|
||||
--enable-rebuild-chartables
|
||||
|
||||
a program called dftables is compiled and run in the default C locale when
|
||||
you obey "make". It builds a source file called pcre_chartables.c. If you do
|
||||
not specify this option, pcre_chartables.c is created as a copy of
|
||||
pcre_chartables.c.dist. See "Character tables" below for further information.
|
||||
|
||||
. It is possible to compile PCRE for use on systems that use EBCDIC as their
|
||||
default character code (as opposed to ASCII) by specifying
|
||||
|
||||
--enable-ebcdic
|
||||
|
||||
This automatically implies --enable-rebuild-chartables (see above).
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile is the makefile that builds the library
|
||||
. config.h contains build-time configuration options for the library
|
||||
. pcre.h is the public PCRE header file
|
||||
. pcre-config is a script that shows the settings of "configure" options
|
||||
. libpcre.pc is data for the pkg-config command
|
||||
. libtool is a script that builds shared and/or static libraries
|
||||
. RunTest is a script for running tests on the library
|
||||
. RunTest is a script for running tests on the basic C library
|
||||
. RunGrepTest is a script for running tests on the pcregrep command
|
||||
|
||||
In addition, if a C++ compiler is found, the following are also built:
|
||||
Versions of config.h and pcre.h are distributed in the PCRE tarballs under
|
||||
the names config.h.generic and pcre.h.generic. These are provided for the
|
||||
benefit of those who have to build PCRE without the benefit of "configure". If
|
||||
you use "configure", the .generic versions are not used.
|
||||
|
||||
. pcrecpp.h is the header file for programs that call PCRE via the C++ wrapper
|
||||
If a C++ compiler is found, the following files are also built:
|
||||
|
||||
. libpcrecpp.pc is data for the pkg-config command
|
||||
. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper
|
||||
. pcre_stringpiece.h is the header for the C++ "stringpiece" functions
|
||||
|
||||
The "configure" script also creates config.status, which is an executable
|
||||
@ -195,18 +277,65 @@ script that can be run to recreate the configuration, and config.log, which
|
||||
contains compiler output from tests that "configure" runs.
|
||||
|
||||
Once "configure" has run, you can run "make". It builds two libraries, called
|
||||
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
|
||||
command. If a C++ compiler was found on your system, it also builds the C++
|
||||
wrapper library, which is called libpcrecpp, and some test programs called
|
||||
pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
|
||||
libpcre and libpcreposix, a test program called pcretest, a demonstration
|
||||
program called pcredemo, and the pcregrep command. If a C++ compiler was found
|
||||
on your system, "make" also builds the C++ wrapper library, which is called
|
||||
libpcrecpp, and some test programs called pcrecpp_unittest,
|
||||
pcre_scanner_unittest, and pcre_stringpiece_unittest. Building the C++ wrapper
|
||||
can be disabled by adding --disable-cpp to the "configure" command.
|
||||
|
||||
The command "make test" runs all the appropriate tests. Details of the PCRE
|
||||
tests are given in a separate section of this document, below.
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE
|
||||
tests are given below in a separate section of this document.
|
||||
|
||||
You can use "make install" to copy the libraries, the public header files
|
||||
pcre.h, pcreposix.h, pcrecpp.h, and pcre_stringpiece.h (the last two only if
|
||||
the C++ wrapper was built), and the man pages to appropriate live directories
|
||||
on your system, in the normal way.
|
||||
You can use "make install" to install PCRE into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
<prefix> that is set when "configure" is run):
|
||||
|
||||
Commands (bin):
|
||||
pcretest
|
||||
pcregrep
|
||||
pcre-config
|
||||
|
||||
Libraries (lib):
|
||||
libpcre
|
||||
libpcreposix
|
||||
libpcrecpp (if C++ support is enabled)
|
||||
|
||||
Configuration information (lib/pkgconfig):
|
||||
libpcre.pc
|
||||
libpcrecpp.pc (if C++ support is enabled)
|
||||
|
||||
Header files (include):
|
||||
pcre.h
|
||||
pcreposix.h
|
||||
pcre_scanner.h )
|
||||
pcre_stringpiece.h ) if C++ support is enabled
|
||||
pcrecpp.h )
|
||||
pcrecpparg.h )
|
||||
|
||||
Man pages (share/man/man{1,3}):
|
||||
pcregrep.1
|
||||
pcretest.1
|
||||
pcre.3
|
||||
pcre*.3 (lots more pages, all starting "pcre")
|
||||
|
||||
HTML documentation (share/doc/pcre/html):
|
||||
index.html
|
||||
*.html (lots more pages, hyperlinked from index.html)
|
||||
|
||||
Text file documentation (share/doc/pcre):
|
||||
AUTHORS
|
||||
COPYING
|
||||
ChangeLog
|
||||
LICENCE
|
||||
NEWS
|
||||
README
|
||||
pcre.txt (a concatenation of the man(3) pages)
|
||||
pcretest.txt the pcretest man page
|
||||
pcregrep.txt the pcregrep man page
|
||||
|
||||
Note that the pcredemo program that is built by "make" is *not* installed
|
||||
anywhere. It is a demonstration for programmers wanting to use PCRE.
|
||||
|
||||
If you want to remove PCRE from your system, you can run "make uninstall".
|
||||
This removes all the files that "make install" installed. However, it does not
|
||||
@ -216,9 +345,8 @@ remove any directories, because these are often shared with other programs.
|
||||
Retrieving configuration information on Unix-like systems
|
||||
---------------------------------------------------------
|
||||
|
||||
Running "make install" also installs the command pcre-config, which can be used
|
||||
to recall information about the PCRE configuration and installation. For
|
||||
example:
|
||||
Running "make install" installs the command pcre-config, which can be used to
|
||||
recall information about the PCRE configuration and installation. For example:
|
||||
|
||||
pcre-config --version
|
||||
|
||||
@ -237,7 +365,7 @@ single command is used. For example:
|
||||
pkg-config --cflags pcre
|
||||
|
||||
The data is held in *.pc files that are installed in a directory called
|
||||
pkgconfig.
|
||||
<prefix>/lib/pkgconfig.
|
||||
|
||||
|
||||
Shared libraries on Unix-like systems
|
||||
@ -254,7 +382,7 @@ built. The programs pcretest and pcregrep are built to use these uninstalled
|
||||
libraries (by means of wrapper scripts in the case of shared libraries). When
|
||||
you use "make install" to install shared libraries, pcregrep and pcretest are
|
||||
automatically re-built to use the newly installed shared libraries before being
|
||||
installed themselves. However, the versions left in the source directory still
|
||||
installed themselves. However, the versions left in the build directory still
|
||||
use the uninstalled libraries.
|
||||
|
||||
To build PCRE using static libraries only you must use --disable-shared when
|
||||
@ -266,25 +394,33 @@ Then run "make" in the usual way. Similarly, you can use --disable-static to
|
||||
build only shared libraries.
|
||||
|
||||
|
||||
Cross-compiling on a Unix-like system
|
||||
-------------------------------------
|
||||
Cross-compiling on Unix-like systems
|
||||
------------------------------------
|
||||
|
||||
You can specify CC and CFLAGS in the normal way to the "configure" command, in
|
||||
order to cross-compile PCRE for some other host. However, during the building
|
||||
process, the dftables.c source file is compiled *and run* on the local host, in
|
||||
order to generate the default character tables (the chartables.c file). It
|
||||
therefore needs to be compiled with the local compiler, not the cross compiler.
|
||||
You can do this by specifying CC_FOR_BUILD (and if necessary CFLAGS_FOR_BUILD;
|
||||
there are also CXX_FOR_BUILD and CXXFLAGS_FOR_BUILD for the C++ wrapper)
|
||||
when calling the "configure" command. If they are not specified, they default
|
||||
to the values of CC and CFLAGS.
|
||||
order to cross-compile PCRE for some other host. However, you should NOT
|
||||
specify --enable-rebuild-chartables, because if you do, the dftables.c source
|
||||
file is compiled and run on the local host, in order to generate the inbuilt
|
||||
character tables (the pcre_chartables.c file). This will probably not work,
|
||||
because dftables.c needs to be compiled with the local compiler, not the cross
|
||||
compiler.
|
||||
|
||||
When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
|
||||
by making a copy of pcre_chartables.c.dist, which is a default set of tables
|
||||
that assumes ASCII code. Cross-compiling with the default tables should not be
|
||||
a problem.
|
||||
|
||||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
|
||||
run it on the local host to make a new version of pcre_chartables.c.dist.
|
||||
Then when you cross-compile PCRE this new version of the tables will be used.
|
||||
|
||||
|
||||
Using HP's ANSI C++ compiler (aCC)
|
||||
----------------------------------
|
||||
|
||||
Unless C++ support is disabled by specifying the "--disable-cpp" option of the
|
||||
"configure" script, you *must* include the "-AA" option in the CXXFLAGS
|
||||
"configure" script, you must include the "-AA" option in the CXXFLAGS
|
||||
environment variable in order for the C++ components to compile correctly.
|
||||
|
||||
Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
|
||||
@ -296,34 +432,32 @@ running the "configure" script:
|
||||
CXXLDFLAGS="-lstd_v2 -lCsup_v2"
|
||||
|
||||
|
||||
Building on non-Unix systems
|
||||
----------------------------
|
||||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
For a non-Unix system, read the comments in the file NON-UNIX-USE, though if
|
||||
the system supports the use of "configure" and "make" you may be able to build
|
||||
PCRE in the same way as for Unix systems.
|
||||
The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
PCRE has been compiled on Windows systems and on Macintoshes, but I don't know
|
||||
the details because I don't use those systems. It should be straightforward to
|
||||
build PCRE on any system that has a Standard C compiler and library, because it
|
||||
uses only Standard C functions.
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
script creates the .txt and HTML forms of the documentation from the man pages.
|
||||
|
||||
|
||||
Testing PCRE
|
||||
------------
|
||||
|
||||
To test PCRE on a Unix system, run the RunTest script that is created by the
|
||||
configuring process. There is also a script called RunGrepTest that tests the
|
||||
options of the pcregrep command. If the C++ wrapper library is build, three
|
||||
test programs called pcrecpp_unittest, pcre_scanner_unittest, and
|
||||
pcre_stringpiece_unittest are provided.
|
||||
To test the basic PCRE library on a Unix system, run the RunTest script that is
|
||||
created by the configuring process. There is also a script called RunGrepTest
|
||||
that tests the options of the pcregrep command. If the C++ wrapper library is
|
||||
built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and
|
||||
pcre_stringpiece_unittest are also built.
|
||||
|
||||
Both the scripts and all the program tests are run if you obey "make runtest",
|
||||
"make check", or "make test". For other systems, see the instructions in
|
||||
NON-UNIX-USE.
|
||||
Both the scripts and all the program tests are run if you obey "make check" or
|
||||
"make test". For other systems, see the instructions in NON-UNIX-USE.
|
||||
|
||||
The RunTest script runs the pcretest test program (which is documented in its
|
||||
own man page) on each of the testinput files (in the testdata directory) in
|
||||
own man page) on each of the testinput files in the testdata directory in
|
||||
turn, and compares the output with the contents of the corresponding testoutput
|
||||
files. A file called testtry is used to hold the main output from pcretest
|
||||
(testsavedregex is also used as a working file). To run pcretest on just one of
|
||||
@ -331,14 +465,15 @@ the test files, give its number as an argument to RunTest, for example:
|
||||
|
||||
RunTest 2
|
||||
|
||||
The first test file can also be fed directly into the perltest script to check
|
||||
that Perl gives the same results. The only difference you should see is in the
|
||||
first few lines, where the Perl version is given instead of the PCRE version.
|
||||
The first test file can also be fed directly into the perltest.pl script to
|
||||
check that Perl gives the same results. The only difference you should see is
|
||||
in the first few lines, where the Perl version is given instead of the PCRE
|
||||
version.
|
||||
|
||||
The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(),
|
||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||
detection, and run-time flags that are specific to PCRE, as well as the POSIX
|
||||
wrapper API. It also uses the debugging flag to check some of the internals of
|
||||
wrapper API. It also uses the debugging flags to check some of the internals of
|
||||
pcre_compile().
|
||||
|
||||
If you build PCRE with a locale setting that is not the standard C locale, the
|
||||
@ -364,6 +499,9 @@ is output to say why. If running this test produces instances of the error
|
||||
in the comparison output, it means that locale is not available on your system,
|
||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||
|
||||
[If you are trying to run this test on Windows, you may be able to get it to
|
||||
work by changing "fr_FR" to "french" everywhere it occurs.]
|
||||
|
||||
The fourth test checks the UTF-8 support. It is not run automatically unless
|
||||
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
|
||||
running "configure". This file can also be fed directly to the perltest script,
|
||||
@ -373,8 +511,8 @@ commented in the script, can be be used.)
|
||||
The fifth test checks error handling with UTF-8 encoding, and internal UTF-8
|
||||
features of PCRE that are not relevant to Perl.
|
||||
|
||||
The sixth and test checks the support for Unicode character properties. It it
|
||||
not run automatically unless PCRE is built with Unicode property support. To to
|
||||
The sixth test checks the support for Unicode character properties. It is not
|
||||
run automatically unless PCRE is built with Unicode property support. To do
|
||||
this you must set --enable-unicode-properties when running "configure".
|
||||
|
||||
The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative
|
||||
@ -386,27 +524,42 @@ automatically unless PCRE is build with the relevant support.
|
||||
Character tables
|
||||
----------------
|
||||
|
||||
PCRE uses four tables for manipulating and identifying characters whose values
|
||||
are less than 256. The final argument of the pcre_compile() function is a
|
||||
pointer to a block of memory containing the concatenated tables. A call to
|
||||
pcre_maketables() can be used to generate a set of tables in the current
|
||||
locale. If the final argument for pcre_compile() is passed as NULL, a set of
|
||||
default tables that is built into the binary is used.
|
||||
For speed, PCRE uses four tables for manipulating and identifying characters
|
||||
whose code point values are less than 256. The final argument of the
|
||||
pcre_compile() function is a pointer to a block of memory containing the
|
||||
concatenated tables. A call to pcre_maketables() can be used to generate a set
|
||||
of tables in the current locale. If the final argument for pcre_compile() is
|
||||
passed as NULL, a set of default tables that is built into the binary is used.
|
||||
|
||||
The source file called chartables.c contains the default set of tables. This is
|
||||
not supplied in the distribution, but is built by the program dftables
|
||||
(compiled from dftables.c), which uses the ANSI C character handling functions
|
||||
such as isalnum(), isalpha(), isupper(), islower(), etc. to build the table
|
||||
sources. This means that the default C locale which is set for your system will
|
||||
control the contents of these default tables. You can change the default tables
|
||||
by editing chartables.c and then re-building PCRE. If you do this, you should
|
||||
probably also edit Makefile to ensure that the file doesn't ever get
|
||||
re-generated.
|
||||
The source file called pcre_chartables.c contains the default set of tables. By
|
||||
default, this is created as a copy of pcre_chartables.c.dist, which contains
|
||||
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
|
||||
for ./configure, a different version of pcre_chartables.c is built by the
|
||||
program dftables (compiled from dftables.c), which uses the ANSI C character
|
||||
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
|
||||
build the table sources. This means that the default C locale which is set for
|
||||
your system will control the contents of these default tables. You can change
|
||||
the default tables by editing pcre_chartables.c and then re-building PCRE. If
|
||||
you do this, you should take care to ensure that the file does not get
|
||||
automatically re-generated. The best way to do this is to move
|
||||
pcre_chartables.c.dist out of the way and replace it with your customized
|
||||
tables.
|
||||
|
||||
When the dftables program is run as a result of --enable-rebuild-chartables,
|
||||
it uses the default C locale that is set on your system. It does not pay
|
||||
attention to the LC_xxx environment variables. In other words, it uses the
|
||||
system's default locale rather than whatever the compiling user happens to have
|
||||
set. If you really do want to build a source set of character tables in a
|
||||
locale that is specified by the LC_xxx variables, you can run the dftables
|
||||
program by hand with the -L option. For example:
|
||||
|
||||
./dftables -L pcre_chartables.c.special
|
||||
|
||||
The first two 256-byte tables provide lower casing and case flipping functions,
|
||||
respectively. The next table consists of three 32-byte bit maps which identify
|
||||
digits, "word" characters, and white space, respectively. These are used when
|
||||
building 32-byte bit maps that represent character classes.
|
||||
building 32-byte bit maps that represent character classes for code points less
|
||||
than 256.
|
||||
|
||||
The final 256-byte table has bits indicating various character types, as
|
||||
follows:
|
||||
@ -422,108 +575,143 @@ You should not alter the set of characters that contain the 128 bit, as that
|
||||
will cause PCRE to malfunction.
|
||||
|
||||
|
||||
Manifest
|
||||
--------
|
||||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the following files:
|
||||
|
||||
(A) The actual source files of the PCRE library functions and their
|
||||
headers:
|
||||
(A) Source files of the PCRE library functions and their headers:
|
||||
|
||||
dftables.c auxiliary program for building chartables.c
|
||||
dftables.c auxiliary program for building pcre_chartables.c
|
||||
when --enable-rebuild-chartables is specified
|
||||
|
||||
pcreposix.c )
|
||||
pcre_compile.c )
|
||||
pcre_config.c )
|
||||
pcre_dfa_exec.c )
|
||||
pcre_exec.c )
|
||||
pcre_fullinfo.c )
|
||||
pcre_get.c ) sources for the functions in the library,
|
||||
pcre_globals.c ) and some internal functions that they use
|
||||
pcre_info.c )
|
||||
pcre_maketables.c )
|
||||
pcre_newline.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_refcount.c )
|
||||
pcre_study.c )
|
||||
pcre_tables.c )
|
||||
pcre_try_flipped.c )
|
||||
pcre_ucp_searchfuncs.c)
|
||||
pcre_valid_utf8.c )
|
||||
pcre_version.c )
|
||||
pcre_xclass.c )
|
||||
ucptable.c )
|
||||
pcre_chartables.c.dist a default set of character tables that assume ASCII
|
||||
coding; used, unless --enable-rebuild-chartables is
|
||||
specified, by copying to pcre_chartables.c
|
||||
|
||||
pcre_printint.src ) debugging function that is #included in pcretest, and
|
||||
) can also be #included in pcre_compile()
|
||||
pcreposix.c )
|
||||
pcre_compile.c )
|
||||
pcre_config.c )
|
||||
pcre_dfa_exec.c )
|
||||
pcre_exec.c )
|
||||
pcre_fullinfo.c )
|
||||
pcre_get.c ) sources for the functions in the library,
|
||||
pcre_globals.c ) and some internal functions that they use
|
||||
pcre_info.c )
|
||||
pcre_maketables.c )
|
||||
pcre_newline.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_refcount.c )
|
||||
pcre_study.c )
|
||||
pcre_tables.c )
|
||||
pcre_try_flipped.c )
|
||||
pcre_ucp_searchfuncs.c )
|
||||
pcre_valid_utf8.c )
|
||||
pcre_version.c )
|
||||
pcre_xclass.c )
|
||||
pcre_printint.src ) debugging function that is #included in pcretest,
|
||||
) and can also be #included in pcre_compile()
|
||||
pcre.h.in template for pcre.h when built by "configure"
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
ucp.h ) headers concerned with
|
||||
ucpinternal.h ) Unicode property handling
|
||||
ucptable.h ) (this one is the data table)
|
||||
|
||||
pcre.h the public PCRE header file
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
ucp.h ) headers concerned with
|
||||
ucpinternal.h ) Unicode property handling
|
||||
config.in template for config.h, which is built by configure
|
||||
config.h.in template for config.h, which is built by "configure"
|
||||
|
||||
pcrecpp.h the header file for the C++ wrapper
|
||||
pcrecpparg.h.in "source" for another C++ header file
|
||||
pcrecpp.cc )
|
||||
pcre_scanner.cc ) source for the C++ wrapper library
|
||||
pcrecpp.h public header file for the C++ wrapper
|
||||
pcrecpparg.h.in template for another C++ header file
|
||||
pcre_scanner.h public header file for C++ scanner functions
|
||||
pcrecpp.cc )
|
||||
pcre_scanner.cc ) source for the C++ wrapper library
|
||||
|
||||
pcre_stringpiece.h.in "source" for pcre_stringpiece.h, the header for the
|
||||
C++ stringpiece functions
|
||||
pcre_stringpiece.cc source for the C++ stringpiece functions
|
||||
pcre_stringpiece.h.in template for pcre_stringpiece.h, the header for the
|
||||
C++ stringpiece functions
|
||||
pcre_stringpiece.cc source for the C++ stringpiece functions
|
||||
|
||||
(B) Auxiliary files:
|
||||
(B) Source files for programs that use PCRE:
|
||||
|
||||
AUTHORS information about the author of PCRE
|
||||
ChangeLog log of changes to the code
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in template for Unix Makefile, which is built by configure
|
||||
NEWS important changes in this release
|
||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
||||
README this file
|
||||
RunTest.in template for a Unix shell script for running tests
|
||||
RunGrepTest.in template for a Unix shell script for pcregrep tests
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
config.h.in "source" for the config.h header file
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac the autoconf input used to build configure
|
||||
doc/Tech.Notes notes on the encoding
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre.txt plain text version of the man pages
|
||||
doc/pcretest.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre.pc.in "source" for libpcre.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
mkinstalldirs script for making install directories
|
||||
pcretest.c comprehensive test program
|
||||
pcredemo.c simple demonstration of coding calls to PCRE
|
||||
perltest Perl test program
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
pcrecpp_unittest.c )
|
||||
pcre_scanner_unittest.c ) test programs for the C++ wrapper
|
||||
pcre_stringpiece_unittest.c )
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcregrep tests
|
||||
pcredemo.c simple demonstration of coding calls to PCRE
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcretest.c comprehensive test program
|
||||
|
||||
(C) Auxiliary files for Win32 DLL
|
||||
(C) Auxiliary files:
|
||||
|
||||
libpcre.def
|
||||
libpcreposix.def
|
||||
132html script to turn "man" pages into HTML
|
||||
AUTHORS information about the author of PCRE
|
||||
ChangeLog log of changes to the code
|
||||
CleanTxt script to clean nroff output for txt man pages
|
||||
Detrail script to remove trailing spaces
|
||||
HACKING some notes about the internals of PCRE
|
||||
INSTALL generic installation instructions
|
||||
LICENCE conditions for the use of PCRE
|
||||
COPYING the same, using GNU's standard name
|
||||
Makefile.in ) template for Unix Makefile, which is built by
|
||||
) "configure"
|
||||
Makefile.am ) the automake input that was used to create
|
||||
) Makefile.in
|
||||
NEWS important changes in this release
|
||||
NON-UNIX-USE notes on building PCRE on non-Unix systems
|
||||
PrepareRelease script to make preparations for "make dist"
|
||||
README this file
|
||||
RunTest a Unix shell script for running tests
|
||||
RunGrepTest a Unix shell script for pcregrep tests
|
||||
aclocal.m4 m4 macros (generated by "aclocal")
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.ac ) the autoconf input that was used to build
|
||||
) "configure" and config.h
|
||||
depcomp ) script to find program dependencies, generated by
|
||||
) automake
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
doc/index.html.src the base HTML page
|
||||
doc/html/* HTML documentation
|
||||
doc/pcre.txt plain text version of the man pages
|
||||
doc/pcretest.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre.pc.in template for libpcre.pc for pkg-config
|
||||
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.pl Perl test program
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
pcrecpp_unittest.cc )
|
||||
pcre_scanner_unittest.cc ) test programs for the C++ wrapper
|
||||
pcre_stringpiece_unittest.cc )
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcregrep tests
|
||||
|
||||
(D) Auxiliary file for VPASCAL
|
||||
(D) Auxiliary files for cmake support
|
||||
|
||||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for VPASCAL
|
||||
|
||||
makevp.bat
|
||||
makevp_c.txt
|
||||
makevp_l.txt
|
||||
pcregexp.pas
|
||||
|
||||
(F) Auxiliary files for building PCRE "by hand"
|
||||
|
||||
pcre.h.generic ) a version of the public PCRE header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(G) Miscellaneous
|
||||
|
||||
RunTest.bat a script for running tests under Windows
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
November 2006
|
||||
Last updated: 24 April 2007
|
||||
|
282
ext/pcre/pcrelib/config.h
Normal file
282
ext/pcre/pcrelib/config.h
Normal file
@ -0,0 +1,282 @@
|
||||
|
||||
#include <php_compat.h>
|
||||
#undef PACKAGE_NAME
|
||||
#undef PACKAGE_VERSION
|
||||
#undef PACKAGE_TARNAME
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
#define SUPPORT_UCP
|
||||
#define SUPPORT_UTF8
|
||||
|
||||
|
||||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
||||
should copy the distributed config.h.generic to config.h, and then set up the
|
||||
macros the way you need them. Alternatively, you can avoid editing by using -D
|
||||
on the compiler command line to set the macro values.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use
|
||||
"configure", this can be done via --enable-ebcdic. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#ifndef HAVE_BCOPY
|
||||
#define HAVE_BCOPY 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#ifndef HAVE_DIRENT_H
|
||||
#define HAVE_DIRENT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#ifndef HAVE_DLFCN_H
|
||||
#define HAVE_DLFCN_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#ifndef HAVE_INTTYPES_H
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#ifndef HAVE_LIMITS_H
|
||||
#define HAVE_LIMITS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#ifndef HAVE_LONG_LONG
|
||||
#define HAVE_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#define HAVE_MEMMOVE 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#ifndef HAVE_MEMORY_H
|
||||
#define HAVE_MEMORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#ifndef HAVE_STDINT_H
|
||||
#define HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#ifndef HAVE_STDLIB_H
|
||||
#define HAVE_STDLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#ifndef HAVE_STRERROR
|
||||
#define HAVE_STRERROR 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#ifndef HAVE_STRING
|
||||
#define HAVE_STRING 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#ifndef HAVE_STRINGS_H
|
||||
#define HAVE_STRINGS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#ifndef HAVE_STRING_H
|
||||
#define HAVE_STRING_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strtoll' function. */
|
||||
#ifndef HAVE_STRTOLL
|
||||
#define HAVE_STRTOLL 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strtoq' function. */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#ifndef HAVE_SYS_TYPES_H
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#ifndef HAVE_UNISTD_H
|
||||
#define HAVE_UNISTD_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. On systems that
|
||||
support it, "configure" can be used to override this default. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override the default. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_DUPLENGTH
|
||||
#define MAX_DUPLENGTH 30000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* The value of NEWLINE determines the newline character sequence. On
|
||||
Unix-like systems, "configure" can be used to override the default, which
|
||||
is 10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or
|
||||
-2 (ANYCRLF). */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
||||
match() function; instead it creates its own stack by steam using
|
||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
||||
the comments and other stuff just above the match() function. On systems
|
||||
that support it, "configure" can be used to set this in the Makefile (use
|
||||
--disable-stack-for-recursion). */
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.2-RC3"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.2-RC3"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, it
|
||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
||||
compiler on non-Win32 systems. This macro appears at the start of
|
||||
every exported function that is part of the external API. It does
|
||||
not appear on functions that are "external" in the C sense, but
|
||||
which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
|
||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE_STATIC */
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#ifndef STDC_HEADERS
|
||||
#define STDC_HEADERS 1
|
||||
#endif
|
||||
|
||||
/* Define to enable support for Unicode properties */
|
||||
/* #undef SUPPORT_UCP */
|
||||
|
||||
/* Define to enable support for the UTF-8 Unicode encoding. */
|
||||
/* #undef SUPPORT_UTF8 */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.2-RC3"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -38,14 +38,15 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing default
|
||||
character tables for PCRE. The tables are built according to the default C
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE. The tables are built according to the current
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
@ -55,38 +56,48 @@ make use of its code from here in order to be consistent. */
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
FILE *f;
|
||||
const unsigned char *tables = pcre_maketables();
|
||||
const unsigned char *base_of_tables = tables;
|
||||
int i = 1;
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
if (argc != 2)
|
||||
/* By default, the default C locale is used rather than what the building user
|
||||
happens to have set. However, if the -L option is given, set the locale from
|
||||
the LC_xxx environment variables. */
|
||||
|
||||
if (argc > 1 && strcmp(argv[1], "-L") == 0)
|
||||
{
|
||||
setlocale(LC_ALL, ""); /* Set from environment variables */
|
||||
i++;
|
||||
}
|
||||
|
||||
if (argc < i + 1)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
f = fopen(argv[1], "wb");
|
||||
tables = pcre_maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
||||
about the very long string otherwise. */
|
||||
/* There are several fprintf() calls here, because gcc in pedantic mode
|
||||
complains about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file is automatically written by the dftables auxiliary \n"
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
fprintf(f,
|
||||
"This file contains the default tables for characters with codes less than\n"
|
||||
"128 (ASCII characters). These tables are used when no external tables are\n"
|
||||
"passed to PCRE.\n\n");
|
||||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256.\n\n");
|
||||
fprintf(f,
|
||||
"The following #include is present because without it gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE is built into a static\n"
|
||||
@ -171,7 +182,7 @@ if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of chartables.c */\n");
|
||||
fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,20 +0,0 @@
|
||||
LIBRARY libpcre
|
||||
EXPORTS
|
||||
pcre_malloc
|
||||
pcre_free
|
||||
pcre_config
|
||||
pcre_callout
|
||||
pcre_compile
|
||||
pcre_copy_substring
|
||||
pcre_dfa_exec
|
||||
pcre_exec
|
||||
pcre_get_substring
|
||||
pcre_get_stringnumber
|
||||
pcre_get_substring_list
|
||||
pcre_free_substring
|
||||
pcre_free_substring_list
|
||||
pcre_info
|
||||
pcre_fullinfo
|
||||
pcre_maketables
|
||||
pcre_study
|
||||
pcre_version
|
@ -1,25 +0,0 @@
|
||||
LIBRARY libpcreposix
|
||||
EXPORTS
|
||||
pcre_malloc
|
||||
pcre_free
|
||||
pcre_config
|
||||
pcre_callout
|
||||
pcre_compile
|
||||
pcre_copy_substring
|
||||
pcre_dfa_exec
|
||||
pcre_exec
|
||||
pcre_get_substring
|
||||
pcre_get_stringnumber
|
||||
pcre_get_substring_list
|
||||
pcre_free_substring
|
||||
pcre_free_substring_list
|
||||
pcre_info
|
||||
pcre_fullinfo
|
||||
pcre_maketables
|
||||
pcre_study
|
||||
pcre_version
|
||||
|
||||
regcomp
|
||||
regexec
|
||||
regerror
|
||||
regfree
|
@ -1,29 +0,0 @@
|
||||
EXPORTS
|
||||
|
||||
pcre_malloc DATA
|
||||
pcre_free DATA
|
||||
|
||||
pcre_compile
|
||||
pcre_compile2
|
||||
pcre_config
|
||||
pcre_copy_named_substring
|
||||
pcre_copy_substring
|
||||
pcre_dfa_exec
|
||||
pcre_exec
|
||||
pcre_free_substring
|
||||
pcre_free_substring_list
|
||||
pcre_fullinfo
|
||||
pcre_get_named_substring
|
||||
pcre_get_stringnumber
|
||||
pcre_get_substring
|
||||
pcre_get_substring_list
|
||||
pcre_info
|
||||
pcre_maketables
|
||||
pcre_refcount
|
||||
pcre_study
|
||||
pcre_version
|
||||
|
||||
regcomp
|
||||
regexec
|
||||
regerror
|
||||
regfree
|
@ -5,7 +5,7 @@
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -39,48 +39,33 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
#include "php_compat.h"
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
/* NOTES FOR FUTURE MAINTAINERS: Do not use numbers with leading zeros, because
|
||||
they may be treated as octal constants. The PCRE_PRERELEASE feature is for
|
||||
identifying release candidates. It might be defined as -RC2, for example. In
|
||||
real releases, it should be defined empty. Do not change the alignment of these
|
||||
statements. The code in ./configure greps out the version numbers by using "cut"
|
||||
to get values from column 29 onwards. These are substituted into pcre-config
|
||||
and libpcre.pc. The values are not put into configure.ac and substituted here
|
||||
(which would simplify this issue) because that makes life harder for those who
|
||||
cannot run ./configure. As it now stands, this file need not be edited in that
|
||||
circumstance. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 0
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 18-Dec-2006
|
||||
#define PCRE_MINOR 2
|
||||
#define PCRE_PRERELEASE -RC3
|
||||
#define PCRE_DATE 2007-06-13
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change an existing definition of PCRE_EXP_DECL. */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -121,6 +106,7 @@ extern "C" {
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@ -164,6 +150,8 @@ extern "C" {
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
@ -242,52 +230,52 @@ that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_free(void *);
|
||||
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_stack_free(void *);
|
||||
PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
@ -2,13 +2,25 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE. */
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #include is present because without it gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
@ -82,11 +94,10 @@ const unsigned char _pcre_default_tables[] = {
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
@ -158,7 +169,7 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /* X - _ */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
@ -180,4 +191,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of chartables.c */
|
||||
/* End of pcre_chartables.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -58,6 +58,11 @@ used by pcretest. DEBUG is not defined when building a production library. */
|
||||
#endif
|
||||
|
||||
|
||||
/* Macro for setting individual bits in class bitmaps. */
|
||||
|
||||
#define SETBIT(a,b) a[b/8] |= (1 << (b%8))
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Code parameters and static tables *
|
||||
*************************************************/
|
||||
@ -82,21 +87,21 @@ are simple data values; negative values are for special things like \d and so
|
||||
on. Zero means further processing is needed (for things like \x), or the escape
|
||||
is invalid. */
|
||||
|
||||
#if !EBCDIC /* This is the "normal" table for ASCII systems */
|
||||
#ifndef EBCDIC /* This is the "normal" table for ASCII systems */
|
||||
static const short int escapes[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
|
||||
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
|
||||
'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
|
||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
|
||||
-ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0, /* H - O */
|
||||
-ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W, /* P - W */
|
||||
-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
|
||||
'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */
|
||||
0, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
||||
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */
|
||||
-ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
|
||||
-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w, /* p - w */
|
||||
0, 0, -ESC_z /* x - z */
|
||||
};
|
||||
|
||||
#else /* This is the "abnormal" table for EBCDIC systems */
|
||||
#else /* This is the "abnormal" table for EBCDIC systems */
|
||||
static const short int escapes[] = {
|
||||
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
|
||||
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
|
||||
@ -106,18 +111,18 @@ static const short int escapes[] = {
|
||||
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',
|
||||
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,
|
||||
/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0,
|
||||
/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p,
|
||||
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0,
|
||||
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0,
|
||||
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,
|
||||
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
||||
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
|
||||
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
|
||||
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0
|
||||
@ -208,7 +213,7 @@ static const char *error_texts[] = {
|
||||
"malformed number or name after (?(",
|
||||
"conditional group contains more than two branches",
|
||||
"assertion expected after (?(",
|
||||
"(?R or (?digits must be followed by )",
|
||||
"(?R or (?[+-]digits must be followed by )",
|
||||
/* 30 */
|
||||
"unknown POSIX class name",
|
||||
"POSIX collating elements are not supported",
|
||||
@ -242,7 +247,8 @@ static const char *error_texts[] = {
|
||||
/* 55 */
|
||||
"repeating a DEFINE group is not allowed",
|
||||
"inconsistent NEWLINE options",
|
||||
"\\g is not followed by an (optionally braced) non-zero number"
|
||||
"\\g is not followed by a braced name or an optionally braced non-zero number",
|
||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
|
||||
};
|
||||
|
||||
|
||||
@ -262,7 +268,7 @@ For convenience, we use the same bit definitions as in chartables:
|
||||
|
||||
Then we can use ctype_digit and ctype_xdigit in the code. */
|
||||
|
||||
#if !EBCDIC /* This is the "normal" case, for ASCII systems */
|
||||
#ifndef EBCDIC /* This is the "normal" case, for ASCII systems */
|
||||
static const unsigned char digitab[] =
|
||||
{
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
@ -298,7 +304,7 @@ static const unsigned char digitab[] =
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
#else /* This is the "abnormal" case, for EBCDIC systems */
|
||||
#else /* This is the "abnormal" case, for EBCDIC systems */
|
||||
static const unsigned char digitab[] =
|
||||
{
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
|
||||
@ -312,7 +318,7 @@ static const unsigned char digitab[] =
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
|
||||
@ -346,7 +352,7 @@ static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
|
||||
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */
|
||||
0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */
|
||||
0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */
|
||||
0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
|
||||
@ -373,8 +379,8 @@ static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
|
||||
/* Definition to allow mutual recursion */
|
||||
|
||||
static BOOL
|
||||
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,
|
||||
int *, branch_chain *, compile_data *, int *);
|
||||
compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
|
||||
int *, int *, branch_chain *, compile_data *, int *);
|
||||
|
||||
|
||||
|
||||
@ -421,11 +427,11 @@ if (c == 0) *errorcodeptr = ERR1;
|
||||
a table. A non-zero result is something that can be returned immediately.
|
||||
Otherwise further processing may be required. */
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
else if (c < '0' || c > 'z') {} /* Not alphameric */
|
||||
else if ((i = escapes[c - '0']) != 0) c = i;
|
||||
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */
|
||||
else if ((i = escapes[c - 0x48]) != 0) c = i;
|
||||
#endif
|
||||
@ -452,11 +458,22 @@ else
|
||||
|
||||
/* \g must be followed by a number, either plain or braced. If positive, it
|
||||
is an absolute backreference. If negative, it is a relative backreference.
|
||||
This is a Perl 5.10 feature. */
|
||||
This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
|
||||
reference to a named group. This is part of Perl's movement towards a
|
||||
unified syntax for back references. As this is synonymous with \k{name}, we
|
||||
fudge it up by pretending it really was \k. */
|
||||
|
||||
case 'g':
|
||||
if (ptr[1] == '{')
|
||||
{
|
||||
const uschar *p;
|
||||
for (p = ptr+2; *p != 0 && *p != '}'; p++)
|
||||
if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
|
||||
if (*p != 0 && *p != '}')
|
||||
{
|
||||
c = -ESC_k;
|
||||
break;
|
||||
}
|
||||
braced = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
@ -562,10 +579,10 @@ else
|
||||
if (c == 0 && cc == '0') continue; /* Leading zeroes */
|
||||
count++;
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||
c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
|
||||
c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||
#endif
|
||||
@ -589,10 +606,10 @@ else
|
||||
{
|
||||
int cc; /* Some compilers don't like ++ */
|
||||
cc = *(++ptr); /* in initializers */
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (cc >= 'a') cc -= 32; /* Convert to upper case */
|
||||
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (cc <= 'z') cc += 64; /* Convert to upper case */
|
||||
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
|
||||
#endif
|
||||
@ -611,10 +628,10 @@ else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if !EBCDIC /* ASCII coding */
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
if (c >= 'a' && c <= 'z') c -= 32;
|
||||
c ^= 0x40;
|
||||
#else /* EBCDIC coding */
|
||||
#else /* EBCDIC coding */
|
||||
if (c >= 'a' && c <= 'z') c += 64;
|
||||
c ^= 0xC0;
|
||||
#endif
|
||||
@ -1246,6 +1263,7 @@ for (;;)
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1266,6 +1284,7 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1309,6 +1328,7 @@ for (;;)
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1329,6 +1349,7 @@ for (;;)
|
||||
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1366,6 +1387,18 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
c = *code;
|
||||
|
||||
/* Groups with zero repeats can of course be empty; skip them. */
|
||||
|
||||
if (c == OP_BRAZERO || c == OP_BRAMINZERO)
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For other groups, scan the branches. */
|
||||
|
||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
|
||||
{
|
||||
BOOL empty_branch;
|
||||
@ -1382,12 +1415,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
if (!empty_branch) return FALSE; /* All branches are non-empty */
|
||||
|
||||
/* Move past the KET and fudge things so that the increment in the "for"
|
||||
above has no effect. */
|
||||
|
||||
c = OP_END;
|
||||
code += 1 + LINK_SIZE - _pcre_OP_lengths[c];
|
||||
c = *code;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1921,6 +1949,50 @@ if (next >= 0) switch(op_code)
|
||||
case OP_NOT_WORDCHAR:
|
||||
return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;
|
||||
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_HSPACE:
|
||||
switch(next)
|
||||
{
|
||||
case 0x09:
|
||||
case 0x20:
|
||||
case 0xa0:
|
||||
case 0x1680:
|
||||
case 0x180e:
|
||||
case 0x2000:
|
||||
case 0x2001:
|
||||
case 0x2002:
|
||||
case 0x2003:
|
||||
case 0x2004:
|
||||
case 0x2005:
|
||||
case 0x2006:
|
||||
case 0x2007:
|
||||
case 0x2008:
|
||||
case 0x2009:
|
||||
case 0x200A:
|
||||
case 0x202f:
|
||||
case 0x205f:
|
||||
case 0x3000:
|
||||
return op_code != OP_HSPACE;
|
||||
default:
|
||||
return op_code == OP_HSPACE;
|
||||
}
|
||||
|
||||
case OP_VSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
switch(next)
|
||||
{
|
||||
case 0x0a:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x0d:
|
||||
case 0x85:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
return op_code != OP_VSPACE;
|
||||
default:
|
||||
return op_code == OP_VSPACE;
|
||||
}
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
@ -1955,12 +2027,57 @@ switch(op_code)
|
||||
case ESC_W:
|
||||
return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
|
||||
|
||||
case ESC_h:
|
||||
case ESC_H:
|
||||
switch(item)
|
||||
{
|
||||
case 0x09:
|
||||
case 0x20:
|
||||
case 0xa0:
|
||||
case 0x1680:
|
||||
case 0x180e:
|
||||
case 0x2000:
|
||||
case 0x2001:
|
||||
case 0x2002:
|
||||
case 0x2003:
|
||||
case 0x2004:
|
||||
case 0x2005:
|
||||
case 0x2006:
|
||||
case 0x2007:
|
||||
case 0x2008:
|
||||
case 0x2009:
|
||||
case 0x200A:
|
||||
case 0x202f:
|
||||
case 0x205f:
|
||||
case 0x3000:
|
||||
return -next != ESC_h;
|
||||
default:
|
||||
return -next == ESC_h;
|
||||
}
|
||||
|
||||
case ESC_v:
|
||||
case ESC_V:
|
||||
switch(item)
|
||||
{
|
||||
case 0x0a:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x0d:
|
||||
case 0x85:
|
||||
case 0x2028:
|
||||
case 0x2029:
|
||||
return -next != ESC_v;
|
||||
default:
|
||||
return -next == ESC_v;
|
||||
}
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
case OP_DIGIT:
|
||||
return next == -ESC_D || next == -ESC_s || next == -ESC_W;
|
||||
return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
|
||||
next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
return next == -ESC_d;
|
||||
@ -1969,10 +2086,23 @@ switch(op_code)
|
||||
return next == -ESC_S || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
return next == -ESC_s;
|
||||
return next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_HSPACE:
|
||||
return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_HSPACE:
|
||||
return next == -ESC_h;
|
||||
|
||||
/* Can't have \S in here because VT matches \S (Perl anomaly) */
|
||||
case OP_VSPACE:
|
||||
return next == -ESC_V || next == -ESC_d || next == -ESC_w;
|
||||
|
||||
case OP_NOT_VSPACE:
|
||||
return next == -ESC_v;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
return next == -ESC_W || next == -ESC_s;
|
||||
return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
return next == -ESC_w || next == -ESC_d;
|
||||
@ -2087,10 +2217,12 @@ for (;; ptr++)
|
||||
BOOL possessive_quantifier;
|
||||
BOOL is_quantifier;
|
||||
BOOL is_recurse;
|
||||
BOOL reset_bracount;
|
||||
int class_charcount;
|
||||
int class_lastchar;
|
||||
int newoptions;
|
||||
int recno;
|
||||
int refsign;
|
||||
int skipbytes;
|
||||
int subreqbyte;
|
||||
int subfirstbyte;
|
||||
@ -2515,6 +2647,133 @@ for (;; ptr++)
|
||||
else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
|
||||
c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
|
||||
|
||||
/* We need to deal with \H, \h, \V, and \v in both phases because
|
||||
they use extra memory. */
|
||||
|
||||
if (-c == ESC_h)
|
||||
{
|
||||
SETBIT(classbits, 0x09); /* HT */
|
||||
SETBIT(classbits, 0x20); /* SPACE */
|
||||
SETBIT(classbits, 0xa0); /* NBSP */
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_H)
|
||||
{
|
||||
for (c = 0; c < 32; c++)
|
||||
{
|
||||
int x = 0xff;
|
||||
switch (c)
|
||||
{
|
||||
case 0x09/8: x ^= 1 << (0x09%8); break;
|
||||
case 0x20/8: x ^= 1 << (0x20%8); break;
|
||||
case 0xa0/8: x ^= 1 << (0xa0%8); break;
|
||||
default: break;
|
||||
}
|
||||
classbits[c] |= x;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_v)
|
||||
{
|
||||
SETBIT(classbits, 0x0a); /* LF */
|
||||
SETBIT(classbits, 0x0b); /* VT */
|
||||
SETBIT(classbits, 0x0c); /* FF */
|
||||
SETBIT(classbits, 0x0d); /* CR */
|
||||
SETBIT(classbits, 0x85); /* NEL */
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
class_utf8 = TRUE;
|
||||
*class_utf8data++ = XCL_RANGE;
|
||||
class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
|
||||
class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
|
||||
}
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
if (-c == ESC_V)
  {
  /* \V inside a character class: add every character that is NOT vertical
  white space. For code points below 256, OR into each of the 32 bitmap bytes
  an all-ones mask from which the bits for LF, VT, FF, CR (0x0a-0x0d) and NEL
  (0x85) have been cleared. The variable c is reused as the byte index; the
  block finishes with "continue", so its previous value is not needed again
  here. */

  for (c = 0; c < 32; c++)
    {
    int x = 0xff;
    switch (c)
      {
      case 0x0a/8: x ^= 1 << (0x0a%8);   /* LF */
                   x ^= 1 << (0x0b%8);   /* VT */
                   x ^= 1 << (0x0c%8);   /* FF */
                   x ^= 1 << (0x0d%8);   /* CR */
                   break;
      case 0x85/8: x ^= 1 << (0x85%8);   /* NEL */
                   break;
      default:     break;
      }
    classbits[c] |= x;
    }

#ifdef SUPPORT_UTF8
  /* In UTF-8 mode, also add the code points from 0x100 upwards, leaving out
  0x2028 and 0x2029, which are the two wide characters that \v adds to a
  class. The second range therefore has to start at 0x202a; starting it at
  0x2029 would make [\V] match 0x2029 as well as [\v] does. */

  if (utf8)
    {
    class_utf8 = TRUE;
    *class_utf8data++ = XCL_RANGE;
    class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
    class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
    *class_utf8data++ = XCL_RANGE;
    class_utf8data += _pcre_ord2utf8(0x202a, class_utf8data);
    class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
    }
#endif
  continue;
  }
|
||||
|
||||
/* We need to deal with \P and \p in both phases. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
@ -2655,14 +2914,18 @@ for (;; ptr++)
|
||||
unsigned int origd = d;
|
||||
while (get_othercase_range(&cc, origd, &occ, &ocd))
|
||||
{
|
||||
if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */
|
||||
if (occ >= (unsigned int)c &&
|
||||
ocd <= (unsigned int)d)
|
||||
continue; /* Skip embedded ranges */
|
||||
|
||||
if (occ < c && ocd >= c - 1) /* Extend the basic range */
|
||||
if (occ < (unsigned int)c &&
|
||||
ocd >= (unsigned int)c - 1) /* Extend the basic range */
|
||||
{ /* if there is overlap, */
|
||||
c = occ; /* noting that if occ < c */
|
||||
continue; /* we can't have ocd > d */
|
||||
} /* because a subrange is */
|
||||
if (ocd > d && occ <= d + 1) /* always shorter than */
|
||||
if (ocd > (unsigned int)d &&
|
||||
occ <= (unsigned int)d + 1) /* always shorter than */
|
||||
{ /* the basic range. */
|
||||
d = ocd;
|
||||
continue;
|
||||
@ -3560,6 +3823,7 @@ for (;; ptr++)
|
||||
skipbytes = 0;
|
||||
bravalue = OP_CBRA;
|
||||
save_hwm = cd->hwm;
|
||||
reset_bracount = FALSE;
|
||||
|
||||
if (*(++ptr) == '?')
|
||||
{
|
||||
@ -3581,6 +3845,11 @@ for (;; ptr++)
|
||||
continue;
|
||||
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case '|': /* Reset capture count for each branch */
|
||||
reset_bracount = TRUE;
|
||||
/* Fall through */
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case ':': /* Non-capturing bracket */
|
||||
bravalue = OP_BRA;
|
||||
@ -3617,6 +3886,7 @@ for (;; ptr++)
|
||||
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
skipbytes = 3;
|
||||
refsign = -1;
|
||||
|
||||
/* Check for a test for recursion in a named group. */
|
||||
|
||||
@ -3640,7 +3910,11 @@ for (;; ptr++)
|
||||
terminator = '\'';
|
||||
ptr++;
|
||||
}
|
||||
else terminator = 0;
|
||||
else
|
||||
{
|
||||
terminator = 0;
|
||||
if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
|
||||
}
|
||||
|
||||
/* We now expect to read a name; any thing else is an error */
|
||||
|
||||
@ -3676,7 +3950,32 @@ for (;; ptr++)
|
||||
if (lengthptr != NULL) break;
|
||||
|
||||
/* In the real compile we do the work of looking for the actual
|
||||
reference. */
|
||||
reference. If the string started with "+" or "-" we require the rest to
|
||||
be digits, in which case recno will be set. */
|
||||
|
||||
if (refsign > 0)
|
||||
{
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
if (refsign == '-')
|
||||
{
|
||||
recno = cd->bracount - recno + 1;
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else recno += cd->bracount;
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Otherwise (did not start with "+" or "-"), start by looking for the
|
||||
name. */
|
||||
|
||||
slot = cd->name_table;
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
@ -3995,19 +4294,54 @@ for (;; ptr++)
|
||||
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case '-': case '+':
|
||||
case '0': case '1': case '2': case '3': case '4': /* Recursion or */
|
||||
case '5': case '6': case '7': case '8': case '9': /* subroutine */
|
||||
{
|
||||
const uschar *called;
|
||||
|
||||
if ((refsign = *ptr) == '+') ptr++;
|
||||
else if (refsign == '-')
|
||||
{
|
||||
if ((digitab[ptr[1]] & ctype_digit) == 0)
|
||||
goto OTHER_CHAR_AFTER_QUERY;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
recno = 0;
|
||||
while((digitab[*ptr] & ctype_digit) != 0)
|
||||
recno = recno * 10 + *ptr++ - '0';
|
||||
|
||||
if (*ptr != ')')
|
||||
{
|
||||
*errorcodeptr = ERR29;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
if (refsign == '-')
|
||||
{
|
||||
if (recno == 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
recno = cd->bracount - recno + 1;
|
||||
if (recno <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else if (refsign == '+')
|
||||
{
|
||||
if (recno == 0)
|
||||
{
|
||||
*errorcodeptr = ERR58;
|
||||
goto FAILED;
|
||||
}
|
||||
recno += cd->bracount;
|
||||
}
|
||||
|
||||
/* Come here from code above that handles a named recursion */
|
||||
|
||||
HANDLE_RECURSION:
|
||||
@ -4080,6 +4414,7 @@ for (;; ptr++)
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
default: /* Other characters: check option setting */
|
||||
OTHER_CHAR_AFTER_QUERY:
|
||||
set = unset = 0;
|
||||
optset = &set;
|
||||
|
||||
@ -4214,6 +4549,7 @@ for (;; ptr++)
|
||||
errorcodeptr, /* Where to put an error message */
|
||||
(bravalue == OP_ASSERTBACK ||
|
||||
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
|
||||
reset_bracount, /* True if (?| group */
|
||||
skipbytes, /* Skip over bracket number */
|
||||
&subfirstbyte, /* For possible first char */
|
||||
&subreqbyte, /* For possible last char */
|
||||
@ -4230,9 +4566,11 @@ for (;; ptr++)
|
||||
is on the bracket. */
|
||||
|
||||
/* If this is a conditional bracket, check that there are no more than
|
||||
two branches in the group, or just one if it's a DEFINE group. */
|
||||
two branches in the group, or just one if it's a DEFINE group. We do this
|
||||
in the real compile phase, not in the pre-pass, where the whole group may
|
||||
not be available. */
|
||||
|
||||
if (bravalue == OP_COND)
|
||||
if (bravalue == OP_COND && lengthptr == NULL)
|
||||
{
|
||||
uschar *tc = code;
|
||||
int condcount = 0;
|
||||
@ -4392,12 +4730,13 @@ for (;; ptr++)
|
||||
zerofirstbyte = firstbyte;
|
||||
zeroreqbyte = reqbyte;
|
||||
|
||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax) */
|
||||
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
|
||||
We also support \k{name} (.NET syntax) */
|
||||
|
||||
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))
|
||||
if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
|
||||
{
|
||||
is_recurse = FALSE;
|
||||
terminator = (*(++ptr) == '<')? '>' : '\'';
|
||||
terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
|
||||
goto NAMED_REF_OR_RECURSE;
|
||||
}
|
||||
|
||||
@ -4563,13 +4902,14 @@ This function is used during the pre-compile phase when we are trying to find
|
||||
out the amount of memory needed, as well as during the real compile phase. The
|
||||
value of lengthptr distinguishes the two phases.
|
||||
|
||||
Argument:
|
||||
Arguments:
|
||||
options option bits, including any changes for this subpattern
|
||||
oldims previous settings of ims option bits
|
||||
codeptr -> the address of the current code pointer
|
||||
ptrptr -> the address of the current pattern pointer
|
||||
errorcodeptr -> pointer to error code variable
|
||||
lookbehind TRUE if this is a lookbehind assertion
|
||||
reset_bracount TRUE to reset the count for each branch
|
||||
skipbytes skip this many bytes at start (for brackets and OP_COND)
|
||||
firstbyteptr place to put the first required character, or a negative number
|
||||
reqbyteptr place to put the last required character, or a negative number
|
||||
@ -4583,8 +4923,9 @@ Returns: TRUE on success
|
||||
|
||||
static BOOL
|
||||
compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
|
||||
int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,
|
||||
int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)
|
||||
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
|
||||
int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
|
||||
int *lengthptr)
|
||||
{
|
||||
const uschar *ptr = *ptrptr;
|
||||
uschar *code = *codeptr;
|
||||
@ -4594,6 +4935,8 @@ uschar *reverse_count = NULL;
|
||||
int firstbyte, reqbyte;
|
||||
int branchfirstbyte, branchreqbyte;
|
||||
int length;
|
||||
int orig_bracount;
|
||||
int max_bracount;
|
||||
branch_chain bc;
|
||||
|
||||
bc.outer = bcptr;
|
||||
@ -4622,8 +4965,14 @@ code += 1 + LINK_SIZE + skipbytes;
|
||||
|
||||
/* Loop for each alternative branch */
|
||||
|
||||
orig_bracount = max_bracount = cd->bracount;
|
||||
for (;;)
|
||||
{
|
||||
/* For a (?| group, reset the capturing bracket count so that each branch
|
||||
uses the same numbers. */
|
||||
|
||||
if (reset_bracount) cd->bracount = orig_bracount;
|
||||
|
||||
/* Handle a change of ims options at the start of the branch */
|
||||
|
||||
if ((options & PCRE_IMS) != oldims)
|
||||
@ -4653,6 +5002,11 @@ for (;;)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Keep the highest bracket count in case (?| was used and some branch
|
||||
has fewer than the rest. */
|
||||
|
||||
if (cd->bracount > max_bracount) max_bracount = cd->bracount;
|
||||
|
||||
/* In the real compile phase, there is some post-processing to be done. */
|
||||
|
||||
if (lengthptr == NULL)
|
||||
@ -4716,26 +5070,29 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Reached end of expression, either ')' or end of pattern. Go back through
|
||||
the alternative branches and reverse the chain of offsets, with the field in
|
||||
the BRA item now becoming an offset to the first alternative. If there are
|
||||
no alternatives, it points to the end of the group. The length in the
|
||||
terminating ket is always the length of the whole bracketed item. If any of
|
||||
the ims options were changed inside the group, compile a resetting op-code
|
||||
following, except at the very end of the pattern. Return leaving the pointer
|
||||
at the terminating char. */
|
||||
/* Reached end of expression, either ')' or end of pattern. In the real
|
||||
compile phase, go back through the alternative branches and reverse the chain
|
||||
of offsets, with the field in the BRA item now becoming an offset to the
|
||||
first alternative. If there are no alternatives, it points to the end of the
|
||||
group. The length in the terminating ket is always the length of the whole
|
||||
bracketed item. If any of the ims options were changed inside the group,
|
||||
compile a resetting op-code following, except at the very end of the pattern.
|
||||
Return leaving the pointer at the terminating char. */
|
||||
|
||||
if (*ptr != '|')
|
||||
{
|
||||
int branch_length = code - last_branch;
|
||||
do
|
||||
if (lengthptr == NULL)
|
||||
{
|
||||
int prev_length = GET(last_branch, 1);
|
||||
PUT(last_branch, 1, branch_length);
|
||||
branch_length = prev_length;
|
||||
last_branch -= branch_length;
|
||||
int branch_length = code - last_branch;
|
||||
do
|
||||
{
|
||||
int prev_length = GET(last_branch, 1);
|
||||
PUT(last_branch, 1, branch_length);
|
||||
branch_length = prev_length;
|
||||
last_branch -= branch_length;
|
||||
}
|
||||
while (branch_length > 0);
|
||||
}
|
||||
while (branch_length > 0);
|
||||
|
||||
/* Fill in the ket */
|
||||
|
||||
@ -4752,6 +5109,10 @@ for (;;)
|
||||
length += 2;
|
||||
}
|
||||
|
||||
/* Retain the highest bracket number, in case resetting was used. */
|
||||
|
||||
cd->bracount = max_bracount;
|
||||
|
||||
/* Set values to pass back */
|
||||
|
||||
*codeptr = code;
|
||||
@ -4762,17 +5123,29 @@ for (;;)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Another branch follows; insert an "or" node. Its length field points back
|
||||
/* Another branch follows. In the pre-compile phase, we can move the code
|
||||
pointer back to where it was for the start of the first branch. (That is,
|
||||
pretend that each branch is the only one.)
|
||||
|
||||
In the real compile phase, insert an ALT node. Its length field points back
|
||||
to the previous branch while the bracket remains open. At the end the chain
|
||||
is reversed. It's done like this so that the start of the bracket has a
|
||||
zero offset until it is closed, making it possible to detect recursion. */
|
||||
|
||||
*code = OP_ALT;
|
||||
PUT(code, 1, code - last_branch);
|
||||
bc.current = last_branch = code;
|
||||
code += 1 + LINK_SIZE;
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
code = *codeptr + 1 + LINK_SIZE + skipbytes;
|
||||
length += 1 + LINK_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
*code = OP_ALT;
|
||||
PUT(code, 1, code - last_branch);
|
||||
bc.current = last_branch = code;
|
||||
code += 1 + LINK_SIZE;
|
||||
}
|
||||
|
||||
ptr++;
|
||||
length += 1 + LINK_SIZE;
|
||||
}
|
||||
/* Control never reaches here */
|
||||
}
|
||||
@ -5039,7 +5412,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
||||
with errorptr and erroroffset set
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre *
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||
int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -5047,7 +5420,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
PCRE_DATA_SCOPE pcre *
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -5096,7 +5469,7 @@ if (errorcodeptr != NULL) *errorcodeptr = ERR0;
|
||||
if (erroroffset == NULL)
|
||||
{
|
||||
errorcode = ERR16;
|
||||
goto PCRE_EARLY_ERROR_RETURN;
|
||||
goto PCRE_EARLY_ERROR_RETURN2;
|
||||
}
|
||||
|
||||
*erroroffset = 0;
|
||||
@ -5109,7 +5482,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
|
||||
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
|
||||
{
|
||||
errorcode = ERR44;
|
||||
goto PCRE_UTF8_ERROR_RETURN;
|
||||
goto PCRE_EARLY_ERROR_RETURN2;
|
||||
}
|
||||
#else
|
||||
if ((options & PCRE_UTF8) != 0)
|
||||
@ -5134,7 +5507,8 @@ cd->cbits = tables + cbits_offset;
|
||||
cd->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Handle different types of newline. The three bits give seven cases. The
|
||||
current code allows for fixed one- or two-byte sequences, plus "any". */
|
||||
current code allows for fixed one- or two-byte sequences, plus "any" and
|
||||
"anycrlf". */
|
||||
|
||||
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||
{
|
||||
@ -5144,10 +5518,15 @@ switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
case PCRE_NEWLINE_ANY: newline = -1; break;
|
||||
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
|
||||
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
|
||||
}
|
||||
|
||||
if (newline < 0)
|
||||
if (newline == -2)
|
||||
{
|
||||
cd->nltype = NLTYPE_ANYCRLF;
|
||||
}
|
||||
else if (newline < 0)
|
||||
{
|
||||
cd->nltype = NLTYPE_ANY;
|
||||
}
|
||||
@ -5208,7 +5587,8 @@ outside can help speed up starting point checks. */
|
||||
code = cworkspace;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
|
||||
&code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);
|
||||
&code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
|
||||
&length);
|
||||
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
|
||||
|
||||
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
|
||||
@ -5276,7 +5656,7 @@ ptr = (const uschar *)pattern;
|
||||
code = (uschar *)codestart;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
|
||||
&errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
||||
&errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
|
||||
re->top_bracket = cd->bracount;
|
||||
re->top_backref = cd->top_backref;
|
||||
|
||||
@ -5321,9 +5701,7 @@ if (errorcode != 0)
|
||||
(pcre_free)(re);
|
||||
PCRE_EARLY_ERROR_RETURN:
|
||||
*erroroffset = ptr - (const uschar *)pattern;
|
||||
#ifdef SUPPORT_UTF8
|
||||
PCRE_UTF8_ERROR_RETURN:
|
||||
#endif
|
||||
PCRE_EARLY_ERROR_RETURN2:
|
||||
*errorptr = error_texts[errorcode];
|
||||
if (errorcodeptr != NULL) *errorcodeptr = errorcode;
|
||||
return NULL;
|
||||
@ -5413,7 +5791,7 @@ if ((re->options & PCRE_REQCHSET) != 0)
|
||||
else printf("Req char = \\x%02x%s\n", ch, caseless);
|
||||
}
|
||||
|
||||
pcre_printint(re, stdout);
|
||||
pcre_printint(re, stdout, TRUE);
|
||||
|
||||
/* This check is done here in the debugging case so that the code that
|
||||
was compiled can be seen. */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -58,7 +58,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -61,7 +61,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
@ -140,6 +140,14 @@ switch (what)
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->options & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->options & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -46,37 +46,14 @@ indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
|
||||
/**************************************************************************
|
||||
This code used to be here for use when compiling as a C++ library. However,
|
||||
according to Dair Grant it is not needed: "
|
||||
|
||||
Including 'extern "C"' in the declaration generates an "initialized and
|
||||
declared `extern'" warning from gcc 4.0.1. Since we include pcre_internal.h,
|
||||
which includes pcre.h, which declares these prototypes within an extern "C" {}
|
||||
block, we shouldn't need the prefix here.
|
||||
|
||||
So, from Release 7.0 I have cut this out.
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" void *(*pcre_malloc)(size_t) = malloc;
|
||||
extern "C" void (*pcre_free)(void *) = free;
|
||||
extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
extern "C" void (*pcre_stack_free)(void *) = free;
|
||||
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#else
|
||||
**************************************************************************/
|
||||
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
void (*pcre_stack_free)(void *) = free;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -68,7 +68,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
|
@ -7,7 +7,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -69,11 +69,7 @@ be absolutely sure we get our version. */
|
||||
|
||||
/* Get the definitions provided by running "configure" */
|
||||
|
||||
#ifdef PHP_WIN32
|
||||
# include "config.w32.h"
|
||||
#else
|
||||
# include <php_config.h>
|
||||
#endif
|
||||
#include "config.h"
|
||||
|
||||
/* Standard C headers plus the external interface definition. The only time
|
||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
@ -87,8 +83,58 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef PCRE_SPY
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
/* When compiling a DLL for Windows, the exported symbols have to be declared
|
||||
using some MS magic. I found some useful information on this web page:
|
||||
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the
|
||||
information there, using __declspec(dllexport) without "extern" we have a
|
||||
definition; with "extern" we have a declaration. The settings here override the
|
||||
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL,
|
||||
which is all that is needed for applications (they just import the symbols). We
|
||||
use:
|
||||
|
||||
PCRE_EXP_DECL for declarations
|
||||
PCRE_EXP_DEFN for definitions of exported functions
|
||||
PCRE_EXP_DATA_DEFN for definitions of exported variables
|
||||
|
||||
The reason for the two DEFN macros is that in non-Windows environments, one
|
||||
does not want to have "extern" before variable definitions because it leads to
|
||||
compiler warnings. So we distinguish between functions and variables. In
|
||||
Windows, the two should always be the same.
|
||||
|
||||
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest,
|
||||
which is an application, but needs to import this file in order to "peek" at
|
||||
internals, can #include pcre.h first to get an application's-eye view.
|
||||
|
||||
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon,
|
||||
special-purpose environments) might want to stick other stuff in front of
|
||||
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and
|
||||
PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE_EXP_DEFN __declspec(dllexport)
|
||||
# define PCRE_EXP_DATA_DEFN __declspec(dllexport)
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# define PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
#
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DEFN PCRE_EXP_DECL
|
||||
# endif
|
||||
# ifndef PCRE_EXP_DATA_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||
@ -129,21 +175,22 @@ characters only go up to 0x7fffffff (though Unicode doesn't go beyond
|
||||
#define NOTACHAR 0xffffffff
|
||||
|
||||
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
|
||||
and "all" at present). The following macros are used to package up testing for
|
||||
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
|
||||
indicate in which datablock the parameters exist, and what the start/end of
|
||||
string field names are. */
|
||||
"any" and "anycrlf" at present). The following macros are used to package up
|
||||
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various
|
||||
modules to indicate in which datablock the parameters exist, and what the
|
||||
start/end of string field names are. */
|
||||
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */
|
||||
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */
|
||||
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */
|
||||
|
||||
/* This macro checks for a newline at the given position */
|
||||
|
||||
#define IS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) < NLBLOCK->PSEND && \
|
||||
_pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
|
||||
) \
|
||||
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
|
||||
utf8)) \
|
||||
: \
|
||||
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
|
||||
(p)[0] == NLBLOCK->nl[0] && \
|
||||
@ -156,8 +203,8 @@ string field names are. */
|
||||
#define WAS_NEWLINE(p) \
|
||||
((NLBLOCK->nltype != NLTYPE_FIXED)? \
|
||||
((p) > NLBLOCK->PSSTART && \
|
||||
_pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
|
||||
) \
|
||||
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
|
||||
&(NLBLOCK->nllen), utf8)) \
|
||||
: \
|
||||
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
|
||||
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
|
||||
@ -182,10 +229,12 @@ must begin with PCRE_. */
|
||||
#define USPTR const unsigned char *
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Include the public PCRE header and the definitions of UCP character property
|
||||
values. */
|
||||
|
||||
#include "pcre.h"
|
||||
#include <pcre.h>
|
||||
#include "ucp.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
@ -193,7 +242,9 @@ need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strlen(s) _strlen(s)
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcmp(s,c,n) _memcmp(s,c,n)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
@ -202,23 +253,31 @@ option on the command line. */
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there some non-Unix environments where this is the case). This assumes
|
||||
that all calls to memmove are moving strings upwards in store, which is the
|
||||
case in PCRE. */
|
||||
neither (there are some non-Unix environments where this is the case). */
|
||||
|
||||
#if ! HAVE_MEMMOVE
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#if HAVE_BCOPY
|
||||
#ifdef HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
static void *
|
||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||
pcre_memmove(void *d, const void *s, size_t n)
|
||||
{
|
||||
size_t i;
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return dest;
|
||||
unsigned char *dest = (unsigned char *)d;
|
||||
const unsigned char *src = (const unsigned char *)s;
|
||||
if (dest > src)
|
||||
{
|
||||
dest += n;
|
||||
src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
return (void *)dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < n; ++i) *dest++ = *src++;
|
||||
return (void *)(dest - n);
|
||||
}
|
||||
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
@ -443,7 +502,8 @@ bits. */
|
||||
/* Masks for identifying the public options that are permitted at compile
|
||||
time, run time, or study time, respectively. */
|
||||
|
||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
|
||||
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
|
||||
PCRE_NEWLINE_ANYCRLF)
|
||||
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
@ -545,9 +605,9 @@ ESC_Z to detect the types that may be repeated. These are the types that
|
||||
consume characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
|
||||
ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
|
||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
@ -569,133 +629,138 @@ enum {
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
||||
OP_WORD_BOUNDARY, /* 4 \b */
|
||||
OP_NOT_DIGIT, /* 5 \D */
|
||||
OP_DIGIT, /* 6 \d */
|
||||
OP_NOT_WHITESPACE, /* 7 \S */
|
||||
OP_WHITESPACE, /* 8 \s */
|
||||
OP_NOT_WORDCHAR, /* 9 \W */
|
||||
OP_WORDCHAR, /* 10 \w */
|
||||
OP_ANY, /* 11 Match any character */
|
||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 13 \P (not Unicode property) */
|
||||
OP_PROP, /* 14 \p (Unicode property) */
|
||||
OP_ANYNL, /* 15 \R (any newline sequence) */
|
||||
OP_EXTUNI, /* 16 \X (extended Unicode sequence */
|
||||
OP_EODN, /* 17 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 18 End of data: \z */
|
||||
OP_SET_SOM, /* 3 Set start of match (\K) */
|
||||
OP_NOT_WORD_BOUNDARY, /* 4 \B */
|
||||
OP_WORD_BOUNDARY, /* 5 \b */
|
||||
OP_NOT_DIGIT, /* 6 \D */
|
||||
OP_DIGIT, /* 7 \d */
|
||||
OP_NOT_WHITESPACE, /* 8 \S */
|
||||
OP_WHITESPACE, /* 9 \s */
|
||||
OP_NOT_WORDCHAR, /* 10 \W */
|
||||
OP_WORDCHAR, /* 11 \w */
|
||||
OP_ANY, /* 12 Match any character */
|
||||
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 14 \P (not Unicode property) */
|
||||
OP_PROP, /* 15 \p (Unicode property) */
|
||||
OP_ANYNL, /* 16 \R (any newline sequence) */
|
||||
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
|
||||
OP_HSPACE, /* 18 \h (horizontal whitespace) */
|
||||
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
|
||||
OP_VSPACE, /* 20 \v (vertical whitespace) */
|
||||
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
|
||||
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 23 End of data: \z */
|
||||
|
||||
OP_OPT, /* 19 Set runtime options */
|
||||
OP_CIRC, /* 20 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 21 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 22 Match one character, casefully */
|
||||
OP_CHARNC, /* 23 Match one character, caselessly */
|
||||
OP_NOT, /* 24 Match one character, not the following one */
|
||||
OP_OPT, /* 24 Set runtime options */
|
||||
OP_CIRC, /* 25 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 26 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 27 Match one character, casefully */
|
||||
OP_CHARNC, /* 28 Match one character, caselessly */
|
||||
OP_NOT, /* 29 Match one character, not the following one */
|
||||
|
||||
OP_STAR, /* 25 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 26 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 27 the minimizing one second. */
|
||||
OP_MINPLUS, /* 28 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 29 */
|
||||
OP_MINQUERY, /* 30 */
|
||||
OP_STAR, /* 30 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 32 the minimizing one second. */
|
||||
OP_MINPLUS, /* 33 This first set applies to single characters.*/
|
||||
OP_QUERY, /* 34 */
|
||||
OP_MINQUERY, /* 35 */
|
||||
|
||||
OP_UPTO, /* 31 From 0 to n matches */
|
||||
OP_MINUPTO, /* 32 */
|
||||
OP_EXACT, /* 33 Exactly n matches */
|
||||
OP_UPTO, /* 36 From 0 to n matches */
|
||||
OP_MINUPTO, /* 37 */
|
||||
OP_EXACT, /* 38 Exactly n matches */
|
||||
|
||||
OP_POSSTAR, /* 34 Possessified star */
|
||||
OP_POSPLUS, /* 35 Possessified plus */
|
||||
OP_POSQUERY, /* 36 Posesssified query */
|
||||
OP_POSUPTO, /* 37 Possessified upto */
|
||||
OP_POSSTAR, /* 39 Possessified star */
|
||||
OP_POSPLUS, /* 40 Possessified plus */
|
||||
OP_POSQUERY, /* 41 Possessified query */
|
||||
OP_POSUPTO, /* 42 Possessified upto */
|
||||
|
||||
OP_NOTSTAR, /* 38 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 39 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 40 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 41 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 42 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 43 */
|
||||
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
|
||||
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
|
||||
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
|
||||
OP_NOTMINQUERY, /* 48 */
|
||||
|
||||
OP_NOTUPTO, /* 44 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 45 */
|
||||
OP_NOTEXACT, /* 46 Exactly n matches */
|
||||
OP_NOTUPTO, /* 49 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 50 */
|
||||
OP_NOTEXACT, /* 51 Exactly n matches */
|
||||
|
||||
OP_NOTPOSSTAR, /* 47 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 48 */
|
||||
OP_NOTPOSQUERY, /* 49 */
|
||||
OP_NOTPOSUPTO, /* 50 */
|
||||
OP_NOTPOSSTAR, /* 52 Possessified versions */
|
||||
OP_NOTPOSPLUS, /* 53 */
|
||||
OP_NOTPOSQUERY, /* 54 */
|
||||
OP_NOTPOSUPTO, /* 55 */
|
||||
|
||||
OP_TYPESTAR, /* 51 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 52 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 53 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 54 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 55 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 56 */
|
||||
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 61 */
|
||||
|
||||
OP_TYPEUPTO, /* 57 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 58 */
|
||||
OP_TYPEEXACT, /* 59 Exactly n matches */
|
||||
OP_TYPEUPTO, /* 62 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 63 */
|
||||
OP_TYPEEXACT, /* 64 Exactly n matches */
|
||||
|
||||
OP_TYPEPOSSTAR, /* 60 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 61 */
|
||||
OP_TYPEPOSQUERY, /* 62 */
|
||||
OP_TYPEPOSUPTO, /* 63 */
|
||||
OP_TYPEPOSSTAR, /* 65 Possessified versions */
|
||||
OP_TYPEPOSPLUS, /* 66 */
|
||||
OP_TYPEPOSQUERY, /* 67 */
|
||||
OP_TYPEPOSUPTO, /* 68 */
|
||||
|
||||
OP_CRSTAR, /* 64 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 65 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 66 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 67 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 68 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 69 */
|
||||
OP_CRRANGE, /* 70 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 71 */
|
||||
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 73 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 74 */
|
||||
OP_CRRANGE, /* 75 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 76 */
|
||||
|
||||
OP_CLASS, /* 72 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 73 Same, but the bitmap was created from a negative
|
||||
OP_CLASS, /* 77 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a UTF-8
|
||||
character > 255 is encountered. */
|
||||
|
||||
OP_XCLASS, /* 74 Extended class for handling UTF-8 chars within the
|
||||
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
|
||||
class. This does both positive and negative. */
|
||||
|
||||
OP_REF, /* 75 Match a back reference */
|
||||
OP_RECURSE, /* 76 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 77 Call out to external function if provided */
|
||||
OP_REF, /* 80 Match a back reference */
|
||||
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 82 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 78 Start of alternation */
|
||||
OP_KET, /* 79 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 80 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 81 order. They are for groups the repeat for ever. */
|
||||
OP_ALT, /* 83 Start of alternation */
|
||||
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 85 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
|
||||
|
||||
OP_ASSERT, /* 82 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 83 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 84 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
|
||||
OP_REVERSE, /* 86 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 87 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 88 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 89 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
|
||||
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
|
||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 87 Atomic group */
|
||||
OP_BRA, /* 88 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 89 Start of capturing bracket */
|
||||
OP_COND, /* 90 Conditional group */
|
||||
OP_ONCE, /* 92 Atomic group */
|
||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 94 Start of capturing bracket */
|
||||
OP_COND, /* 95 Conditional group */
|
||||
|
||||
/* These three must follow the previous three, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 91 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 92 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 93 Conditional group, check empty */
|
||||
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
|
||||
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 98 Conditional group, check empty */
|
||||
|
||||
OP_CREF, /* 94 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 95 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 96 The DEFINE condition */
|
||||
OP_CREF, /* 99 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 100 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 101 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 97 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 98 order. */
|
||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 103 order. */
|
||||
};
|
||||
|
||||
|
||||
@ -703,10 +768,10 @@ enum {
|
||||
for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||
"End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||
"notprop", "prop", "anynl", "extuni", \
|
||||
"\\Z", "\\z", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"Opt", "^", "$", "char", "charnc", "not", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
@ -733,9 +798,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
|
||||
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, /* Any, Anybyte */ \
|
||||
3, 3, 1, 1, /* NOTPROP, PROP, EXTUNI, ANYNL */ \
|
||||
3, 3, 1, /* NOTPROP, PROP, ANYNL (\R) */ \
|
||||
1, 1, 1, 1, 1, /* \H, \h, \V, \v, EXTUNI */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Charnc - the minimum length */ \
|
||||
@ -795,7 +862,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -884,21 +951,11 @@ typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
USPTR save_start; /* Old value of md->start_match */
|
||||
USPTR save_start; /* Old value of mstart */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
|
||||
step with any changes of local variable. However, the pointer to the current
|
||||
frame must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure. NOTE:
|
||||
This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
pointer at the start of each subpattern, so as to detect when an empty string
|
||||
has been matched by a subpattern - to break infinite loops. */
|
||||
@ -935,7 +992,7 @@ typedef struct match_data {
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
USPTR start_subject; /* Start of the subject string */
|
||||
USPTR end_subject; /* End of the subject string */
|
||||
USPTR start_match; /* Start of this match attempt */
|
||||
USPTR start_match_ptr; /* Start of matched string */
|
||||
USPTR end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
@ -944,7 +1001,6 @@ typedef struct match_data {
|
||||
int eptrn; /* Next free eptrblock */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
||||
} match_data;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
@ -1030,16 +1086,16 @@ extern const uschar _pcre_OP_lengths[];
|
||||
one of the exported public functions. They have to be "external" in the C
|
||||
sense, but are not part of the PCRE public API. */
|
||||
|
||||
extern BOOL _pcre_is_newline(const uschar *, const uschar *, int *,
|
||||
BOOL);
|
||||
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, const uschar *, int *,
|
||||
BOOL);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
#endif
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -42,9 +42,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
|
||||
so for now the type isn't passed into the functions. It can easily be added
|
||||
later if required. The full list of Unicode newline characters is taken from
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
@ -61,6 +60,7 @@ string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
@ -69,12 +69,23 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
switch(c)
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
@ -99,6 +110,7 @@ the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
@ -107,8 +119,8 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
|
||||
BOOL utf8)
|
||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
@ -118,7 +130,16 @@ if (utf8)
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
switch(c)
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -62,6 +62,7 @@ Returns: number of characters placed in the buffer
|
||||
int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
@ -73,6 +74,9 @@ for (j = i; j > 0; j--)
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -71,6 +71,12 @@ print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||
{
|
||||
int c = *ptr;
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
utf8 = utf8; /* Avoid compiler warning */
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
|
||||
#else
|
||||
if (!utf8 || (c & 0xc0) != 0xc0)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
@ -102,6 +108,7 @@ else
|
||||
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -134,10 +141,13 @@ return (ptype == pvalue)? "??" : "??";
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. */
|
||||
However, we assume that what we are passed is a compiled regex. The
|
||||
print_lengths flag controls whether offsets and lengths of items are printed.
|
||||
They can be turned off from pcretest so that automatic tests on bytecode can be
|
||||
written that do not depend on the value of LINK_SIZE. */
|
||||
|
||||
static void
|
||||
pcre_printint(pcre *external_re, FILE *f)
|
||||
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)external_re;
|
||||
uschar *codestart, *code;
|
||||
@ -168,7 +178,10 @@ for(;;)
|
||||
int c;
|
||||
int extra = 0;
|
||||
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
@ -205,8 +218,9 @@ for(;;)
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
|
||||
GET2(code, 1+LINK_SIZE));
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
@ -223,7 +237,9 @@ for(;;)
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
@ -339,7 +355,9 @@ for(;;)
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -63,7 +63,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
|
@ -1,194 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
#include "config.h"
|
||||
#include "pcre_scanner.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
Scanner::Scanner()
|
||||
: data_(),
|
||||
input_(data_),
|
||||
skip_(NULL),
|
||||
should_skip_(false),
|
||||
skip_repeat_(false),
|
||||
save_comments_(false),
|
||||
comments_(NULL),
|
||||
comments_offset_(0) {
|
||||
}
|
||||
|
||||
Scanner::Scanner(const string& in)
|
||||
: data_(in),
|
||||
input_(data_),
|
||||
skip_(NULL),
|
||||
should_skip_(false),
|
||||
skip_repeat_(false),
|
||||
save_comments_(false),
|
||||
comments_(NULL),
|
||||
comments_offset_(0) {
|
||||
}
|
||||
|
||||
Scanner::~Scanner() {
|
||||
delete skip_;
|
||||
delete comments_;
|
||||
}
|
||||
|
||||
void Scanner::SetSkipExpression(const char* re) {
|
||||
delete skip_;
|
||||
if (re != NULL) {
|
||||
skip_ = new RE(re);
|
||||
should_skip_ = true;
|
||||
skip_repeat_ = true;
|
||||
ConsumeSkip();
|
||||
} else {
|
||||
skip_ = NULL;
|
||||
should_skip_ = false;
|
||||
skip_repeat_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Scanner::Skip(const char* re) {
|
||||
delete skip_;
|
||||
if (re != NULL) {
|
||||
skip_ = new RE(re);
|
||||
should_skip_ = true;
|
||||
skip_repeat_ = false;
|
||||
ConsumeSkip();
|
||||
} else {
|
||||
skip_ = NULL;
|
||||
should_skip_ = false;
|
||||
skip_repeat_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void Scanner::DisableSkip() {
|
||||
assert(skip_ != NULL);
|
||||
should_skip_ = false;
|
||||
}
|
||||
|
||||
void Scanner::EnableSkip() {
|
||||
assert(skip_ != NULL);
|
||||
should_skip_ = true;
|
||||
ConsumeSkip();
|
||||
}
|
||||
|
||||
int Scanner::LineNumber() const {
|
||||
// TODO: Make it more efficient by keeping track of the last point
|
||||
// where we computed line numbers and counting newlines since then.
|
||||
// We could use std:count, but not all systems have it. :-(
|
||||
int count = 1;
|
||||
for (const char* p = data_.data(); p < input_.data(); ++p)
|
||||
if (*p == '\n')
|
||||
++count;
|
||||
return count;
|
||||
}
|
||||
|
||||
int Scanner::Offset() const {
|
||||
return input_.data() - data_.c_str();
|
||||
}
|
||||
|
||||
bool Scanner::LookingAt(const RE& re) const {
|
||||
int consumed;
|
||||
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
bool Scanner::Consume(const RE& re,
|
||||
const Arg& arg0,
|
||||
const Arg& arg1,
|
||||
const Arg& arg2) {
|
||||
const bool result = re.Consume(&input_, arg0, arg1, arg2);
|
||||
if (result && should_skip_) ConsumeSkip();
|
||||
return result;
|
||||
}
|
||||
|
||||
// helper function to consume *skip_ and honour save_comments_
|
||||
void Scanner::ConsumeSkip() {
|
||||
const char* start_data = input_.data();
|
||||
while (skip_->Consume(&input_)) {
|
||||
if (!skip_repeat_) {
|
||||
// Only one skip allowed.
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (save_comments_) {
|
||||
if (comments_ == NULL) {
|
||||
comments_ = new vector<StringPiece>;
|
||||
}
|
||||
// already pointing one past end, so no need to +1
|
||||
int length = input_.data() - start_data;
|
||||
if (length > 0) {
|
||||
comments_->push_back(StringPiece(start_data, length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
|
||||
// short circuit out if we've not yet initialized comments_
|
||||
// (e.g., when save_comments is false)
|
||||
if (!comments_) {
|
||||
return;
|
||||
}
|
||||
// TODO: if we guarantee that comments_ will contain StringPieces
|
||||
// that are ordered by their start, then we can do a binary search
|
||||
// for the first StringPiece at or past start and then scan for the
|
||||
// ones contained in the range, quit early (use equal_range or
|
||||
// lower_bound)
|
||||
for (vector<StringPiece>::const_iterator it = comments_->begin();
|
||||
it != comments_->end(); ++it) {
|
||||
if ((it->data() >= data_.c_str() + start &&
|
||||
it->data() + it->size() <= data_.c_str() + end)) {
|
||||
ranges->push_back(*it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Appends to *ranges the comments recorded since the previous call and
// advances comments_offset_ past them, so each comment is handed out once.
// Does nothing when no comments have been recorded yet (e.g. when
// save_comments is false).
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  if (comments_ == NULL) {
    return;
  }
  while (comments_offset_ < static_cast<int>(comments_->size())) {
    ranges->push_back((*comments_)[comments_offset_]);
    ++comments_offset_;
  }
}
|
||||
|
||||
} // namespace pcrecpp
|
@ -1,171 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
//
|
||||
// Regular-expression based scanner for parsing an input stream.
|
||||
//
|
||||
// Example 1: parse a sequence of "var = number" entries from input:
|
||||
//
|
||||
// Scanner scanner(input);
|
||||
// string var;
|
||||
// int number;
|
||||
// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
|
||||
// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
|
||||
// ...;
|
||||
// }
|
||||
|
||||
#ifndef _PCRE_SCANNER_H
|
||||
#define _PCRE_SCANNER_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pcrecpp.h>
|
||||
#include <pcre_stringpiece.h>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
class Scanner {
|
||||
public:
|
||||
Scanner();
|
||||
explicit Scanner(const std::string& input);
|
||||
~Scanner();
|
||||
|
||||
// Return current line number. The returned line-number is
|
||||
// one-based. I.e. it returns 1 + the number of consumed newlines.
|
||||
//
|
||||
// Note: this method may be slow. It may take time proportional to
|
||||
// the size of the input.
|
||||
int LineNumber() const;
|
||||
|
||||
// Return the byte-offset that the scanner is looking in the
|
||||
// input data;
|
||||
int Offset() const;
|
||||
|
||||
// Return true iff the start of the remaining input matches "re"
|
||||
bool LookingAt(const RE& re) const;
|
||||
|
||||
// Return true iff all of the following are true
|
||||
// a. the start of the remaining input matches "re",
|
||||
// b. if any arguments are supplied, matched sub-patterns can be
|
||||
// parsed and stored into the arguments.
|
||||
// If it returns true, it skips over the matched input and any
|
||||
// following input that matches the "skip" regular expression.
|
||||
bool Consume(const RE& re,
|
||||
const Arg& arg0 = no_arg,
|
||||
const Arg& arg1 = no_arg,
|
||||
const Arg& arg2 = no_arg
|
||||
// TODO: Allow more arguments?
|
||||
);
|
||||
|
||||
// Set the "skip" regular expression. If after consuming some data,
|
||||
// a prefix of the input matches this RE, it is automatically
|
||||
// skipped. For example, a programming language scanner would use
|
||||
// a skip RE that matches white space and comments.
|
||||
//
|
||||
// scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
|
||||
//
|
||||
// Skipping repeats as long as it succeeds. We used to let people do
|
||||
// this by writing "(...)*" in the regular expression, but that added
|
||||
// up to lots of recursive calls within the pcre library, so now we
|
||||
// control repetition explicitly via the function call API.
|
||||
//
|
||||
// You can pass NULL for "re" if you do not want any data to be skipped.
|
||||
void Skip(const char* re); // DEPRECATED; does *not* repeat
|
||||
void SetSkipExpression(const char* re);
|
||||
|
||||
// Temporarily pause "skip"ing. This
|
||||
// Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
|
||||
// is similar to
|
||||
// Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
|
||||
// but avoids creating/deleting new RE objects.
|
||||
void DisableSkip();
|
||||
|
||||
// Reenable previously paused skipping. Any prefix of the input
|
||||
// that matches the skip pattern is immediately dropped.
|
||||
void EnableSkip();
|
||||
|
||||
/***** Special wrappers around SetSkip() for some common idioms *****/
|
||||
|
||||
// Arranges to skip whitespace, C comments, C++ comments.
|
||||
// The overall RE is a disjunction of the following REs:
|
||||
// \\s whitespace
|
||||
// //.*\n C++ comment
|
||||
// /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
|
||||
// We get repetition via the semantics of SetSkipExpression, not by using *
|
||||
void SkipCXXComments() {
|
||||
SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
|
||||
}
|
||||
|
||||
void set_save_comments(bool comments) {
|
||||
save_comments_ = comments;
|
||||
}
|
||||
|
||||
bool save_comments() {
|
||||
return save_comments_;
|
||||
}
|
||||
|
||||
// Append to vector ranges the comments found in the
|
||||
// byte range [start,end] (inclusive) of the input data.
|
||||
// Only comments that were extracted entirely within that
|
||||
// range are returned: no range splitting of atomically-extracted
|
||||
// comments is performed.
|
||||
void GetComments(int start, int end, std::vector<StringPiece> *ranges);
|
||||
|
||||
// Append to vector ranges the comments added
|
||||
// since the last time this was called. This
|
||||
// functionality is provided for efficiency when
|
||||
// interleaving scanning with parsing.
|
||||
void GetNextComments(std::vector<StringPiece> *ranges);
|
||||
|
||||
private:
|
||||
std::string data_; // All the input data
|
||||
StringPiece input_; // Unprocessed input
|
||||
RE* skip_; // If non-NULL, RE for skipping input
|
||||
bool should_skip_; // If true, use skip_
|
||||
bool skip_repeat_; // If true, repeat skip_ as long as it works
|
||||
bool save_comments_; // If true, aggregate the skip expression
|
||||
|
||||
// the skipped comments
|
||||
// TODO: later consider requiring that the StringPieces be added
|
||||
// in order by their start position
|
||||
std::vector<StringPiece> *comments_;
|
||||
|
||||
// the offset into comments_ that has been returned by GetNextComments
|
||||
int comments_offset_;
|
||||
|
||||
// helper function to consume *skip_ and honour
|
||||
// save_comments_
|
||||
void ConsumeSkip();
|
||||
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
#endif /* _PCRE_SCANNER_H */
|
@ -1,152 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Greg J. Badros
|
||||
//
|
||||
// Unittest for scanner, especially GetNextComments and GetComments()
|
||||
// functionality.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <pcre_stringpiece.h>
|
||||
#include <pcre_scanner.h>
|
||||
|
||||
#define FLAGS_unittest_stack_size 49152
|
||||
|
||||
// Dies with a fatal error if the two values are not equal.
|
||||
#define CHECK_EQ(a, b) do { \
|
||||
if ( (a) != (b) ) { \
|
||||
fprintf(stderr, "%s:%d: Check failed because %s != %s\n", \
|
||||
__FILE__, __LINE__, #a, #b); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
using std::vector;
|
||||
using pcrecpp::StringPiece;
|
||||
using pcrecpp::Scanner;
|
||||
|
||||
// Scans three "name = number;" assignments, each followed by a comment,
// and verifies the parsed values, the reported line number, and the
// comments handed out by GetNextComments() and GetComments().
static void TestScanner() {
  const char input[] =
      "\n"
      "alpha = 1; // this sets alpha\n"
      "bravo = 2; // bravo is set here\n"
      "gamma = 33; /* and here is gamma */\n";
  const char* const assignment_re = "(\\w+) = (\\d+);";

  // Offsets into "input" used for the range queries further down.
  const int whole_input = static_cast<int>(sizeof(input));
  const int first_slash = static_cast<int>(strchr(input, '/') - input);
  const int after_second_newline =
      static_cast<int>(strchr(input + 1, '\n') - input + 1);

  string var;
  int number;
  vector<StringPiece> comments;

  Scanner s(input);
  // The skip expression is installed before comment saving is turned on,
  // so whatever is skipped right away is not recorded.
  s.SkipCXXComments();
  s.set_save_comments(true);

  // First assignment.
  s.Consume(assignment_re, &var, &number);
  CHECK_EQ(var, "alpha");
  CHECK_EQ(number, 1);
  CHECK_EQ(s.LineNumber(), 3);
  s.GetNextComments(&comments);
  CHECK_EQ(comments.size(), 1);
  CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
  comments.clear();

  // Second assignment.
  s.Consume(assignment_re, &var, &number);
  CHECK_EQ(var, "bravo");
  CHECK_EQ(number, 2);
  s.GetNextComments(&comments);
  CHECK_EQ(comments.size(), 1);
  CHECK_EQ(comments[0].as_string(), " // bravo is set here\n");
  comments.clear();

  // Third assignment.
  s.Consume(assignment_re, &var, &number);
  CHECK_EQ(var, "gamma");
  CHECK_EQ(number, 33);
  s.GetNextComments(&comments);
  CHECK_EQ(comments.size(), 1);
  CHECK_EQ(comments[0].as_string(), " /* and here is gamma */\n");
  comments.clear();

  // A range spanning the whole input returns every saved comment.
  s.GetComments(0, whole_input, &comments);
  CHECK_EQ(comments.size(), 3);
  CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
  CHECK_EQ(comments[1].as_string(), " // bravo is set here\n");
  CHECK_EQ(comments[2].as_string(), " /* and here is gamma */\n");
  comments.clear();

  // A range that stops at the first '/' contains no complete comment.
  s.GetComments(0, first_slash, &comments);
  CHECK_EQ(comments.size(), 0);
  comments.clear();

  // Starting one byte before the first '/' still covers all three.
  s.GetComments(first_slash - 1, whole_input, &comments);
  CHECK_EQ(comments.size(), 3);
  CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
  CHECK_EQ(comments[1].as_string(), " // bravo is set here\n");
  CHECK_EQ(comments[2].as_string(), " /* and here is gamma */\n");
  comments.clear();

  // Ending just after the second newline isolates the first comment.
  s.GetComments(first_slash - 1, after_second_newline, &comments);
  CHECK_EQ(comments.size(), 1);
  CHECK_EQ(comments[0].as_string(), " // this sets alpha\n");
  comments.clear();
}
|
||||
|
||||
static void TestBigComment() {
|
||||
string input;
|
||||
for (int i = 0; i < 1024; ++i) {
|
||||
char buf[1024];
|
||||
snprintf(buf, sizeof(buf), " # Comment %d\n", i);
|
||||
input += buf;
|
||||
}
|
||||
input += "name = value;\n";
|
||||
|
||||
Scanner s(input.c_str());
|
||||
s.SetSkipExpression("\\s+|#.*\n");
|
||||
|
||||
string name;
|
||||
string value;
|
||||
s.Consume("(\\w+) = (\\w+);", &name, &value);
|
||||
CHECK_EQ(name, "name");
|
||||
CHECK_EQ(value, "value");
|
||||
}
|
||||
|
||||
// TODO: also test scanner and big-comment in a thread with a
|
||||
// small stack size
|
||||
|
||||
// Test driver: runs both scanner tests and prints "OK" when neither of
// them exited through a failed check.  Command-line arguments are ignored.
int main(int /*argc*/, char** /*argv*/) {
  TestScanner();
  TestBigComment();

  // Done
  fputs("OK\n", stdout);
  return 0;
}
|
@ -1,39 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: wilsonh@google.com (Wilson Hsieh)
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include "config.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
|
||||
// Writes the bytes referenced by "piece" to stream "o" (going through a
// temporary std::string copy) and returns the stream for chaining.
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece) {
  o << piece.as_string();
  return o;
}
|
@ -1,172 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
//
|
||||
// A string like object that points into another piece of memory.
|
||||
// Useful for providing an interface that allows clients to easily
|
||||
// pass in either a "const char*" or a "string".
|
||||
//
|
||||
// Arghh! I wish C++ literals were automatically of type "string".
|
||||
|
||||
#ifndef _PCRE_STRINGPIECE_H
|
||||
#define _PCRE_STRINGPIECE_H
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <iosfwd> // for ostream forward-declaration
|
||||
|
||||
#if 1
|
||||
#define HAVE_TYPE_TRAITS
|
||||
#include <type_traits.h>
|
||||
#elif 0
|
||||
#define HAVE_TYPE_TRAITS
|
||||
#include <bits/type_traits.h>
|
||||
#endif
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// A non-owning (pointer, length) view of a run of bytes that lives
// elsewhere.  The referenced memory must outlive the StringPiece.
class StringPiece {
 private:
  const char*   ptr_;
  int           length_;

  // memcmp() wrapper that never touches the pointers when there is nothing
  // to compare.  A default-constructed or cleared piece holds a NULL
  // pointer, and passing NULL to memcmp() is undefined even for a length
  // of zero.
  static int CompareBytes(const char* a, const char* b, int n) {
    return (n > 0) ? memcmp(a, b, static_cast<size_t>(n)) : 0;
  }

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.
  StringPiece()
    : ptr_(NULL), length_(0) { }
  // A NULL "str" yields an empty piece instead of calling strlen(NULL).
  StringPiece(const char* str)
    : ptr_(str),
      length_(str == NULL ? 0 : static_cast<int>(strlen(str))) { }
  StringPiece(const std::string& str)
    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len)
    : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.  Use "as_string().c_str()" if you really need to do
  // this.  Or better yet, change your routine so it does not rely on NUL
  // termination.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  bool empty() const { return length_ == 0; }

  void clear() { ptr_ = NULL; length_ = 0; }
  void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
  // A NULL "str" yields an empty piece instead of calling strlen(NULL).
  void set(const char* str) {
    ptr_ = str;
    length_ = (str == NULL) ? 0 : static_cast<int>(strlen(str));
  }
  void set(const void* buffer, int len) {
    ptr_ = reinterpret_cast<const char*>(buffer);
    length_ = len;
  }

  // Unchecked element access: "i" must be in [0, size()).
  char operator[](int i) const { return ptr_[i]; }

  // Drop the first n bytes; no bounds checking is performed.
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drop the last n bytes; no bounds checking is performed.
  void remove_suffix(int n) {
    length_ -= n;
  }

  bool operator==(const StringPiece& x) const {
    return ((length_ == x.length_) &&
            (CompareBytes(ptr_, x.ptr_, length_) == 0));
  }
  bool operator!=(const StringPiece& x) const {
    return !(*this == x);
  }

  // Ordering operators: compare the common prefix bytewise; when it is
  // equal, the lengths decide.
#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp)                             \
  bool operator cmp (const StringPiece& x) const {                           \
    int r = CompareBytes(ptr_, x.ptr_,                                       \
                         length_ < x.length_ ? length_ : x.length_);         \
    return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_)));          \
  }
  STRINGPIECE_BINARY_PREDICATE(<,  <);
  STRINGPIECE_BINARY_PREDICATE(<=, <);
  STRINGPIECE_BINARY_PREDICATE(>=, >);
  STRINGPIECE_BINARY_PREDICATE(>,  >);
#undef STRINGPIECE_BINARY_PREDICATE

  // Three-way comparison: negative, zero or positive when *this orders
  // before, equal to, or after x.
  int compare(const StringPiece& x) const {
    int r = CompareBytes(ptr_, x.ptr_,
                         length_ < x.length_ ? length_ : x.length_);
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  // Returns a std::string holding a copy of the referenced bytes.
  std::string as_string() const {
    return std::string(data(), size());
  }

  // Replaces the contents of *target with a copy of the referenced bytes.
  void CopyToString(std::string* target) const {
    target->assign(ptr_, length_);
  }

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    return ((length_ >= x.length_) &&
            (CompareBytes(ptr_, x.ptr_, x.length_) == 0));
  }
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// Functions used to create STL containers that use StringPiece
|
||||
// Remember that a StringPiece's lifetime had better be less than
|
||||
// that of the underlying string or char*. If it is not, then you
|
||||
// cannot safely store a StringPiece into an STL container
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
#ifdef HAVE_TYPE_TRAITS
|
||||
// This makes vector<StringPiece> really fast for some STL implementations
|
||||
template<> struct __type_traits<pcrecpp::StringPiece> {
|
||||
typedef __true_type has_trivial_default_constructor;
|
||||
typedef __true_type has_trivial_copy_constructor;
|
||||
typedef __true_type has_trivial_assignment_operator;
|
||||
typedef __true_type has_trivial_destructor;
|
||||
typedef __true_type is_POD_type;
|
||||
};
|
||||
#endif
|
||||
|
||||
// allow StringPiece to be logged
|
||||
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece);
|
||||
|
||||
#endif /* _PCRE_STRINGPIECE_H */
|
@ -1,145 +0,0 @@
|
||||
// Copyright 2003 and onwards Google Inc.
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#include <stdio.h>
|
||||
#include <map>
|
||||
#include <algorithm> // for make_pair
|
||||
#include <pcre_stringpiece.h>
|
||||
|
||||
// CHECK dies with a fatal error if condition is not true. It is *not*
|
||||
// controlled by NDEBUG, so the check will be executed regardless of
|
||||
// compilation mode. Therefore, it is safe to do things like:
|
||||
// CHECK(fp->Write(x) == 4)
|
||||
#define CHECK(condition) do { \
|
||||
if (!(condition)) { \
|
||||
fprintf(stderr, "%s:%d: Check failed: %s\n", \
|
||||
__FILE__, __LINE__, #condition); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
using std::map;
|
||||
using std::make_pair;
|
||||
using pcrecpp::StringPiece;
|
||||
|
||||
static void CheckSTLComparator() {
|
||||
string s1("foo");
|
||||
string s2("bar");
|
||||
string s3("baz");
|
||||
|
||||
StringPiece p1(s1);
|
||||
StringPiece p2(s2);
|
||||
StringPiece p3(s3);
|
||||
|
||||
typedef map<StringPiece, int> TestMap;
|
||||
TestMap map;
|
||||
|
||||
map.insert(make_pair(p1, 0));
|
||||
map.insert(make_pair(p2, 1));
|
||||
map.insert(make_pair(p3, 2));
|
||||
CHECK(map.size() == 3);
|
||||
|
||||
TestMap::const_iterator iter = map.begin();
|
||||
CHECK(iter->second == 1);
|
||||
++iter;
|
||||
CHECK(iter->second == 2);
|
||||
++iter;
|
||||
CHECK(iter->second == 0);
|
||||
++iter;
|
||||
CHECK(iter == map.end());
|
||||
|
||||
TestMap::iterator new_iter = map.find("zot");
|
||||
CHECK(new_iter == map.end());
|
||||
|
||||
new_iter = map.find("bar");
|
||||
CHECK(new_iter != map.end());
|
||||
|
||||
map.erase(new_iter);
|
||||
CHECK(map.size() == 2);
|
||||
|
||||
iter = map.begin();
|
||||
CHECK(iter->second == 2);
|
||||
++iter;
|
||||
CHECK(iter->second == 0);
|
||||
++iter;
|
||||
CHECK(iter == map.end());
|
||||
}
|
||||
|
||||
static void CheckComparisonOperators() {
|
||||
#define CMP_Y(op, x, y) \
|
||||
CHECK( (StringPiece((x)) op StringPiece((y)))); \
|
||||
CHECK( (StringPiece((x)).compare(StringPiece((y))) op 0))
|
||||
|
||||
#define CMP_N(op, x, y) \
|
||||
CHECK(!(StringPiece((x)) op StringPiece((y)))); \
|
||||
CHECK(!(StringPiece((x)).compare(StringPiece((y))) op 0))
|
||||
|
||||
CMP_Y(==, "", "");
|
||||
CMP_Y(==, "a", "a");
|
||||
CMP_Y(==, "aa", "aa");
|
||||
CMP_N(==, "a", "");
|
||||
CMP_N(==, "", "a");
|
||||
CMP_N(==, "a", "b");
|
||||
CMP_N(==, "a", "aa");
|
||||
CMP_N(==, "aa", "a");
|
||||
|
||||
CMP_N(!=, "", "");
|
||||
CMP_N(!=, "a", "a");
|
||||
CMP_N(!=, "aa", "aa");
|
||||
CMP_Y(!=, "a", "");
|
||||
CMP_Y(!=, "", "a");
|
||||
CMP_Y(!=, "a", "b");
|
||||
CMP_Y(!=, "a", "aa");
|
||||
CMP_Y(!=, "aa", "a");
|
||||
|
||||
CMP_Y(<, "a", "b");
|
||||
CMP_Y(<, "a", "aa");
|
||||
CMP_Y(<, "aa", "b");
|
||||
CMP_Y(<, "aa", "bb");
|
||||
CMP_N(<, "a", "a");
|
||||
CMP_N(<, "b", "a");
|
||||
CMP_N(<, "aa", "a");
|
||||
CMP_N(<, "b", "aa");
|
||||
CMP_N(<, "bb", "aa");
|
||||
|
||||
CMP_Y(<=, "a", "a");
|
||||
CMP_Y(<=, "a", "b");
|
||||
CMP_Y(<=, "a", "aa");
|
||||
CMP_Y(<=, "aa", "b");
|
||||
CMP_Y(<=, "aa", "bb");
|
||||
CMP_N(<=, "b", "a");
|
||||
CMP_N(<=, "aa", "a");
|
||||
CMP_N(<=, "b", "aa");
|
||||
CMP_N(<=, "bb", "aa");
|
||||
|
||||
CMP_N(>=, "a", "b");
|
||||
CMP_N(>=, "a", "aa");
|
||||
CMP_N(>=, "aa", "b");
|
||||
CMP_N(>=, "aa", "bb");
|
||||
CMP_Y(>=, "a", "a");
|
||||
CMP_Y(>=, "b", "a");
|
||||
CMP_Y(>=, "aa", "a");
|
||||
CMP_Y(>=, "b", "aa");
|
||||
CMP_Y(>=, "bb", "aa");
|
||||
|
||||
CMP_N(>, "a", "a");
|
||||
CMP_N(>, "a", "b");
|
||||
CMP_N(>, "a", "aa");
|
||||
CMP_N(>, "aa", "b");
|
||||
CMP_N(>, "aa", "bb");
|
||||
CMP_Y(>, "b", "a");
|
||||
CMP_Y(>, "aa", "a");
|
||||
CMP_Y(>, "b", "aa");
|
||||
CMP_Y(>, "bb", "aa");
|
||||
|
||||
#undef CMP_Y
|
||||
#undef CMP_N
|
||||
}
|
||||
|
||||
// Test driver: runs the operator checks, then the map-ordering checks, and
// prints "OK" when neither of them exited through a failed CHECK.
// Command-line arguments are ignored.
int main(int /*argc*/, char** /*argv*/) {
  CheckComparisonOperators();
  CheckSTLComparator();

  fputs("OK\n", stdout);
  return 0;
}
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -394,11 +394,13 @@ do
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
@ -411,6 +413,7 @@ do
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
@ -428,6 +431,7 @@ do
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
@ -487,7 +491,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra *
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -61,6 +61,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
const int _pcre_utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
@ -194,4 +196,6 @@ const ucp_type_table _pcre_utt[] = {
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -45,12 +45,12 @@ properties. */
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinternal.h" /* Internal table details */
|
||||
#include "ucptable.c" /* The table itself */
|
||||
#include "ucptable.h" /* The table itself */
|
||||
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
static int ucp_gentype[] = {
|
||||
static const int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -66,6 +66,7 @@ Returns: < 0 if the string is a valid UTF-8 string
|
||||
int
|
||||
_pcre_valid_utf8(const uschar *string, int length)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register const uschar *p;
|
||||
|
||||
if (length < 0)
|
||||
@ -123,6 +124,7 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -75,7 +75,7 @@ I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_DATA_SCOPE const char *
|
||||
PCRE_EXP_DEFN const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -1,857 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "config.h"
|
||||
// We need this to compile the proper dll on windows/msys. This is copied
|
||||
// from pcre_internal.h. It would probably be better just to include that.
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
#include "pcre.h"
|
||||
#include "pcre_stringpiece.h"
|
||||
#include "pcrecpp.h"
|
||||
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// Maximum number of args we can set
|
||||
static const int kMaxArgs = 16;
|
||||
static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
|
||||
|
||||
// Special object that stands-in for no argument
|
||||
Arg no_arg((void*)NULL);
|
||||
|
||||
// If a regular expression has no error, its error_ field points here
|
||||
static const string empty_string;
|
||||
|
||||
// If the user doesn't ask for any options, we just use this one
|
||||
static RE_Options default_options;
|
||||
|
||||
void RE::Init(const string& pat, const RE_Options* options) {
|
||||
pattern_ = pat;
|
||||
if (options == NULL) {
|
||||
options_ = default_options;
|
||||
} else {
|
||||
options_ = *options;
|
||||
}
|
||||
error_ = &empty_string;
|
||||
re_full_ = NULL;
|
||||
re_partial_ = NULL;
|
||||
|
||||
re_partial_ = Compile(UNANCHORED);
|
||||
if (re_partial_ != NULL) {
|
||||
// Check for complicated patterns. The following change is
|
||||
// conservative in that it may treat some "simple" patterns
|
||||
// as "complex" (e.g., if the vertical bar is in a character
|
||||
// class or is escaped). But it seems good enough.
|
||||
if (strchr(pat.c_str(), '|') == NULL) {
|
||||
// Simple pattern: we can use position-based checks to perform
|
||||
// fully anchored matches
|
||||
re_full_ = re_partial_;
|
||||
} else {
|
||||
// We need a special pattern for anchored matches
|
||||
re_full_ = Compile(ANCHOR_BOTH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RE::Cleanup() {
|
||||
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
|
||||
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
|
||||
if (error_ != &empty_string) delete error_;
|
||||
}
|
||||
|
||||
|
||||
// Destructor: all teardown is delegated to Cleanup().
RE::~RE() { Cleanup(); }
|
||||
|
||||
|
||||
// Compiles pattern_ with the options held in options_.
//
// pcre only offers a runtime option for anchoring at the start of the
// subject, so the three anchoring modes are handled like this:
//   UNANCHORED    compile the pattern as written; match unanchored.
//   ANCHOR_START  compile the pattern as written; match anchored.
//   ANCHOR_BOTH   compile "(?:pattern)\z"; match anchored.
//
// Returns the compiled pattern, or NULL on failure.  The first failure
// message is stored in error_ (later ones are not recorded).
pcre* RE::Compile(Anchor anchor) {
  const int pcre_options = options_.all_options();

  string wrapped;
  const char* source = pattern_.c_str();
  if (anchor == ANCHOR_BOTH) {
    // The non-capturing group makes the trailing \z apply to every
    // top-level alternative of the original pattern.
    wrapped = "(?:";
    wrapped += pattern_;
    wrapped += ")\\z";
    source = wrapped.c_str();
  }

  const char* compile_error;
  int eoffset;
  pcre* re = pcre_compile(source, pcre_options,
                          &compile_error, &eoffset, NULL);
  if (re == NULL && error_ == &empty_string) {
    error_ = new string(compile_error);
  }
  return re;
}
|
||||
|
||||
/***** Matching interfaces *****/
|
||||
|
||||
bool RE::FullMatch(const StringPiece& text,
|
||||
const Arg& ptr1,
|
||||
const Arg& ptr2,
|
||||
const Arg& ptr3,
|
||||
const Arg& ptr4,
|
||||
const Arg& ptr5,
|
||||
const Arg& ptr6,
|
||||
const Arg& ptr7,
|
||||
const Arg& ptr8,
|
||||
const Arg& ptr9,
|
||||
const Arg& ptr10,
|
||||
const Arg& ptr11,
|
||||
const Arg& ptr12,
|
||||
const Arg& ptr13,
|
||||
const Arg& ptr14,
|
||||
const Arg& ptr15,
|
||||
const Arg& ptr16) const {
|
||||
const Arg* args[kMaxArgs];
|
||||
int n = 0;
|
||||
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
|
||||
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
|
||||
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
|
||||
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
|
||||
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
|
||||
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
|
||||
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
|
||||
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
|
||||
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
|
||||
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
|
||||
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
|
||||
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
|
||||
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
|
||||
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
|
||||
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
|
||||
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
|
||||
done:
|
||||
|
||||
int consumed;
|
||||
int vec[kVecSize];
|
||||
return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
|
||||
}
|
||||
|
||||
bool RE::PartialMatch(const StringPiece& text,
|
||||
const Arg& ptr1,
|
||||
const Arg& ptr2,
|
||||
const Arg& ptr3,
|
||||
const Arg& ptr4,
|
||||
const Arg& ptr5,
|
||||
const Arg& ptr6,
|
||||
const Arg& ptr7,
|
||||
const Arg& ptr8,
|
||||
const Arg& ptr9,
|
||||
const Arg& ptr10,
|
||||
const Arg& ptr11,
|
||||
const Arg& ptr12,
|
||||
const Arg& ptr13,
|
||||
const Arg& ptr14,
|
||||
const Arg& ptr15,
|
||||
const Arg& ptr16) const {
|
||||
const Arg* args[kMaxArgs];
|
||||
int n = 0;
|
||||
if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
|
||||
if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
|
||||
if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
|
||||
if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
|
||||
if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
|
||||
if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
|
||||
if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
|
||||
if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
|
||||
if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
|
||||
if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
|
||||
if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
|
||||
if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
|
||||
if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
|
||||
if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
|
||||
if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
|
||||
if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
|
||||
done:
|
||||
|
||||
int consumed;
|
||||
int vec[kVecSize];
|
||||
return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
|
||||
}
|
||||
|
||||
// Matches the pattern at the start of *input and, on success, removes
// the consumed prefix from *input.  Returns whether the match succeeded.
// Arguments are collected in order up to (not including) the first one
// that is the no_arg sentinel.
bool RE::Consume(StringPiece* input,
                 const Arg& ptr1,
                 const Arg& ptr2,
                 const Arg& ptr3,
                 const Arg& ptr4,
                 const Arg& ptr5,
                 const Arg& ptr6,
                 const Arg& ptr7,
                 const Arg& ptr8,
                 const Arg& ptr9,
                 const Arg& ptr10,
                 const Arg& ptr11,
                 const Arg& ptr12,
                 const Arg& ptr13,
                 const Arg& ptr14,
                 const Arg& ptr15,
                 const Arg& ptr16) const {
  const Arg* const supplied[kMaxArgs] = {
    &ptr1,  &ptr2,  &ptr3,  &ptr4,  &ptr5,  &ptr6,  &ptr7,  &ptr8,
    &ptr9,  &ptr10, &ptr11, &ptr12, &ptr13, &ptr14, &ptr15, &ptr16
  };
  const Arg* args[kMaxArgs];
  int n = 0;
  while (n < kMaxArgs && supplied[n] != &no_arg) {
    args[n] = supplied[n];
    ++n;
  }

  int consumed;
  int vec[kVecSize];
  if (!DoMatchImpl(*input, ANCHOR_START, &consumed,
                   args, n, vec, kVecSize)) {
    return false;
  }
  input->remove_prefix(consumed);
  return true;
}
|
||||
|
||||
// Finds the first match anywhere in *input (unanchored) and, on success,
// removes everything up to the end of that match from *input.
// Arguments are collected in order up to (not including) the first one
// that is the no_arg sentinel.
bool RE::FindAndConsume(StringPiece* input,
                        const Arg& ptr1,
                        const Arg& ptr2,
                        const Arg& ptr3,
                        const Arg& ptr4,
                        const Arg& ptr5,
                        const Arg& ptr6,
                        const Arg& ptr7,
                        const Arg& ptr8,
                        const Arg& ptr9,
                        const Arg& ptr10,
                        const Arg& ptr11,
                        const Arg& ptr12,
                        const Arg& ptr13,
                        const Arg& ptr14,
                        const Arg& ptr15,
                        const Arg& ptr16) const {
  const Arg* const supplied[kMaxArgs] = {
    &ptr1,  &ptr2,  &ptr3,  &ptr4,  &ptr5,  &ptr6,  &ptr7,  &ptr8,
    &ptr9,  &ptr10, &ptr11, &ptr12, &ptr13, &ptr14, &ptr15, &ptr16
  };
  const Arg* args[kMaxArgs];
  int n = 0;
  while (n < kMaxArgs && supplied[n] != &no_arg) {
    args[n] = supplied[n];
    ++n;
  }

  int consumed;
  int vec[kVecSize];
  if (!DoMatchImpl(*input, UNANCHORED, &consumed,
                   args, n, vec, kVecSize)) {
    return false;
  }
  input->remove_prefix(consumed);
  return true;
}
|
||||
|
||||
bool RE::Replace(const StringPiece& rewrite,
|
||||
string *str) const {
|
||||
int vec[kVecSize];
|
||||
int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
|
||||
if (matches == 0)
|
||||
return false;
|
||||
|
||||
string s;
|
||||
if (!Rewrite(&s, rewrite, *str, vec, matches))
|
||||
return false;
|
||||
|
||||
assert(vec[0] >= 0);
|
||||
assert(vec[1] >= 0);
|
||||
str->replace(vec[0], vec[1] - vec[0], s);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
|
||||
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
|
||||
static int NewlineMode(int pcre_options) {
|
||||
// TODO: if we can make it threadsafe, cache this var
|
||||
int newline_mode = 0;
|
||||
/* if (newline_mode) return newline_mode; */ // do this once it's cached
|
||||
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
|
||||
newline_mode = (pcre_options &
|
||||
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
|
||||
} else {
|
||||
int newline;
|
||||
pcre_config(PCRE_CONFIG_NEWLINE, &newline);
|
||||
if (newline == 10)
|
||||
newline_mode = PCRE_NEWLINE_LF;
|
||||
else if (newline == 13)
|
||||
newline_mode = PCRE_NEWLINE_CR;
|
||||
else if (newline == 3338)
|
||||
newline_mode = PCRE_NEWLINE_CRLF;
|
||||
else
|
||||
assert("" == "Unexpected return value from pcre_config(NEWLINE)");
|
||||
}
|
||||
return newline_mode;
|
||||
}
|
||||
|
||||
int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
string *str) const {
|
||||
int count = 0;
|
||||
int vec[kVecSize];
|
||||
string out;
|
||||
int start = 0;
|
||||
int lastend = -1;
|
||||
|
||||
for (; start <= static_cast<int>(str->length()); count++) {
|
||||
int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
|
||||
if (matches <= 0)
|
||||
break;
|
||||
int matchstart = vec[0], matchend = vec[1];
|
||||
assert(matchstart >= start);
|
||||
assert(matchend >= matchstart);
|
||||
if (matchstart == matchend && matchstart == lastend) {
|
||||
// advance one character if we matched an empty string at the same
|
||||
// place as the last match occurred
|
||||
matchend = start + 1;
|
||||
// If the current char is CR and we're in CRLF mode, skip LF too.
|
||||
// Note it's better to call pcre_fullinfo() than to examine
|
||||
// all_options(), since options_ could have changed bewteen
|
||||
// compile-time and now, but this is simpler and safe enough.
|
||||
if (start+1 < static_cast<int>(str->length()) &&
|
||||
(*str)[start] == '\r' && (*str)[start+1] == '\n' &&
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
|
||||
matchend++;
|
||||
}
|
||||
// We also need to advance more than one char if we're in utf8 mode.
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (options_.utf8()) {
|
||||
while (matchend < static_cast<int>(str->length()) &&
|
||||
((*str)[matchend] & 0xc0) == 0x80)
|
||||
matchend++;
|
||||
}
|
||||
#endif
|
||||
if (matchend <= static_cast<int>(str->length()))
|
||||
out.append(*str, start, matchend - start);
|
||||
start = matchend;
|
||||
} else {
|
||||
out.append(*str, start, matchstart - start);
|
||||
Rewrite(&out, rewrite, *str, vec, matches);
|
||||
start = matchend;
|
||||
lastend = matchend;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0)
|
||||
return 0;
|
||||
|
||||
if (start < static_cast<int>(str->length()))
|
||||
out.append(*str, start, str->length() - start);
|
||||
swap(out, *str);
|
||||
return count;
|
||||
}
|
||||
|
||||
bool RE::Extract(const StringPiece& rewrite,
|
||||
const StringPiece& text,
|
||||
string *out) const {
|
||||
int vec[kVecSize];
|
||||
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
|
||||
if (matches == 0)
|
||||
return false;
|
||||
out->erase();
|
||||
return Rewrite(out, rewrite, text, vec, matches);
|
||||
}
|
||||
|
||||
// Returns a copy of "unquoted" in which every ASCII byte outside
// [A-Za-z_0-9] is preceded by a backslash.
//
// Escaping a character that has no special meaning is legal in a regular
// expression, so this mirrors perl's quotemeta (`perldoc -f quotemeta`).
/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
  string result;

  for (int ii = 0; ii < unquoted.size(); ++ii) {
    const char ch = unquoted[ii];

    // Explicit range tests are used instead of isalnum(): the original
    // author measured isalnum() raising the benchmark from 32ns to 58ns.
    const bool is_word_char = (ch >= 'a' && ch <= 'z') ||
                              (ch >= 'A' && ch <= 'Z') ||
                              (ch >= '0' && ch <= '9') ||
                              ch == '_';

    // Bytes with the top bit set belong to a UTF8 or Latin1 character
    // and must be copied unescaped.  Experimentally this is what works
    // correctly with the regexp library.
    const bool high_bit_set = (ch & 128) != 0;

    if (!is_word_char && !high_bit_set) {
      result += '\\';
    }
    result += ch;
  }

  return result;
}
|
||||
|
||||
/***** Actual matching and rewriting code *****/
|
||||
|
||||
int RE::TryMatch(const StringPiece& text,
|
||||
int startpos,
|
||||
Anchor anchor,
|
||||
int *vec,
|
||||
int vecsize) const {
|
||||
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
|
||||
if (re == NULL) {
|
||||
//fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
pcre_extra extra = { 0 };
|
||||
if (options_.match_limit() > 0) {
|
||||
extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
|
||||
extra.match_limit = options_.match_limit();
|
||||
}
|
||||
if (options_.match_limit_recursion() > 0) {
|
||||
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
extra.match_limit_recursion = options_.match_limit_recursion();
|
||||
}
|
||||
int rc = pcre_exec(re, // The regular expression object
|
||||
&extra,
|
||||
(text.data() == NULL) ? "" : text.data(),
|
||||
text.size(),
|
||||
startpos,
|
||||
(anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
|
||||
vec,
|
||||
vecsize);
|
||||
|
||||
// Handle errors
|
||||
if (rc == PCRE_ERROR_NOMATCH) {
|
||||
return 0;
|
||||
} else if (rc < 0) {
|
||||
//fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
|
||||
// re, pattern_.c_str());
|
||||
return 0;
|
||||
} else if (rc == 0) {
|
||||
// pcre_exec() returns 0 as a special case when the number of
|
||||
// capturing subpatterns exceeds the size of the vector.
|
||||
// When this happens, there is a match and the output vector
|
||||
// is filled, but we miss out on the positions of the extra subpatterns.
|
||||
rc = vecsize / 2;
|
||||
}
|
||||
|
||||
if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {
|
||||
// We need an extra check to make sure that the match extended
|
||||
// to the end of the input string
|
||||
assert(vec[0] == 0); // PCRE_ANCHORED forces starting match
|
||||
if (vec[1] != text.size()) return 0; // Did not get ending match
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
bool RE::DoMatchImpl(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const* args,
|
||||
int n,
|
||||
int* vec,
|
||||
int vecsize) const {
|
||||
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
|
||||
int matches = TryMatch(text, 0, anchor, vec, vecsize);
|
||||
assert(matches >= 0); // TryMatch never returns negatives
|
||||
if (matches == 0)
|
||||
return false;
|
||||
|
||||
*consumed = vec[1];
|
||||
|
||||
if (n == 0 || args == NULL) {
|
||||
// We are not interested in results
|
||||
return true;
|
||||
}
|
||||
|
||||
if (NumberOfCapturingGroups() < n) {
|
||||
// RE has fewer capturing groups than number of arg pointers passed in
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we got here, we must have matched the whole pattern.
|
||||
// We do not need (can not do) any more checks on the value of 'matches' here
|
||||
// -- see the comment for TryMatch.
|
||||
for (int i = 0; i < n; i++) {
|
||||
const int start = vec[2*(i+1)];
|
||||
const int limit = vec[2*(i+1)+1];
|
||||
if (!args[i]->Parse(text.data() + start, limit-start)) {
|
||||
// TODO: Should we indicate what the error was?
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RE::DoMatch(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const args[],
|
||||
int n) const {
|
||||
assert(n >= 0);
|
||||
size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
|
||||
// (as for kVecSize)
|
||||
int space[21]; // use stack allocation for small vecsize (common case)
|
||||
int* vec = vecsize <= 21 ? space : new int[vecsize];
|
||||
bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
|
||||
if (vec != space) delete [] vec;
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Appends to *out the expansion of "rewrite" for one match.
//   rewrite  template: "\N" (single digit N) inserts captured group N,
//            "\\" inserts a literal backslash, any other byte is copied
//   text     subject string the offsets in vec refer to
//   vec      offset vector from the match; group N is vec[2N], vec[2N+1]
//   veclen   number of groups available in vec
// Returns false when the template names a group >= veclen, contains a
// backslash followed by anything but a digit or backslash, or ends with
// a lone backslash.  *out may already hold a partial expansion then.
bool RE::Rewrite(string *out, const StringPiece &rewrite,
                 const StringPiece &text, int *vec, int veclen) const {
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
       s < end; s++) {
    int c = *s;
    if (c != '\\') {
      out->push_back(c);
      continue;
    }

    // A backslash must be followed by another character.  Check before
    // dereferencing: the StringPiece need not be NUL-terminated, so
    // reading *end would run past the buffer.
    if (++s == end) {
      //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
      //        rewrite.size(), rewrite.data());
      return false;
    }
    c = *s;

    // isdigit() is only defined for unsigned char values (or EOF), so
    // convert before classifying bytes >= 0x80.
    if (isdigit(static_cast<unsigned char>(c))) {
      int n = (c - '0');
      if (n >= veclen) {
        //fprintf(stderr, "requested group %d in regexp %.*s\n",
        //        n, rewrite.size(), rewrite.data());
        return false;
      }
      int start = vec[2 * n];
      // A negative start means there is nothing to insert for the group.
      if (start >= 0)
        out->append(text.data() + start, vec[2 * n + 1] - start);
    } else if (c == '\\') {
      out->push_back('\\');
    } else {
      //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
      //        rewrite.size(), rewrite.data());
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Return the number of capturing subpatterns, or -1 if the
|
||||
// regexp wasn't valid on construction.
|
||||
int RE::NumberOfCapturingGroups() const {
|
||||
if (re_partial_ == NULL) return -1;
|
||||
|
||||
int result;
|
||||
int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
|
||||
NULL, // We did not study the pattern
|
||||
PCRE_INFO_CAPTURECOUNT,
|
||||
&result);
|
||||
assert(pcre_retval == 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
/***** Parsers for various types *****/
|
||||
|
||||
// Parser for "no destination": succeeds only when dest is NULL, i.e.
// nobody asked for the text to be stored.  str and n are ignored.
bool Arg::parse_null(const char* str, int n, void* dest) {
  const bool nothing_to_store = (dest == NULL);
  return nothing_to_store;
}
|
||||
|
||||
bool Arg::parse_string(const char* str, int n, void* dest) {
|
||||
reinterpret_cast<string*>(dest)->assign(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
|
||||
reinterpret_cast<StringPiece*>(dest)->set(str, n);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stores a one-byte capture into the char that dest points to.
// Fails unless exactly one byte was captured.
bool Arg::parse_char(const char* str, int n, void* dest) {
  if (n == 1) {
    char* target = reinterpret_cast<char*>(dest);
    *target = str[0];
    return true;
  }
  return false;
}
|
||||
|
||||
// Stores a one-byte capture into the unsigned char that dest points to.
// Fails unless exactly one byte was captured.
bool Arg::parse_uchar(const char* str, int n, void* dest) {
  if (n == 1) {
    unsigned char* target = reinterpret_cast<unsigned char*>(dest);
    *target = str[0];
    return true;
  }
  return false;
}
|
||||
|
||||
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// Prepares the n-byte number text at "str" for the strtoxxx() routines.
//
// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// NOTE: the byte at str[n], just past the number, is read as well.
//
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. ""    if the input was invalid and has no hope of being parsed
static const char* TerminateNumber(char* buf, const char* str, int n) {
  // The <ctype.h> classifiers are only defined for unsigned char values
  // (or EOF).  Convert first so bytes >= 0x80 are not passed as negative
  // ints where char is signed.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit.  If so, strtoxxx() would run on past the number,
  // so a terminated copy is needed.
  const unsigned char next = static_cast<unsigned char>(str[n]);
  const bool looks_like_digit = isdigit(next) ||
                                ((next >= 'a') && (next <= 'f')) ||
                                ((next >= 'A') && (next <= 'F'));
  if (!looks_like_digit) {
    // We can parse right out of the supplied string, so return it.
    return str;
  }

  if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
  memcpy(buf, str, n);
  buf[n] = '\0';
  return buf;
}
|
||||
|
||||
bool Arg::parse_long_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, str, n);
|
||||
char* end;
|
||||
errno = 0;
|
||||
long r = strtol(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
*(reinterpret_cast<long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_ulong_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
if (n == 0) return false;
|
||||
char buf[kMaxNumberLength+1];
|
||||
str = TerminateNumber(buf, str, n);
|
||||
if (str[0] == '-') return false; // strtoul() on a negative number?!
|
||||
char* end;
|
||||
errno = 0;
|
||||
unsigned long r = strtoul(str, &end, radix);
|
||||
if (end != str + n) return false; // Leftover junk
|
||||
if (errno) return false;
|
||||
*(reinterpret_cast<unsigned long*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_short_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<short*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_ushort_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > USHRT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<unsigned short*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_int_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
long r;
|
||||
if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r < INT_MIN || r > INT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Arg::parse_uint_radix(const char* str,
|
||||
int n,
|
||||
void* dest,
|
||||
int radix) {
|
||||
unsigned long r;
|
||||
if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
|
||||
if (r > UINT_MAX) return false; // Out of range
|
||||
*(reinterpret_cast<unsigned int*>(dest)) = r;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parses the n bytes at str as a long long in the given radix and
// stores the value at dest.  Always fails when HAVE_LONG_LONG is not
// defined.  Otherwise fails on empty input, when the conversion does
// not consume exactly n bytes, or when it sets errno.
bool Arg::parse_longlong_radix(const char* str,
                               int n,
                               void* dest,
                               int radix) {
#ifndef HAVE_LONG_LONG
  return false;
#else
  if (n == 0) return false;

  char buf[kMaxNumberLength+1];
  const char* digits = TerminateNumber(buf, str, n);

  char* end;
  errno = 0;
  // Use whichever conversion routine the configuration reports.
#if defined HAVE_STRTOQ
  long long value = strtoq(digits, &end, radix);
#elif defined HAVE_STRTOLL
  long long value = strtoll(digits, &end, radix);
#else
#error parse_longlong_radix: cannot convert input to a long-long
#endif

  const bool leftover_junk = (end != digits + n);
  if (leftover_junk || errno) return false;

  *(reinterpret_cast<long long*>(dest)) = value;
  return true;
#endif   /* HAVE_LONG_LONG */
}
|
||||
|
||||
// Parses the n bytes at str as an unsigned long long in the given radix
// and stores the value at dest.  Always fails when
// HAVE_UNSIGNED_LONG_LONG is not defined.  Otherwise fails on empty
// input, a leading '-', when the conversion does not consume exactly
// n bytes, or when it sets errno.
bool Arg::parse_ulonglong_radix(const char* str,
                                int n,
                                void* dest,
                                int radix) {
#ifndef HAVE_UNSIGNED_LONG_LONG
  return false;
#else
  if (n == 0) return false;

  char buf[kMaxNumberLength+1];
  const char* digits = TerminateNumber(buf, str, n);

  // Negative input is rejected here rather than handed to strtoull().
  if (digits[0] == '-') return false;

  char* end;
  errno = 0;
  // NOTE(review): the unsigned routines are selected by the macros for
  // the signed ones (HAVE_STRTOQ / HAVE_STRTOLL) -- confirm that the
  // configuration guarantees strtouq/strtoull exist alongside them.
#if defined HAVE_STRTOQ
  unsigned long long value = strtouq(digits, &end, radix);
#elif defined HAVE_STRTOLL
  unsigned long long value = strtoull(digits, &end, radix);
#else
#error parse_ulonglong_radix: cannot convert input to a long-long
#endif

  const bool leftover_junk = (end != digits + n);
  if (leftover_junk || errno) return false;

  *(reinterpret_cast<unsigned long long*>(dest)) = value;
  return true;
#endif   /* HAVE_UNSIGNED_LONG_LONG */
}
|
||||
|
||||
// Parses the n characters starting at "str" as a double using strtod() and
// stores the value through "dest" (treated as double*).
//
// The text is copied into a local NUL-terminated buffer first, so "str"
// itself does not need to be terminated.  Returns false when:
//   - n is zero or negative,
//   - n does not fit in the local buffer (n >= kMaxLength),
//   - strtod() does not consume exactly n characters, or
//   - strtod() sets errno.
// "dest" is only written on success.
bool Arg::parse_double(const char* str, int n, void* dest) {
  static const int kMaxLength = 200;

  // Reject empty input.  Negative lengths are rejected as well: they would
  // slip past the upper-bound test below and then be converted to an
  // enormous size_t by memcpy(), overflowing "buf".
  if (n <= 0) return false;
  if (n >= kMaxLength) return false;

  char buf[kMaxLength];
  memcpy(buf, str, n);
  buf[n] = '\0';

  errno = 0;
  char* end;
  double r = strtod(buf, &end);
  if (end != buf + n) return false;  // Leftover junk
  if (errno) return false;           // strtod() reported an error

  *(reinterpret_cast<double*>(dest)) = r;
  return true;
}
|
||||
|
||||
bool Arg::parse_float(const char* str, int n, void* dest) {
|
||||
double r;
|
||||
if (!parse_double(str, n, &r)) return false;
|
||||
*(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#define DEFINE_INTEGER_PARSERS(name) \
|
||||
bool Arg::parse_##name(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 10); \
|
||||
} \
|
||||
bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 16); \
|
||||
} \
|
||||
bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 8); \
|
||||
} \
|
||||
bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
|
||||
return parse_##name##_radix(str, n, dest, 0); \
|
||||
}
|
||||
|
||||
DEFINE_INTEGER_PARSERS(short) /* */
|
||||
DEFINE_INTEGER_PARSERS(ushort) /* */
|
||||
DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
|
||||
DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
|
||||
DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
|
||||
DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
|
||||
DEFINE_INTEGER_PARSERS(longlong) /* */
|
||||
DEFINE_INTEGER_PARSERS(ulonglong) /* */
|
||||
|
||||
#undef DEFINE_INTEGER_PARSERS
|
||||
|
||||
} // namespace pcrecpp
|
@ -1,695 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
|
||||
|
||||
#ifndef _PCRECPP_H
|
||||
#define _PCRECPP_H
|
||||
|
||||
// C++ interface to the pcre regular-expression library. RE supports
|
||||
// Perl-style regular expressions (with extensions like \d, \w, \s,
|
||||
// ...).
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// REGEXP SYNTAX:
|
||||
//
|
||||
// This module is part of the pcre library and hence supports its syntax
|
||||
// for regular expressions.
|
||||
//
|
||||
// The syntax is pretty similar to Perl's. For those not familiar
|
||||
// with Perl's regular expressions, here are some examples of the most
|
||||
// commonly used extensions:
|
||||
//
|
||||
// "hello (\\w+) world" -- \w matches a "word" character
|
||||
// "version (\\d+)" -- \d matches a digit
|
||||
// "hello\\s+world" -- \s matches any whitespace character
|
||||
// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
|
||||
// "(?i)hello" -- (?i) turns on case-insensitive matching
|
||||
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING INTERFACE:
|
||||
//
|
||||
// The "FullMatch" operation checks that supplied text matches a
|
||||
// supplied pattern exactly.
|
||||
//
|
||||
// Example: successful match
|
||||
// pcrecpp::RE re("h.*o");
|
||||
// re.FullMatch("hello");
|
||||
//
|
||||
// Example: unsuccessful match (requires full match):
|
||||
// pcrecpp::RE re("e");
|
||||
// !re.FullMatch("hello");
|
||||
//
|
||||
// Example: creating a temporary RE object:
|
||||
// pcrecpp::RE("h.*o").FullMatch("hello");
|
||||
//
|
||||
// You can pass in a "const char*" or a "string" for "text". The
|
||||
// examples below tend to use a const char*.
|
||||
//
|
||||
// You can, as in the different examples above, store the RE object
|
||||
// explicitly in a variable or use a temporary RE object. The
|
||||
// examples below use one mode or the other arbitrarily. Either
|
||||
// could correctly be used for any of these examples.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// MATCHING WITH SUB-STRING EXTRACTION:
|
||||
//
|
||||
// You can supply extra pointer arguments to extract matched subpieces.
|
||||
//
|
||||
// Example: extracts "ruby" into "s" and 1234 into "i"
|
||||
// int i;
|
||||
// string s;
|
||||
// pcrecpp::RE re("(\\w+):(\\d+)");
|
||||
// re.FullMatch("ruby:1234", &s, &i);
|
||||
//
|
||||
// Example: does not try to extract any extra sub-patterns
|
||||
// re.FullMatch("ruby:1234", &s);
|
||||
//
|
||||
// Example: does not try to extract into NULL
|
||||
// re.FullMatch("ruby:1234", NULL, &i);
|
||||
//
|
||||
// Example: integer overflow causes failure
|
||||
// !re.FullMatch("ruby:1234567891234", NULL, &i);
|
||||
//
|
||||
// Example: fails because there aren't enough sub-patterns:
|
||||
// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
|
||||
//
|
||||
// Example: fails because string cannot be stored in integer
|
||||
// !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
|
||||
//
|
||||
// The provided pointer arguments can be pointers to any scalar numeric
|
||||
// type, or one of
|
||||
// string (matched piece is copied to string)
|
||||
// StringPiece (StringPiece is mutated to point to matched piece)
|
||||
// T (where "bool T::ParseFrom(const char*, int)" exists)
|
||||
// NULL (the corresponding matched sub-pattern is not copied)
|
||||
//
|
||||
// CAVEAT: An optional sub-pattern that does not exist in the matched
|
||||
// string is assigned the empty string. Therefore, the following will
|
||||
// return false (because the empty string is not a valid number):
|
||||
// int number;
|
||||
// pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// DO_MATCH
|
||||
//
|
||||
// The matching interface supports at most 16 arguments per call.
|
||||
// If you need more, consider using the more general interface
|
||||
// pcrecpp::RE::DoMatch(). See the declaration of DoMatch below for its signature.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PARTIAL MATCHES
|
||||
//
|
||||
// You can use the "PartialMatch" operation when you want the pattern
|
||||
// to match any substring of the text.
|
||||
//
|
||||
// Example: simple search for a string:
|
||||
// pcrecpp::RE("ell").PartialMatch("hello");
|
||||
//
|
||||
// Example: find first number in a string:
|
||||
// int number;
|
||||
// pcrecpp::RE re("(\\d+)");
|
||||
// re.PartialMatch("x*100 + 20", &number);
|
||||
// assert(number == 100);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// UTF-8 AND THE MATCHING INTERFACE:
|
||||
//
|
||||
// By default, pattern and text are plain text, one byte per character.
|
||||
// The UTF8 flag, passed to the constructor, causes both pattern
|
||||
// and string to be treated as UTF-8 text, still a byte stream but
|
||||
// potentially multiple bytes per character. In practice, the text
|
||||
// is likelier to be UTF-8 than the pattern, but the match returned
|
||||
// may depend on the UTF8 flag, so always use it when matching
|
||||
// UTF8 text. E.g., "." will match one byte normally but with UTF8
|
||||
// set may match up to three bytes of a multi-byte character.
|
||||
//
|
||||
// Example:
|
||||
// pcrecpp::RE_Options options;
|
||||
// options.set_utf8(true);
|
||||
// pcrecpp::RE re(utf8_pattern, options);
|
||||
// re.FullMatch(utf8_string);
|
||||
//
|
||||
// Example: using the convenience function UTF8():
|
||||
// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
|
||||
// re.FullMatch(utf8_string);
|
||||
//
|
||||
// NOTE: The UTF8 option is ignored if pcre was not configured with the
|
||||
// --enable-utf8 flag.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
|
||||
//
|
||||
// PCRE defines some modifiers to change the behavior of the regular
|
||||
// expression engine.
|
||||
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
|
||||
// to pass such modifiers to a RE class.
|
||||
//
|
||||
// Currently, the following modifiers are supported
|
||||
//
|
||||
// modifier description Perl corresponding
|
||||
//
|
||||
// PCRE_CASELESS case insensitive match /i
|
||||
// PCRE_MULTILINE multiple lines match /m
|
||||
// PCRE_DOTALL dot matches newlines /s
|
||||
// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
|
||||
// PCRE_EXTRA strict escape parsing N/A
|
||||
// PCRE_EXTENDED ignore whitespaces /x
|
||||
// PCRE_UTF8 handles UTF8 chars built-in
|
||||
// PCRE_UNGREEDY reverses * and *? N/A
|
||||
// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*)
|
||||
//
|
||||
// (For a full account on how each modifier works, please check the
|
||||
// PCRE API reference manual).
|
||||
//
|
||||
// (*) Both Perl and PCRE allow non matching parentheses by means of the
|
||||
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
|
||||
// capture, while (ab|cd) does.
|
||||
//
|
||||
// For each modifier, there are two member functions whose name is made
|
||||
// out of the modifier in lowercase, without the "PCRE_" prefix. For
|
||||
// instance, PCRE_CASELESS is handled by
|
||||
// bool caseless(),
|
||||
// which returns true if the modifier is set, and
|
||||
// RE_Options & set_caseless(bool),
|
||||
// which sets or unsets the modifier.
|
||||
//
|
||||
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
|
||||
// set_match_limit() and match_limit() member functions.
|
||||
// Setting match_limit to a non-zero value will limit the execution of
|
||||
// pcre to keep it from doing bad things like blowing the stack or taking
|
||||
// an eternity to return a result. A value of 5000 is good enough to stop
|
||||
// stack blowup in a 2MB thread stack. Setting match_limit to zero will
|
||||
// disable match limiting. Alternately, you can set match_limit_recursion()
|
||||
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
|
||||
// recurses. match_limit() caps the number of matches pcre does;
|
||||
// match_limit_recursion() caps the depth of recursion.
|
||||
//
|
||||
// Normally, to pass one or more modifiers to a RE class, you declare
|
||||
// a RE_Options object, set the appropriate options, and pass this
|
||||
// object to a RE constructor. Example:
|
||||
//
|
||||
// RE_Options opt;
|
||||
// opt.set_caseless(true);
|
||||
//
|
||||
// if (RE("HELLO", opt).PartialMatch("hello world")) ...
|
||||
//
|
||||
// RE_Options has two constructors. The default constructor takes no
|
||||
// arguments and creates a set of flags that are off by default.
|
||||
//
|
||||
// The optional parameter 'option_flags' is to facilitate transfer
|
||||
// of legacy code from C programs. This lets you do
|
||||
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
|
||||
//
|
||||
// But new code is better off doing
|
||||
// RE(pattern,
|
||||
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
|
||||
// (See below)
|
||||
//
|
||||
// If you are going to pass one of the most used modifiers, there are some
|
||||
// convenience functions that return a RE_Options class with the
|
||||
// appropriate modifier already set:
|
||||
// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
|
||||
//
|
||||
// If you need to set several options at once, and you don't want to go
|
||||
// through the pains of declaring a RE_Options object and setting several
|
||||
// options, there is a parallel method that gives you such ability on the
|
||||
// fly. You can concatenate several set_xxxxx member functions, since each
|
||||
// of them returns a reference to its class object. e.g.: to pass
|
||||
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
|
||||
// statement, you may write
|
||||
//
|
||||
// RE(" ^ xyz \\s+ .* blah$", RE_Options()
|
||||
// .set_caseless(true)
|
||||
// .set_extended(true)
|
||||
// .set_multiline(true)).PartialMatch(sometext);
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// SCANNING TEXT INCREMENTALLY
|
||||
//
|
||||
// The "Consume" operation may be useful if you want to repeatedly
|
||||
// match regular expressions at the front of a string and skip over
|
||||
// them as they match. This requires use of the "StringPiece" type,
|
||||
// which represents a sub-range of a real string. Like RE, StringPiece
|
||||
// is defined in the pcrecpp namespace.
|
||||
//
|
||||
// Example: read lines of the form "var = value" from a string.
|
||||
// string contents = ...; // Fill string somehow
|
||||
// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
|
||||
//
|
||||
// string var;
|
||||
// int value;
|
||||
// pcrecpp::RE re("(\\w+) = (\\d+)\n");
|
||||
// while (re.Consume(&input, &var, &value)) {
|
||||
// ...;
|
||||
// }
|
||||
//
|
||||
// Each successful call to "Consume" will set "var/value", and also
|
||||
// advance "input" so it points past the matched text.
|
||||
//
|
||||
// The "FindAndConsume" operation is similar to "Consume" but does not
|
||||
// anchor your match at the beginning of the string. For example, you
|
||||
// could extract all words from a string by repeatedly calling
|
||||
// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// PARSING HEX/OCTAL/C-RADIX NUMBERS
|
||||
//
|
||||
// By default, if you pass a pointer to a numeric value, the
|
||||
// corresponding text is interpreted as a base-10 number. You can
|
||||
// instead wrap the pointer with a call to one of the operators Hex(),
|
||||
// Octal(), or CRadix() to interpret the text in another base. The
|
||||
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
|
||||
// prefixes, but defaults to base-10.
|
||||
//
|
||||
// Example:
|
||||
// int a, b, c, d;
|
||||
// pcrecpp::RE re("(.*) (.*) (.*) (.*)");
|
||||
// re.FullMatch("100 40 0100 0x40",
|
||||
// pcrecpp::Octal(&a), pcrecpp::Hex(&b),
|
||||
// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
|
||||
// will leave 64 in a, b, c, and d.
|
||||
//
|
||||
// -----------------------------------------------------------------------
|
||||
// REPLACING PARTS OF STRINGS
|
||||
//
|
||||
// You can replace the first match of "pattern" in "str" with
|
||||
// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9)
|
||||
// can be used to insert text matching corresponding parenthesized
|
||||
// group from the pattern. \0 in "rewrite" refers to the entire
|
||||
// matching text. E.g.,
|
||||
//
|
||||
// string s = "yabba dabba doo";
|
||||
// pcrecpp::RE("b+").Replace("d", &s);
|
||||
//
|
||||
// will leave "s" containing "yada dabba doo". The result is true if
|
||||
// the pattern matches and a replacement occurs, or false otherwise.
|
||||
//
|
||||
// GlobalReplace() is like Replace(), except that it replaces all
|
||||
// occurrences of the pattern in the string with the rewrite.
|
||||
// Replacements are not subject to re-matching. E.g.,
|
||||
//
|
||||
// string s = "yabba dabba doo";
|
||||
// pcrecpp::RE("b+").GlobalReplace("d", &s);
|
||||
//
|
||||
// will leave "s" containing "yada dada doo". It returns the number
|
||||
// of replacements made.
|
||||
//
|
||||
// Extract() is like Replace(), except that if the pattern matches,
|
||||
// "rewrite" is copied into "out" (an additional argument) with
|
||||
// substitutions. The non-matching portions of "text" are ignored.
|
||||
// Returns true iff a match occurred and the extraction happened
|
||||
// successfully. If no match occurs, the string is left unaffected.
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <pcrecpparg.h> // defines the Arg class
|
||||
// These aren't technically needed here, but we include them
|
||||
// anyway so folks who include pcrecpp.h don't have to include
|
||||
// all these other header files as well.
|
||||
#include <pcre.h>
|
||||
#include <pcre_stringpiece.h>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
// Helper for the RE_Options setters: turns the option bit(s) "o" on in
// all_options_ when "b" is true and off otherwise, then returns *this.
// Because it ends in a return statement it is only usable as the complete
// body of a member function returning a reference to its own object.
#define PCRE_SET_OR_CLEAR(b, o) \
    if (b) all_options_ |= (o); else all_options_ &= ~(o); \
    return *this

// Helper for the RE_Options getters: true iff every bit of "o" is set in
// all_options_.  The argument and the whole expansion are parenthesised so
// that the macro stays correct when "o" is a compound expression (A | B)
// or when the result is combined with other operators (!PCRE_IS_SET(x)).
#define PCRE_IS_SET(o)  \
    (((all_options_) & (o)) == (o))
|
||||
|
||||
// We convert user-passed pointers into special Arg objects
|
||||
extern Arg no_arg;
|
||||
|
||||
/***** Compiling regular expressions: the RE class *****/
|
||||
|
||||
// RE_Options allow you to set options to be passed along to pcre,
|
||||
// along with other options we put on top of pcre.
|
||||
// Only 9 modifiers, plus match_limit and match_limit_recursion,
|
||||
// are supported now.
|
||||
class RE_Options {
|
||||
public:
|
||||
// constructor
|
||||
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
|
||||
|
||||
// alternative constructor.
|
||||
// To facilitate transfer of legacy code from C programs
|
||||
//
|
||||
// This lets you do
|
||||
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
|
||||
// But new code is better off doing
|
||||
// RE(pattern,
|
||||
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
|
||||
RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
|
||||
all_options_(option_flags) {}
|
||||
// we're fine with the default destructor, copy constructor, etc.
|
||||
|
||||
// accessors and mutators
|
||||
int match_limit() const { return match_limit_; };
|
||||
RE_Options &set_match_limit(int limit) {
|
||||
match_limit_ = limit;
|
||||
return *this;
|
||||
}
|
||||
|
||||
int match_limit_recursion() const { return match_limit_recursion_; };
|
||||
RE_Options &set_match_limit_recursion(int limit) {
|
||||
match_limit_recursion_ = limit;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool caseless() const {
|
||||
return PCRE_IS_SET(PCRE_CASELESS);
|
||||
}
|
||||
RE_Options &set_caseless(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
|
||||
}
|
||||
|
||||
bool multiline() const {
|
||||
return PCRE_IS_SET(PCRE_MULTILINE);
|
||||
}
|
||||
RE_Options &set_multiline(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
|
||||
}
|
||||
|
||||
bool dotall() const {
|
||||
return PCRE_IS_SET(PCRE_DOTALL);
|
||||
}
|
||||
RE_Options &set_dotall(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x,PCRE_DOTALL);
|
||||
}
|
||||
|
||||
bool extended() const {
|
||||
return PCRE_IS_SET(PCRE_EXTENDED);
|
||||
}
|
||||
RE_Options &set_extended(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x,PCRE_EXTENDED);
|
||||
}
|
||||
|
||||
bool dollar_endonly() const {
|
||||
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
|
||||
}
|
||||
RE_Options &set_dollar_endonly(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x,PCRE_DOLLAR_ENDONLY);
|
||||
}
|
||||
|
||||
bool extra() const {
|
||||
return PCRE_IS_SET( PCRE_EXTRA);
|
||||
}
|
||||
RE_Options &set_extra(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
|
||||
}
|
||||
|
||||
bool ungreedy() const {
|
||||
return PCRE_IS_SET(PCRE_UNGREEDY);
|
||||
}
|
||||
RE_Options &set_ungreedy(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
|
||||
}
|
||||
|
||||
bool utf8() const {
|
||||
return PCRE_IS_SET(PCRE_UTF8);
|
||||
}
|
||||
RE_Options &set_utf8(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
|
||||
}
|
||||
|
||||
bool no_auto_capture() const {
|
||||
return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
|
||||
}
|
||||
RE_Options &set_no_auto_capture(bool x) {
|
||||
PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
|
||||
}
|
||||
|
||||
RE_Options &set_all_options(int opt) {
|
||||
all_options_ = opt;
|
||||
return *this;
|
||||
}
|
||||
int all_options() const {
|
||||
return all_options_ ;
|
||||
}
|
||||
|
||||
// TODO: add other pcre flags
|
||||
|
||||
private:
|
||||
int match_limit_;
|
||||
int match_limit_recursion_;
|
||||
int all_options_;
|
||||
};
|
||||
|
||||
// These functions return some common RE_Options
|
||||
// Returns a default-constructed RE_Options with PCRE_UTF8 switched on.
static inline RE_Options UTF8() {
  RE_Options opts;
  opts.set_utf8(true);
  return opts;
}
|
||||
|
||||
// Returns a default-constructed RE_Options with PCRE_CASELESS switched on.
static inline RE_Options CASELESS() {
  RE_Options opts;
  opts.set_caseless(true);
  return opts;
}
|
||||
// Returns a default-constructed RE_Options with PCRE_MULTILINE switched on.
static inline RE_Options MULTILINE() {
  RE_Options opts;
  opts.set_multiline(true);
  return opts;
}
|
||||
|
||||
// Returns a default-constructed RE_Options with PCRE_DOTALL switched on.
static inline RE_Options DOTALL() {
  RE_Options opts;
  opts.set_dotall(true);
  return opts;
}
|
||||
|
||||
// Returns a default-constructed RE_Options with PCRE_EXTENDED switched on.
static inline RE_Options EXTENDED() {
  RE_Options opts;
  opts.set_extended(true);
  return opts;
}
|
||||
|
||||
// Interface for regular expression matching. Also corresponds to a
|
||||
// pre-compiled regular expression. An "RE" object is safe for
|
||||
// concurrent use by multiple threads.
|
||||
class RE {
|
||||
public:
|
||||
// We provide implicit conversions from strings so that users can
|
||||
// pass in a string or a "const char*" wherever an "RE" is expected.
|
||||
RE(const char* pat) { Init(pat, NULL); }
|
||||
RE(const char *pat, const RE_Options& option) { Init(pat, &option); }
|
||||
RE(const string& pat) { Init(pat, NULL); }
|
||||
RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
|
||||
|
||||
// Copy constructor & assignment - note that these are expensive
|
||||
// because they recompile the expression.
|
||||
RE(const RE& re) { Init(re.pattern_, &re.options_); }
|
||||
const RE& operator=(const RE& re) {
|
||||
if (this != &re) {
|
||||
Cleanup();
|
||||
|
||||
// This is the code that originally came from Google
|
||||
// Init(re.pattern_.c_str(), &re.options_);
|
||||
|
||||
// This is the replacement from Ari Pollak
|
||||
Init(re.pattern_, &re.options_);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
~RE();
|
||||
|
||||
// The string specification for this RE. E.g.
|
||||
// RE re("ab*c?d+");
|
||||
// re.pattern(); // "ab*c?d+"
|
||||
const string& pattern() const { return pattern_; }
|
||||
|
||||
// If RE could not be created properly, returns an error string.
|
||||
// Else returns the empty string.
|
||||
const string& error() const { return *error_; }
|
||||
|
||||
/***** The useful part: the matching interface *****/
|
||||
|
||||
// This is provided so one can do pattern.ReplaceAll() just as
|
||||
// easily as ReplaceAll(pattern-text, ....)
|
||||
|
||||
bool FullMatch(const StringPiece& text,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool PartialMatch(const StringPiece& text,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool Consume(StringPiece* input,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool FindAndConsume(StringPiece* input,
|
||||
const Arg& ptr1 = no_arg,
|
||||
const Arg& ptr2 = no_arg,
|
||||
const Arg& ptr3 = no_arg,
|
||||
const Arg& ptr4 = no_arg,
|
||||
const Arg& ptr5 = no_arg,
|
||||
const Arg& ptr6 = no_arg,
|
||||
const Arg& ptr7 = no_arg,
|
||||
const Arg& ptr8 = no_arg,
|
||||
const Arg& ptr9 = no_arg,
|
||||
const Arg& ptr10 = no_arg,
|
||||
const Arg& ptr11 = no_arg,
|
||||
const Arg& ptr12 = no_arg,
|
||||
const Arg& ptr13 = no_arg,
|
||||
const Arg& ptr14 = no_arg,
|
||||
const Arg& ptr15 = no_arg,
|
||||
const Arg& ptr16 = no_arg) const;
|
||||
|
||||
bool Replace(const StringPiece& rewrite,
|
||||
string *str) const;
|
||||
|
||||
int GlobalReplace(const StringPiece& rewrite,
|
||||
string *str) const;
|
||||
|
||||
bool Extract(const StringPiece &rewrite,
|
||||
const StringPiece &text,
|
||||
string *out) const;
|
||||
|
||||
// Escapes all potentially meaningful regexp characters in
|
||||
// 'unquoted'. The returned string, used as a regular expression,
|
||||
// will exactly match the original string. For example,
|
||||
// 1.5-2.0?
|
||||
// may become:
|
||||
// 1\.5\-2\.0\?
|
||||
static string QuoteMeta(const StringPiece& unquoted);
|
||||
|
||||
|
||||
/***** Generic matching interface *****/
|
||||
|
||||
// Type of match (TODO: Should be restructured as part of RE_Options)
|
||||
enum Anchor {
|
||||
UNANCHORED, // No anchoring
|
||||
ANCHOR_START, // Anchor at start only
|
||||
ANCHOR_BOTH // Anchor at start and end
|
||||
};
|
||||
|
||||
// General matching routine. Stores the length of the match in
|
||||
// "*consumed" if successful.
|
||||
bool DoMatch(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const* args, int n) const;
|
||||
|
||||
// Return the number of capturing subpatterns, or -1 if the
|
||||
// regexp wasn't valid on construction.
|
||||
int NumberOfCapturingGroups() const;
|
||||
|
||||
private:
|
||||
|
||||
void Init(const string& pattern, const RE_Options* options);
|
||||
void Cleanup();
|
||||
|
||||
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
|
||||
// pairs of integers for the beginning and end positions of matched
|
||||
// text. The first pair corresponds to the entire matched text;
|
||||
// subsequent pairs correspond, in order, to parentheses-captured
|
||||
// matches. Returns the number of pairs (one more than the number of
|
||||
// the last subpattern with a match) if matching was successful
|
||||
// and zero if the match failed.
|
||||
// I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
|
||||
// against "foo", "bar", and "baz" respectively.
|
||||
// When matching RE("(foo)|hello") against "hello", it will return 1.
|
||||
// But the values for all subpattern are filled in into "vec".
|
||||
int TryMatch(const StringPiece& text,
|
||||
int startpos,
|
||||
Anchor anchor,
|
||||
int *vec,
|
||||
int vecsize) const;
|
||||
|
||||
// Append the "rewrite" string, with backslash subsitutions from "text"
|
||||
// and "vec", to string "out".
|
||||
bool Rewrite(string *out,
|
||||
const StringPiece& rewrite,
|
||||
const StringPiece& text,
|
||||
int *vec,
|
||||
int veclen) const;
|
||||
|
||||
// internal implementation for DoMatch
|
||||
bool DoMatchImpl(const StringPiece& text,
|
||||
Anchor anchor,
|
||||
int* consumed,
|
||||
const Arg* const args[],
|
||||
int n,
|
||||
int* vec,
|
||||
int vecsize) const;
|
||||
|
||||
// Compile the regexp for the specified anchoring mode
|
||||
pcre* Compile(Anchor anchor);
|
||||
|
||||
string pattern_;
|
||||
RE_Options options_;
|
||||
pcre* re_full_; // For full matches
|
||||
pcre* re_partial_; // For partial matches
|
||||
const string* error_; // Error indicator (or points to empty string)
|
||||
};
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
#endif /* _PCRECPP_H */
|
File diff suppressed because it is too large
Load Diff
@ -1,171 +0,0 @@
|
||||
// Copyright (c) 2005, Google Inc.
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Author: Sanjay Ghemawat
|
||||
|
||||
#ifndef _PCRECPPARG_H
|
||||
#define _PCRECPPARG_H
|
||||
|
||||
#include <stdlib.h> // for NULL
|
||||
#include <string>
|
||||
|
||||
namespace pcrecpp {
|
||||
|
||||
class StringPiece;
|
||||
|
||||
// Hex/Octal/Binary?
|
||||
|
||||
// Special class for parsing into objects that define a ParseFrom() method
|
||||
template <class T>
|
||||
class _RE_MatchObject {
|
||||
public:
|
||||
static inline bool Parse(const char* str, int n, void* dest) {
|
||||
T* object = reinterpret_cast<T*>(dest);
|
||||
return object->ParseFrom(str, n);
|
||||
}
|
||||
};
|
||||
|
||||
class Arg {
|
||||
public:
|
||||
// Empty constructor so we can declare arrays of Arg
|
||||
Arg();
|
||||
|
||||
// Constructor specially designed for NULL arguments
|
||||
Arg(void*);
|
||||
|
||||
typedef bool (*Parser)(const char* str, int n, void* dest);
|
||||
|
||||
// Type-specific parsers
|
||||
#define PCRE_MAKE_PARSER(type,name) \
|
||||
Arg(type* p) : arg_(p), parser_(name) { } \
|
||||
Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
|
||||
|
||||
|
||||
PCRE_MAKE_PARSER(char, parse_char);
|
||||
PCRE_MAKE_PARSER(unsigned char, parse_uchar);
|
||||
PCRE_MAKE_PARSER(short, parse_short);
|
||||
PCRE_MAKE_PARSER(unsigned short, parse_ushort);
|
||||
PCRE_MAKE_PARSER(int, parse_int);
|
||||
PCRE_MAKE_PARSER(unsigned int, parse_uint);
|
||||
PCRE_MAKE_PARSER(long, parse_long);
|
||||
PCRE_MAKE_PARSER(unsigned long, parse_ulong);
|
||||
#if 1
|
||||
PCRE_MAKE_PARSER(long long, parse_longlong);
|
||||
#endif
|
||||
#if 1
|
||||
PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
|
||||
#endif
|
||||
PCRE_MAKE_PARSER(float, parse_float);
|
||||
PCRE_MAKE_PARSER(double, parse_double);
|
||||
PCRE_MAKE_PARSER(std::string, parse_string);
|
||||
PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
|
||||
|
||||
#undef PCRE_MAKE_PARSER
|
||||
|
||||
// Generic constructor
|
||||
template <class T> Arg(T*, Parser parser);
|
||||
// Generic constructor template
|
||||
template <class T> Arg(T* p)
|
||||
: arg_(p), parser_(_RE_MatchObject<T>::Parse) {
|
||||
}
|
||||
|
||||
// Parse the data
|
||||
bool Parse(const char* str, int n) const;
|
||||
|
||||
private:
|
||||
void* arg_;
|
||||
Parser parser_;
|
||||
|
||||
static bool parse_null (const char* str, int n, void* dest);
|
||||
static bool parse_char (const char* str, int n, void* dest);
|
||||
static bool parse_uchar (const char* str, int n, void* dest);
|
||||
static bool parse_float (const char* str, int n, void* dest);
|
||||
static bool parse_double (const char* str, int n, void* dest);
|
||||
static bool parse_string (const char* str, int n, void* dest);
|
||||
static bool parse_stringpiece (const char* str, int n, void* dest);
|
||||
|
||||
#define PCRE_DECLARE_INTEGER_PARSER(name) \
|
||||
private: \
|
||||
static bool parse_ ## name(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _radix( \
|
||||
const char* str, int n, void* dest, int radix); \
|
||||
public: \
|
||||
static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
|
||||
static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
|
||||
|
||||
PCRE_DECLARE_INTEGER_PARSER(short);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ushort);
|
||||
PCRE_DECLARE_INTEGER_PARSER(int);
|
||||
PCRE_DECLARE_INTEGER_PARSER(uint);
|
||||
PCRE_DECLARE_INTEGER_PARSER(long);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ulong);
|
||||
PCRE_DECLARE_INTEGER_PARSER(longlong);
|
||||
PCRE_DECLARE_INTEGER_PARSER(ulonglong);
|
||||
|
||||
#undef PCRE_DECLARE_INTEGER_PARSER
|
||||
};
|
||||
|
||||
inline Arg::Arg() : arg_(NULL), parser_(parse_null) { }
|
||||
inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
|
||||
|
||||
inline bool Arg::Parse(const char* str, int n) const {
|
||||
return (*parser_)(str, n, arg_);
|
||||
}
|
||||
|
||||
// This part of the parser, appropriate only for ints, deals with bases
|
||||
#define MAKE_INTEGER_PARSER(type, name) \
|
||||
inline Arg Hex(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _hex); } \
|
||||
inline Arg Octal(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _octal); } \
|
||||
inline Arg CRadix(type* ptr) { \
|
||||
return Arg(ptr, Arg::parse_ ## name ## _cradix); }
|
||||
|
||||
MAKE_INTEGER_PARSER(short, short);
|
||||
MAKE_INTEGER_PARSER(unsigned short, ushort);
|
||||
MAKE_INTEGER_PARSER(int, int);
|
||||
MAKE_INTEGER_PARSER(unsigned int, uint);
|
||||
MAKE_INTEGER_PARSER(long, long);
|
||||
MAKE_INTEGER_PARSER(unsigned long, ulong);
|
||||
#if 1
|
||||
MAKE_INTEGER_PARSER(long long, longlong);
|
||||
#endif
|
||||
#if 1
|
||||
MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
|
||||
#endif
|
||||
|
||||
#undef PCRE_IS_SET
|
||||
#undef PCRE_SET_OR_CLEAR
|
||||
#undef MAKE_INTEGER_PARSER
|
||||
|
||||
} // namespace pcrecpp
|
||||
|
||||
|
||||
#endif /* _PCRECPPARG_H */
|
@ -16,6 +16,10 @@ systems (e.g. Solaris) use the -R option.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre.h>
|
||||
|
@ -6,7 +6,7 @@
|
||||
its pattern matching. On a Unix or Win32 system it can recurse into
|
||||
directories.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -37,6 +37,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
@ -46,17 +50,17 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "config.h"
|
||||
#include "pcre.h"
|
||||
#include <pcre.h>
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#define VERSION "4.4 29-Nov-2006"
|
||||
#define MAX_PATTERN_COUNT 100
|
||||
|
||||
#if BUFSIZ > 8192
|
||||
@ -84,7 +88,7 @@ enum { DEE_READ, DEE_SKIP };
|
||||
|
||||
/* Line ending types */
|
||||
|
||||
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
|
||||
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
|
||||
|
||||
|
||||
|
||||
@ -117,8 +121,8 @@ static char *locale = NULL;
|
||||
static const unsigned char *pcretables = NULL;
|
||||
|
||||
static int pattern_count = 0;
|
||||
static pcre **pattern_list;
|
||||
static pcre_extra **hints_list;
|
||||
static pcre **pattern_list = NULL;
|
||||
static pcre_extra **hints_list = NULL;
|
||||
|
||||
static char *include_pattern = NULL;
|
||||
static char *exclude_pattern = NULL;
|
||||
@ -192,7 +196,7 @@ static option_item optionlist[] = {
|
||||
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
|
||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
|
||||
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
|
||||
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
|
||||
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
|
||||
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
|
||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||
@ -222,7 +226,7 @@ static const char *prefix[] = {
|
||||
static const char *suffix[] = {
|
||||
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
|
||||
|
||||
/* UTF-8 tables - used only when the newline setting is "all". */
|
||||
/* UTF-8 tables - used only when the newline setting is "any". */
|
||||
|
||||
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
@ -244,7 +248,7 @@ although at present the only ones are for Unix, Win32, and for "no support". */
|
||||
|
||||
/************* Directory scanning in Unix ***********/
|
||||
|
||||
#if IS_UNIX
|
||||
#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
@ -276,7 +280,7 @@ for (;;)
|
||||
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
|
||||
return dent->d_name;
|
||||
}
|
||||
return NULL; /* Keep compiler happy; never executed */
|
||||
/* Control never reaches here */
|
||||
}
|
||||
|
||||
static void
|
||||
@ -314,7 +318,7 @@ Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
|
||||
when it did not exist. */
|
||||
|
||||
|
||||
#elif HAVE_WIN32API
|
||||
#elif HAVE_WINDOWS_H
|
||||
|
||||
#ifndef STRICT
|
||||
# define STRICT
|
||||
@ -436,8 +440,8 @@ FALSE;
|
||||
typedef void directory_type;
|
||||
|
||||
int isdirectory(char *filename) { return 0; }
|
||||
directory_type * opendirectory(char *filename) {}
|
||||
char *readdirectory(directory_type *dir) {}
|
||||
directory_type * opendirectory(char *filename) { return (directory_type*)0;}
|
||||
char *readdirectory(directory_type *dir) { return (char*)0;}
|
||||
void closedirectory(directory_type *dir) {}
|
||||
|
||||
|
||||
@ -461,7 +465,7 @@ return FALSE;
|
||||
|
||||
|
||||
|
||||
#if ! HAVE_STRERROR
|
||||
#ifndef HAVE_STRERROR
|
||||
/*************************************************
|
||||
* Provide strerror() for non-ANSI libraries *
|
||||
*************************************************/
|
||||
@ -541,6 +545,50 @@ switch(endlinetype)
|
||||
}
|
||||
break;
|
||||
|
||||
case EL_ANYCRLF:
|
||||
while (p < endptr)
|
||||
{
|
||||
int extra = 0;
|
||||
register int c = *((unsigned char *)p);
|
||||
|
||||
if (utf8 && c >= 0xc0)
|
||||
{
|
||||
int gcii, gcss;
|
||||
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
gcss = 6*extra;
|
||||
c = (c & utf8_table3[extra]) << gcss;
|
||||
for (gcii = 1; gcii <= extra; gcii++)
|
||||
{
|
||||
gcss -= 6;
|
||||
c |= (p[gcii] & 0x3f) << gcss;
|
||||
}
|
||||
}
|
||||
|
||||
p += 1 + extra;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case 0x0a: /* LF */
|
||||
*lenptr = 1;
|
||||
return p;
|
||||
|
||||
case 0x0d: /* CR */
|
||||
if (p < endptr && *p == 0x0a)
|
||||
{
|
||||
*lenptr = 2;
|
||||
p++;
|
||||
}
|
||||
else *lenptr = 1;
|
||||
return p;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} /* End of loop for ANYCRLF case */
|
||||
|
||||
*lenptr = 0; /* Must have hit the end */
|
||||
return endptr;
|
||||
|
||||
case EL_ANY:
|
||||
while (p < endptr)
|
||||
{
|
||||
@ -639,6 +687,7 @@ switch(endlinetype)
|
||||
return p; /* But control should never get here */
|
||||
|
||||
case EL_ANY:
|
||||
case EL_ANYCRLF:
|
||||
if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
|
||||
if (utf8) while ((*p & 0xc0) == 0x80) p--;
|
||||
|
||||
@ -667,7 +716,17 @@ switch(endlinetype)
|
||||
}
|
||||
else c = *((unsigned char *)pp);
|
||||
|
||||
switch (c)
|
||||
if (endlinetype == EL_ANYCRLF) switch (c)
|
||||
{
|
||||
case 0x0a: /* LF */
|
||||
case 0x0d: /* CR */
|
||||
return p;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
else switch (c)
|
||||
{
|
||||
case 0x0a: /* LF */
|
||||
case 0x0b: /* VT */
|
||||
@ -1188,7 +1247,8 @@ if ((sep = isdirectory(pathname)) != 0)
|
||||
while ((nextfile = readdirectory(dir)) != NULL)
|
||||
{
|
||||
int frc, blen;
|
||||
blen = slprintf(buffer, sizeof(buffer), "%.512s%c%.128s", pathname, sep, nextfile);
|
||||
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
|
||||
blen = strlen(buffer);
|
||||
|
||||
if (exclude_compiled != NULL &&
|
||||
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
|
||||
@ -1280,7 +1340,7 @@ for (op = optionlist; op->one_char != 0; op++)
|
||||
{
|
||||
int n;
|
||||
char s[4];
|
||||
if (op->one_char > 0) snprintf(s, sizeof(s), "-%c,", op->one_char); else strcpy(s, " ");
|
||||
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
|
||||
printf(" %s --%s%n", s, op->long_name, &n);
|
||||
n = 30 - n;
|
||||
if (n < 1) n = 1;
|
||||
@ -1327,8 +1387,7 @@ switch(letter)
|
||||
case 'x': process_options |= PO_LINE_MATCH; break;
|
||||
|
||||
case 'V':
|
||||
fprintf(stderr, "pcregrep version %s using ", VERSION);
|
||||
fprintf(stderr, "PCRE version %s\n", pcre_version());
|
||||
fprintf(stderr, "pcregrep version %s\n", pcre_version());
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
@ -1354,7 +1413,7 @@ ordin(int n)
|
||||
{
|
||||
static char buffer[8];
|
||||
char *p = buffer;
|
||||
snprintf(p, sizeof(buffer), "%d", n);
|
||||
sprintf(p, "%d", n);
|
||||
while (*p != 0) p++;
|
||||
switch (n%10)
|
||||
{
|
||||
@ -1400,11 +1459,15 @@ if (pattern_count >= MAX_PATTERN_COUNT)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
snprintf(buffer, sizeof(buffer), "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
|
||||
sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
|
||||
suffix[process_options]);
|
||||
pattern_list[pattern_count] =
|
||||
pcre_compile(buffer, options, &error, &errptr, pcretables);
|
||||
if (pattern_list[pattern_count++] != NULL) return TRUE;
|
||||
if (pattern_list[pattern_count] != NULL)
|
||||
{
|
||||
pattern_count++;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Handle compile errors */
|
||||
|
||||
@ -1462,7 +1525,7 @@ if ((process_options & PO_FIXED_STRINGS) != 0)
|
||||
char *p = end_of_line(pattern, eop, &ellength);
|
||||
if (ellength == 0)
|
||||
return compile_single_pattern(pattern, options, filename, count);
|
||||
snprintf(buffer, sizeof(buffer), "%.*s", p - pattern - ellength, pattern);
|
||||
sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
|
||||
pattern = p;
|
||||
if (!compile_single_pattern(buffer, options, filename, count))
|
||||
return FALSE;
|
||||
@ -1486,6 +1549,7 @@ int i, j;
|
||||
int rc = 1;
|
||||
int pcre_options = 0;
|
||||
int cmd_pattern_count = 0;
|
||||
int hint_count = 0;
|
||||
int errptr;
|
||||
BOOL only_one_at_top;
|
||||
char *patterns[MAX_PATTERN_COUNT];
|
||||
@ -1503,6 +1567,7 @@ switch(i)
|
||||
case '\r': newline = (char *)"cr"; break;
|
||||
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
|
||||
case -1: newline = (char *)"any"; break;
|
||||
case -2: newline = (char *)"anycrlf"; break;
|
||||
}
|
||||
|
||||
/* Process the options */
|
||||
@ -1578,9 +1643,9 @@ for (i = 1; i < argc; i++)
|
||||
char buff1[24];
|
||||
char buff2[24];
|
||||
int baselen = opbra - op->long_name;
|
||||
snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name);
|
||||
snprintf(buff2, sizeof(buff2), "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
|
||||
opbra + 1);
|
||||
sprintf(buff1, "%.*s", baselen, op->long_name);
|
||||
sprintf(buff2, "%s%.*s", buff1,
|
||||
(int)strlen(op->long_name) - baselen - 2, opbra + 1);
|
||||
if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
|
||||
break;
|
||||
}
|
||||
@ -1810,6 +1875,11 @@ else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
|
||||
pcre_options |= PCRE_NEWLINE_ANY;
|
||||
endlinetype = EL_ANY;
|
||||
}
|
||||
else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
|
||||
{
|
||||
pcre_options |= PCRE_NEWLINE_ANYCRLF;
|
||||
endlinetype = EL_ANYCRLF;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
|
||||
@ -1864,7 +1934,7 @@ hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
|
||||
if (pattern_list == NULL || hints_list == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcregrep: malloc failed\n");
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
/* If no patterns were provided by -e, and there is no file provided by -f,
|
||||
@ -1883,7 +1953,7 @@ for (j = 0; j < cmd_pattern_count; j++)
|
||||
{
|
||||
if (!compile_pattern(patterns[j], pcre_options, NULL,
|
||||
(j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
/* Compile the regular expressions that are provided in a file. */
|
||||
@ -1907,7 +1977,7 @@ if (pattern_filename != NULL)
|
||||
{
|
||||
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
|
||||
strerror(errno));
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
filename = pattern_filename;
|
||||
}
|
||||
@ -1920,7 +1990,7 @@ if (pattern_filename != NULL)
|
||||
linenumber++;
|
||||
if (buffer[0] == 0) continue; /* Skip blank lines */
|
||||
if (!compile_pattern(buffer, pcre_options, filename, linenumber))
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
|
||||
if (f != stdin) fclose(f);
|
||||
@ -1934,10 +2004,11 @@ for (j = 0; j < pattern_count; j++)
|
||||
if (error != NULL)
|
||||
{
|
||||
char s[16];
|
||||
if (pattern_count == 1) s[0] = 0; else snprintf(s, sizeof(s), " number %d", j);
|
||||
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
|
||||
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
hint_count++;
|
||||
}
|
||||
|
||||
/* If there are include or exclude patterns, compile them. */
|
||||
@ -1950,7 +2021,7 @@ if (exclude_pattern != NULL)
|
||||
{
|
||||
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
|
||||
errptr, error);
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1962,14 +2033,17 @@ if (include_pattern != NULL)
|
||||
{
|
||||
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
|
||||
errptr, error);
|
||||
return 2;
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there are no further arguments, do the business on stdin and exit. */
|
||||
|
||||
if (i >= argc)
|
||||
return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
|
||||
{
|
||||
rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Otherwise, work through the remaining arguments as files or directories.
|
||||
Pass in the fact that there is only one argument at top level - this suppresses
|
||||
@ -1986,7 +2060,22 @@ for (; i < argc; i++)
|
||||
else if (frc == 0 && rc == 1) rc = 0;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (pattern_list != NULL)
|
||||
{
|
||||
for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
|
||||
free(pattern_list);
|
||||
}
|
||||
if (hints_list != NULL)
|
||||
{
|
||||
for (i = 0; i < hint_count; i++) free(hints_list[i]);
|
||||
free(hints_list);
|
||||
}
|
||||
return rc;
|
||||
|
||||
EXIT2:
|
||||
rc = 2;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcregrep */
|
||||
|
@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -80,7 +80,7 @@ static const int eint[] = {
|
||||
REG_BADPAT, /* malformed number or name after (?( */
|
||||
REG_BADPAT, /* conditional group contains more than two branches */
|
||||
REG_BADPAT, /* assertion expected after (?( */
|
||||
REG_BADPAT, /* (?R or (?digits must be followed by ) */
|
||||
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
|
||||
REG_ECTYPE, /* unknown POSIX class name */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
|
||||
@ -108,7 +108,8 @@ static const int eint[] = {
|
||||
REG_BADPAT, /* DEFINE group contains more than one branch */
|
||||
REG_BADPAT, /* repeating a DEFINE group is not allowed */
|
||||
REG_INVARG, /* inconsistent NEWLINE options */
|
||||
REG_BADPAT /* \g is not followed followed by an (optionally braced) non-zero number */
|
||||
REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */
|
||||
REG_BADPAT /* (?+ or (?- must be followed by a non-zero number */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
@ -141,7 +142,7 @@ static const char *const pstring[] = {
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCRE_DATA_SCOPE size_t
|
||||
PCREPOSIX_EXP_DEFN size_t
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const char *message, *addmessage;
|
||||
@ -158,7 +159,7 @@ addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
||||
if (errbuf_size > 0)
|
||||
{
|
||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
||||
snprintf(errbuf, errbuf_size, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
||||
else
|
||||
{
|
||||
strncpy(errbuf, message, errbuf_size - 1);
|
||||
@ -176,7 +177,7 @@ return length + addlength;
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCRE_DATA_SCOPE void
|
||||
PCREPOSIX_EXP_DEFN void
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
(pcre_free)(preg->re_pcre);
|
||||
@ -199,7 +200,7 @@ Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCREPOSIX_EXP_DEFN int
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
const char *errorptr;
|
||||
@ -241,7 +242,7 @@ If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
|
||||
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
|
||||
the only result is yes/no/error. */
|
||||
|
||||
PCRE_DATA_SCOPE int
|
||||
PCREPOSIX_EXP_DEFN int
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
|
@ -9,7 +9,7 @@
|
||||
Compatible Regular Expression library. It defines the things POSIX says should
|
||||
be there. I hope.
|
||||
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
Copyright (c) 1997-2007 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -105,40 +105,36 @@ typedef struct {
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export settings are needed. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCREPOSIX_STATIC
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Otherwise, we use the standard "extern". */
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifndef PCREPOSIX_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# define PCREPOSIX_EXP_DECL extern "C"
|
||||
# define PCREPOSIX_EXP_DEFN extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# define PCREPOSIX_EXP_DECL extern
|
||||
# define PCREPOSIX_EXP_DEFN extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions */
|
||||
|
||||
PCRE_DATA_SCOPE int regcomp(regex_t *, const char *, int);
|
||||
PCRE_DATA_SCOPE int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCRE_DATA_SCOPE size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCRE_DATA_SCOPE void regfree(regex_t *);
|
||||
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
|
||||
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCREPOSIX_EXP_DECL void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
File diff suppressed because it is too large
Load Diff
7
ext/pcre/pcrelib/testdata/grepinputx
vendored
7
ext/pcre/pcrelib/testdata/grepinputx
vendored
@ -39,11 +39,4 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
abc
|
||||
def
|
||||
ghi
|
||||
jkl
|
||||
|
||||
This is the last line of this file.
|
||||
|
56
ext/pcre/pcrelib/testdata/grepoutput
vendored
56
ext/pcre/pcrelib/testdata/grepoutput
vendored
@ -75,14 +75,7 @@ RC=1
|
||||
39:nineteen
|
||||
40:twenty
|
||||
41:
|
||||
42:Here follows some CR/LF/CRLF test data.
|
||||
43:
|
||||
44:abc
|
||||
def
|
||||
45:ghi
|
||||
46:jkl
|
||||
47:
|
||||
48:This is the last line of this file.
|
||||
42:This is the last line of this file.
|
||||
---------------------------- Test 12 -----------------------------
|
||||
Pattern
|
||||
---------------------------- Test 13 -----------------------------
|
||||
@ -164,8 +157,7 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 25 -----------------------------
|
||||
15-
|
||||
16-complete pair
|
||||
@ -215,8 +207,7 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 27 -----------------------------
|
||||
four
|
||||
five
|
||||
@ -236,10 +227,7 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
abc
|
||||
def
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 28 -----------------------------
|
||||
14-of lines all by themselves.
|
||||
15-
|
||||
@ -291,12 +279,7 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
abc
|
||||
def
|
||||
ghi
|
||||
jkl
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 30 -----------------------------
|
||||
./testdata/grepinput-4-features should be added at the end, because some of the tests involve the
|
||||
./testdata/grepinput-5-output of line numbers, and we don't want these to change.
|
||||
@ -345,8 +328,8 @@ RC=2
|
||||
./testdata/grepinputx
|
||||
RC=0
|
||||
---------------------------- Test 36 -----------------------------
|
||||
./testdata/grepinputx
|
||||
./testdata/grepinput8
|
||||
./testdata/grepinputx
|
||||
RC=0
|
||||
---------------------------- Test 37 -----------------------------
|
||||
aaaaa0
|
||||
@ -400,30 +383,3 @@ AB.VE
|
||||
AB.VE the turtle
|
||||
PUT NEW DATA ABOVE THIS LINE.
|
||||
---------------------------- Test 49 ------------------------------
|
||||
abc
|
||||
def
|
||||
ghi
|
||||
jkl
|
||||
---------------------------- Test 50 ------------------------------
|
||||
def
|
||||
---------------------------- Test 51 ------------------------------
|
||||
ghi
|
||||
jkl
|
||||
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 52 ------------------------------
|
||||
def
|
||||
ghi
|
||||
jkl
|
||||
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 53 ------------------------------
|
||||
ghi
|
||||
jkl
|
||||
|
||||
This is the last line of this file.
|
||||
---------------------------- Test 54 ------------------------------
|
||||
44:abc
|
||||
45:def
|
||||
46:ghi
|
||||
47:jkl
|
||||
|
22
ext/pcre/pcrelib/testdata/grepoutputN
vendored
Normal file
22
ext/pcre/pcrelib/testdata/grepoutputN
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
---------------------------- Test N1 ------------------------------
|
||||
1:abc
|
||||
2:def
|
||||
---------------------------- Test N2 ------------------------------
|
||||
1:abc
|
||||
def
|
||||
2:ghi
|
||||
jkl---------------------------- Test N3 ------------------------------
|
||||
2:def
|
||||
3:
|
||||
ghi
|
||||
jkl---------------------------- Test N4 ------------------------------
|
||||
2:ghi
|
||||
jkl---------------------------- Test N5 ------------------------------
|
||||
1:abc
|
||||
2:def
|
||||
3:ghi
|
||||
4:jkl---------------------------- Test N6 ------------------------------
|
||||
1:abc
|
||||
2:def
|
||||
3:ghi
|
||||
4:jkl
|
7
ext/pcre/pcrelib/testdata/testinput1
vendored
7
ext/pcre/pcrelib/testdata/testinput1
vendored
@ -1494,8 +1494,8 @@
|
||||
/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
|
||||
abcdefghijk\12S
|
||||
|
||||
/ab\hdef/
|
||||
abhdef
|
||||
/ab\idef/
|
||||
abidef
|
||||
|
||||
/a{0}bc/
|
||||
bc
|
||||
@ -4018,4 +4018,7 @@
|
||||
abc
|
||||
abc\n
|
||||
|
||||
/(.*(.)?)*/
|
||||
abcd
|
||||
|
||||
/ End of testinput1 /
|
||||
|
104
ext/pcre/pcrelib/testdata/testinput10
vendored
Normal file
104
ext/pcre/pcrelib/testdata/testinput10
vendored
Normal file
@ -0,0 +1,104 @@
|
||||
/-- These are a few representative patterns whose lengths and offsets are to be
|
||||
shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
are all themselves checked in other tests. --/
|
||||
|
||||
/((?i)b)/BM
|
||||
|
||||
/(?s)(.*X|^B)/BM
|
||||
|
||||
/(?s:.*X|^B)/BM
|
||||
|
||||
/^[[:alnum:]]/BM
|
||||
|
||||
/#/IxMD
|
||||
|
||||
/a#/IxMD
|
||||
|
||||
/x?+/BM
|
||||
|
||||
/x++/BM
|
||||
|
||||
/x{1,3}+/BM
|
||||
|
||||
/(x)*+/BM
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/BM
|
||||
|
||||
|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
|
||||
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
|
||||
/(a(?1)b)/BM
|
||||
|
||||
/(a(?1)+b)/BM
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/BM
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/BM
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
|
||||
|
||||
/abc(?C255)de(?C)f/BM
|
||||
|
||||
/abcde/CBM
|
||||
|
||||
/\x{100}/8BM
|
||||
|
||||
/\x{1000}/8BM
|
||||
|
||||
/\x{10000}/8BM
|
||||
|
||||
/\x{100000}/8BM
|
||||
|
||||
/\x{1000000}/8BM
|
||||
|
||||
/\x{4000000}/8BM
|
||||
|
||||
/\x{7fffFFFF}/8BM
|
||||
|
||||
/[\x{ff}]/8BM
|
||||
|
||||
/[\x{100}]/8BM
|
||||
|
||||
/\x80/8BM
|
||||
|
||||
/\xff/8BM
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8M
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8M
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8M
|
||||
|
||||
/[\x{100}]/8BM
|
||||
|
||||
/[Z\x{100}]/8BM
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/B8M
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/B8M
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/B8M
|
||||
|
||||
/[\p{L}]/BM
|
||||
|
||||
/[\p{^L}]/BM
|
||||
|
||||
/[\P{L}]/BM
|
||||
|
||||
/[\P{^L}]/BM
|
||||
|
||||
/[abc\p{L}\x{0660}]/8BM
|
||||
|
||||
/[\p{Nd}]/8BM
|
||||
|
||||
/[\p{Nd}+-]+/8BM
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iBM
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8BM
|
||||
|
||||
/[\x{105}-\x{109}]/8iBM
|
||||
|
||||
/ End of testinput10 /
|
1146
ext/pcre/pcrelib/testdata/testinput2
vendored
1146
ext/pcre/pcrelib/testdata/testinput2
vendored
File diff suppressed because it is too large
Load Diff
2
ext/pcre/pcrelib/testdata/testinput3
vendored
2
ext/pcre/pcrelib/testdata/testinput3
vendored
@ -86,6 +86,6 @@
|
||||
>>>\xaa<<<
|
||||
>>>\xba<<<
|
||||
|
||||
/[[:alpha:]][[:lower:]][[:upper:]]/DLfr_FR
|
||||
/[[:alpha:]][[:lower:]][[:upper:]]/DZLfr_FR
|
||||
|
||||
/ End of testinput3 /
|
||||
|
186
ext/pcre/pcrelib/testdata/testinput5
vendored
186
ext/pcre/pcrelib/testdata/testinput5
vendored
@ -1,20 +1,20 @@
|
||||
/\x{100}/8DM
|
||||
/\x{100}/8DZ
|
||||
|
||||
/\x{1000}/8DM
|
||||
/\x{1000}/8DZ
|
||||
|
||||
/\x{10000}/8DM
|
||||
/\x{10000}/8DZ
|
||||
|
||||
/\x{100000}/8DM
|
||||
/\x{100000}/8DZ
|
||||
|
||||
/\x{1000000}/8DM
|
||||
/\x{1000000}/8DZ
|
||||
|
||||
/\x{4000000}/8DM
|
||||
/\x{4000000}/8DZ
|
||||
|
||||
/\x{7fffFFFF}/8DM
|
||||
/\x{7fffFFFF}/8DZ
|
||||
|
||||
/[\x{ff}]/8DM
|
||||
/[\x{ff}]/8DZ
|
||||
|
||||
/[\x{100}]/8DM
|
||||
/[\x{100}]/8DZ
|
||||
|
||||
/\x{ffffffff}/8
|
||||
|
||||
@ -23,39 +23,39 @@
|
||||
/^\x{100}a\x{1234}/8
|
||||
\x{100}a\x{1234}bcd
|
||||
|
||||
/\x80/8D
|
||||
/\x80/8DZ
|
||||
|
||||
/\xff/8D
|
||||
/\xff/8DZ
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/DZ8
|
||||
\x{0041}\x{2262}\x{0391}\x{002e}
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
||||
\x{D55c}\x{ad6d}\x{C5B4}
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8
|
||||
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
|
||||
/\x{80}/D8
|
||||
/\x{80}/DZ8
|
||||
|
||||
/\x{084}/D8
|
||||
/\x{084}/DZ8
|
||||
|
||||
/\x{104}/D8
|
||||
/\x{104}/DZ8
|
||||
|
||||
/\x{861}/D8
|
||||
/\x{861}/DZ8
|
||||
|
||||
/\x{212ab}/D8
|
||||
/\x{212ab}/DZ8
|
||||
|
||||
/.{3,5}X/D8
|
||||
/.{3,5}X/DZ8
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}X
|
||||
|
||||
|
||||
/.{3,5}?/D8
|
||||
/.{3,5}?/DZ8
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
||||
|
||||
/-- These tests are here rather than in testinput4 because Perl 5.6 has --/
|
||||
/-- some problems with UTF-8 support, in the area of \x{..} where the --/
|
||||
/-- value is < 255. It grumbles about invalid UTF-8 strings. --/
|
||||
/-- These tests are here rather than in testinput4 because Perl 5.6 has some
|
||||
problems with UTF-8 support, in the area of \x{..} where the value is < 255.
|
||||
It grumbles about invalid UTF-8 strings. --/
|
||||
|
||||
/^[a\x{c0}]b/8
|
||||
\x{c0}b
|
||||
@ -80,29 +80,29 @@
|
||||
/(?<=\C)X/8
|
||||
Should produce an error diagnostic
|
||||
|
||||
/-- This one is here not because it's different to Perl, but because the --/
|
||||
/-- way the captured single-byte is displayed. (In Perl it becomes a --/
|
||||
/-- character, and you can't tell the difference.) --/
|
||||
/-- This one is here not because it's different to Perl, but because the way
|
||||
the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||||
can't tell the difference.) --/
|
||||
|
||||
/X(\C)(.*)/8
|
||||
X\x{1234}
|
||||
X\nabc
|
||||
|
||||
/^[ab]/8D
|
||||
/^[ab]/8DZ
|
||||
bar
|
||||
*** Failers
|
||||
c
|
||||
\x{ff}
|
||||
\x{100}
|
||||
|
||||
/^[^ab]/8D
|
||||
/^[^ab]/8DZ
|
||||
c
|
||||
\x{ff}
|
||||
\x{100}
|
||||
*** Failers
|
||||
aaa
|
||||
|
||||
/[^ab\xC0-\xF0]/8SD
|
||||
/[^ab\xC0-\xF0]/8SDZ
|
||||
\x{f1}
|
||||
\x{bf}
|
||||
\x{100}
|
||||
@ -111,16 +111,16 @@
|
||||
\x{c0}
|
||||
\x{f0}
|
||||
|
||||
/Ä€{3,4}/8SD
|
||||
/Ä€{3,4}/8SDZ
|
||||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||||
|
||||
/(\x{100}+|x)/8SD
|
||||
/(\x{100}+|x)/8SDZ
|
||||
|
||||
/(\x{100}*a|x)/8SD
|
||||
/(\x{100}*a|x)/8SDZ
|
||||
|
||||
/(\x{100}{0,2}a|x)/8SD
|
||||
/(\x{100}{0,2}a|x)/8SDZ
|
||||
|
||||
/(\x{100}{1,2}a|x)/8SD
|
||||
/(\x{100}{1,2}a|x)/8SDZ
|
||||
|
||||
/\x{100}*(\d+|"(?1)")/8
|
||||
1234
|
||||
@ -132,34 +132,34 @@
|
||||
*** Failers
|
||||
\x{100}\x{100}abcd
|
||||
|
||||
/\x{100}/8D
|
||||
/\x{100}/8DZ
|
||||
|
||||
/\x{100}*/8D
|
||||
/\x{100}*/8DZ
|
||||
|
||||
/a\x{100}*/8D
|
||||
/a\x{100}*/8DZ
|
||||
|
||||
/ab\x{100}*/8D
|
||||
/ab\x{100}*/8DZ
|
||||
|
||||
/a\x{100}\x{101}*/8D
|
||||
/a\x{100}\x{101}*/8DZ
|
||||
|
||||
/a\x{100}\x{101}+/8D
|
||||
/a\x{100}\x{101}+/8DZ
|
||||
|
||||
/\x{100}*A/8D
|
||||
/\x{100}*A/8DZ
|
||||
A
|
||||
|
||||
/\x{100}*\d(?R)/8D
|
||||
/\x{100}*\d(?R)/8DZ
|
||||
|
||||
/[^\x{c4}]/D
|
||||
/[^\x{c4}]/DZ
|
||||
|
||||
/[^\x{c4}]/8D
|
||||
/[^\x{c4}]/8DZ
|
||||
|
||||
/[\x{100}]/8DM
|
||||
/[\x{100}]/8DZ
|
||||
\x{100}
|
||||
Z\x{100}
|
||||
\x{100}Z
|
||||
*** Failers
|
||||
|
||||
/[Z\x{100}]/8DM
|
||||
/[Z\x{100}]/8DZ
|
||||
Z\x{100}
|
||||
\x{100}
|
||||
\x{100}Z
|
||||
@ -174,21 +174,21 @@
|
||||
\x{105}
|
||||
\x{ff}
|
||||
|
||||
/[z-\x{100}]/8D
|
||||
/[z-\x{100}]/8DZ
|
||||
|
||||
/[z\Qa-d]Ä€\E]/8D
|
||||
/[z\Qa-d]Ä€\E]/8DZ
|
||||
\x{100}
|
||||
Ä€
|
||||
|
||||
/[\xFF]/D
|
||||
/[\xFF]/DZ
|
||||
>\xff<
|
||||
|
||||
/[\xff]/D8
|
||||
/[\xff]/DZ8
|
||||
>\x{ff}<
|
||||
|
||||
/[^\xFF]/D
|
||||
/[^\xFF]/DZ
|
||||
|
||||
/[^\xff]/8D
|
||||
/[^\xff]/8DZ
|
||||
|
||||
/[Ä-Ü]/8
|
||||
Ö # Matches without Study
|
||||
@ -212,7 +212,7 @@
|
||||
|
||||
/ÃÃÃxxx/8
|
||||
|
||||
/ÃÃÃxxx/8?D
|
||||
/ÃÃÃxxx/8?DZ
|
||||
|
||||
/abc/8
|
||||
Ã]
|
||||
@ -239,19 +239,19 @@
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
|
||||
/\x{100}abc(xyz(?1))/8D
|
||||
/\x{100}abc(xyz(?1))/8DZ
|
||||
|
||||
/[^\x{100}]abc(xyz(?1))/8D
|
||||
/[^\x{100}]abc(xyz(?1))/8DZ
|
||||
|
||||
/[ab\x{100}]abc(xyz(?1))/8D
|
||||
/[ab\x{100}]abc(xyz(?1))/8DZ
|
||||
|
||||
/(\x{100}(b(?2)c))?/D8
|
||||
/(\x{100}(b(?2)c))?/DZ8
|
||||
|
||||
/(\x{100}(b(?2)c)){0,2}/D8
|
||||
/(\x{100}(b(?2)c)){0,2}/DZ8
|
||||
|
||||
/(\x{100}(b(?1)c))?/D8
|
||||
/(\x{100}(b(?1)c))?/DZ8
|
||||
|
||||
/(\x{100}(b(?1)c)){0,2}/D8
|
||||
/(\x{100}(b(?1)c)){0,2}/DZ8
|
||||
|
||||
/\W/8
|
||||
A.B
|
||||
@ -263,7 +263,7 @@
|
||||
/a\x{1234}b/P8
|
||||
a\x{1234}b
|
||||
|
||||
/^\ሴ/8D
|
||||
/^\ሴ/8DZ
|
||||
|
||||
/\777/I
|
||||
|
||||
@ -271,23 +271,23 @@
|
||||
\x{1ff}
|
||||
\777
|
||||
|
||||
/\x{100}*\d/8D
|
||||
/\x{100}*\d/8DZ
|
||||
|
||||
/\x{100}*\s/8D
|
||||
/\x{100}*\s/8DZ
|
||||
|
||||
/\x{100}*\w/8D
|
||||
/\x{100}*\w/8DZ
|
||||
|
||||
/\x{100}*\D/8D
|
||||
/\x{100}*\D/8DZ
|
||||
|
||||
/\x{100}*\S/8D
|
||||
/\x{100}*\S/8DZ
|
||||
|
||||
/\x{100}*\W/8D
|
||||
/\x{100}*\W/8DZ
|
||||
|
||||
/\x{100}+\x{200}/8D
|
||||
/\x{100}+\x{200}/8DZ
|
||||
|
||||
/\x{100}+X/8D
|
||||
/\x{100}+X/8DZ
|
||||
|
||||
/X+\x{200}/8D
|
||||
/X+\x{200}/8DZ
|
||||
|
||||
/()()()()()()()()()()
|
||||
()()()()()()()()()()
|
||||
@ -296,11 +296,11 @@
|
||||
A (x) (?41) B/8x
|
||||
AxxB
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/B8
|
||||
/^[\x{100}\E-\Q\E\x{150}]/BZ8
|
||||
|
||||
/^[\QÄ€\E-\QÅ<51>\E]/B8
|
||||
/^[\QÄ€\E-\QÅ<51>\E]/BZ8
|
||||
|
||||
/^[\QÄ€\E-\QÅ<51>\E/B8
|
||||
/^[\QÄ€\E-\QÅ<51>\E/BZ8
|
||||
|
||||
/^abc./mgx8<any>
|
||||
abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
|
||||
@ -355,4 +355,42 @@
|
||||
a\n\n\n\rb
|
||||
a\r
|
||||
|
||||
/\H\h\V\v/8
|
||||
X X\x0a
|
||||
X\x09X\x0b
|
||||
** Failers
|
||||
\x{a0} X\x0a
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c
|
||||
** Failers
|
||||
\x09\x20\x{a0}\x0a\x0b
|
||||
|
||||
/\H\h\V\v/8
|
||||
\x{3001}\x{3000}\x{2030}\x{2028}
|
||||
X\x{180e}X\x{85}
|
||||
** Failers
|
||||
\x{2009} X\x0a
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
|
||||
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
|
||||
\x09\x20\x{202f}\x0a\x0b\x0c
|
||||
** Failers
|
||||
\x09\x{200a}\x{a0}\x{2028}\x0b
|
||||
|
||||
/[\h]/8BZ
|
||||
>\x{1680}
|
||||
|
||||
/[\h]{3,}/8BZ
|
||||
>\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
|
||||
|
||||
/[\v]/8BZ
|
||||
|
||||
/[\H]/8BZ
|
||||
|
||||
/[\V]/8BZ
|
||||
|
||||
/ End of testinput5 /
|
||||
|
38
ext/pcre/pcrelib/testdata/testinput6
vendored
38
ext/pcre/pcrelib/testdata/testinput6
vendored
@ -323,20 +323,20 @@
|
||||
** Failers
|
||||
WXYZ
|
||||
|
||||
/[\p{L}]/D
|
||||
/[\p{L}]/DZ
|
||||
|
||||
/[\p{^L}]/D
|
||||
/[\p{^L}]/DZ
|
||||
|
||||
/[\P{L}]/D
|
||||
/[\P{L}]/DZ
|
||||
|
||||
/[\P{^L}]/D
|
||||
/[\P{^L}]/DZ
|
||||
|
||||
/[abc\p{L}\x{0660}]/8D
|
||||
/[abc\p{L}\x{0660}]/8DZ
|
||||
|
||||
/[\p{Nd}]/8DM
|
||||
/[\p{Nd}]/8DZ
|
||||
1234
|
||||
|
||||
/[\p{Nd}+-]+/8DM
|
||||
/[\p{Nd}+-]+/8DZ
|
||||
1234
|
||||
12-34
|
||||
12+\x{661}-34
|
||||
@ -425,13 +425,13 @@
|
||||
A\x{391}\x{10427}\x{ff5a}\x{1fb0}
|
||||
A\x{391}\x{10427}\x{ff3a}\x{1fb8}
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iD
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8D
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
|
||||
|
||||
/AB\x{1fb0}/8D
|
||||
/AB\x{1fb0}/8DZ
|
||||
|
||||
/AB\x{1fb0}/8Di
|
||||
/AB\x{1fb0}/8DZi
|
||||
|
||||
/\x{391}+/8i
|
||||
\x{391}\x{3b1}\x{3b1}\x{3b1}\x{391}
|
||||
@ -452,7 +452,7 @@
|
||||
\x{c0}
|
||||
\x{e0}
|
||||
|
||||
/[\x{105}-\x{109}]/8iD
|
||||
/[\x{105}-\x{109}]/8iDZ
|
||||
\x{104}
|
||||
\x{105}
|
||||
\x{109}
|
||||
@ -460,7 +460,7 @@
|
||||
\x{100}
|
||||
\x{10a}
|
||||
|
||||
/[z-\x{100}]/8iD
|
||||
/[z-\x{100}]/8iDZ
|
||||
Z
|
||||
z
|
||||
\x{39c}
|
||||
@ -475,7 +475,7 @@
|
||||
Y
|
||||
y
|
||||
|
||||
/[z-\x{100}]/8Di
|
||||
/[z-\x{100}]/8DZi
|
||||
|
||||
/^\X/8
|
||||
A
|
||||
@ -762,4 +762,14 @@ of case for anything other than the ASCII letters. /
|
||||
/^\p{Balinese}\p{Cuneiform}\p{Nko}\p{Phags_Pa}\p{Phoenician}/8
|
||||
\x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
|
||||
|
||||
/The next two are special cases where the lengths of the different cases of the
|
||||
same character differ. The first went wrong with heap fram storage; the 2nd
|
||||
was broken in all cases./
|
||||
|
||||
/^\x{023a}+?(\x{0130}+)/8i
|
||||
\x{023a}\x{2c65}\x{0130}
|
||||
|
||||
/^\x{023a}+([^X])/8i
|
||||
\x{023a}\x{2c65}X
|
||||
|
||||
/ End of testinput6 /
|
||||
|
73
ext/pcre/pcrelib/testdata/testinput7
vendored
73
ext/pcre/pcrelib/testdata/testinput7
vendored
@ -1931,8 +1931,8 @@
|
||||
/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
|
||||
abcdefghijk\12S
|
||||
|
||||
/ab\hdef/
|
||||
abhdef
|
||||
/ab\idef/
|
||||
abidef
|
||||
|
||||
/a{0}bc/
|
||||
bc
|
||||
@ -4229,4 +4229,73 @@
|
||||
\r\nfoo
|
||||
\nfoo
|
||||
|
||||
/^$/mg<any>
|
||||
abc\r\rxyz
|
||||
abc\n\rxyz
|
||||
** Failers
|
||||
abc\r\nxyz
|
||||
|
||||
/^X/m
|
||||
XABC
|
||||
** Failers
|
||||
XABC\B
|
||||
|
||||
/(?m)^$/<any>g+
|
||||
abc\r\n\r\n
|
||||
|
||||
/(?m)^$|^\r\n/<any>g+
|
||||
abc\r\n\r\n
|
||||
|
||||
/(?m)$/<any>g+
|
||||
abc\r\n\r\n
|
||||
|
||||
/(?|(abc)|(xyz))/
|
||||
>abc<
|
||||
>xyz<
|
||||
|
||||
/(x)(?|(abc)|(xyz))(x)/
|
||||
xabcx
|
||||
xxyzx
|
||||
|
||||
/(x)(?|(abc)(pqr)|(xyz))(x)/
|
||||
xabcpqrx
|
||||
xxyzx
|
||||
|
||||
/(?|(abc)|(xyz))(?1)/
|
||||
abcabc
|
||||
xyzabc
|
||||
** Failers
|
||||
xyzxyz
|
||||
|
||||
/\H\h\V\v/
|
||||
X X\x0a
|
||||
X\x09X\x0b
|
||||
** Failers
|
||||
\xa0 X\x0a
|
||||
|
||||
/\H*\h+\V?\v{3,4}/
|
||||
\x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\xa0\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\xa0\x0a\x0b\x0c
|
||||
** Failers
|
||||
\x09\x20\xa0\x0a\x0b
|
||||
|
||||
/\H{3,4}/
|
||||
XY ABCDE
|
||||
XY PQR ST
|
||||
|
||||
/.\h{3,4}./
|
||||
XY AB PQRS
|
||||
|
||||
/\h*X\h?\H+Y\H?Z/
|
||||
>XNNNYZ
|
||||
> X NYQZ
|
||||
** Failers
|
||||
>XYZ
|
||||
> X NY Z
|
||||
|
||||
/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/
|
||||
>XY\x0aZ\x0aA\x0bNN\x0c
|
||||
>\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
|
||||
/ End of testinput7 /
|
||||
|
38
ext/pcre/pcrelib/testdata/testinput8
vendored
38
ext/pcre/pcrelib/testdata/testinput8
vendored
@ -590,4 +590,42 @@
|
||||
a\n\n\n\rb
|
||||
a\r
|
||||
|
||||
/\h+\V?\v{3,4}/8
|
||||
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
|
||||
/\V?\v{3,4}/8
|
||||
\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
|
||||
/\h+\V?\v{3,4}/8
|
||||
>\x09\x20\x{a0}X\x0a\x0a\x0a<
|
||||
|
||||
/\V?\v{3,4}/8
|
||||
>\x09\x20\x{a0}X\x0a\x0a\x0a<
|
||||
|
||||
/\H\h\V\v/8
|
||||
X X\x0a
|
||||
X\x09X\x0b
|
||||
** Failers
|
||||
\x{a0} X\x0a
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c
|
||||
** Failers
|
||||
\x09\x20\x{a0}\x0a\x0b
|
||||
|
||||
/\H\h\V\v/8
|
||||
\x{3001}\x{3000}\x{2030}\x{2028}
|
||||
X\x{180e}X\x{85}
|
||||
** Failers
|
||||
\x{2009} X\x0a
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
|
||||
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
|
||||
\x09\x20\x{202f}\x0a\x0b\x0c
|
||||
** Failers
|
||||
\x09\x{200a}\x{a0}\x{2028}\x0b
|
||||
|
||||
/ End of testinput 8 /
|
||||
|
6
ext/pcre/pcrelib/testdata/testinput9
vendored
6
ext/pcre/pcrelib/testdata/testinput9
vendored
@ -813,4 +813,10 @@
|
||||
\x{1c5}XY
|
||||
AXY
|
||||
|
||||
/^\x{023a}+?(\x{0130}+)/8i
|
||||
\x{023a}\x{2c65}\x{0130}
|
||||
|
||||
/^\x{023a}+([^X])/8i
|
||||
\x{023a}\x{2c65}X
|
||||
|
||||
/ End /
|
||||
|
11
ext/pcre/pcrelib/testdata/testoutput1
vendored
11
ext/pcre/pcrelib/testdata/testoutput1
vendored
@ -2189,9 +2189,9 @@ No match
|
||||
10: j
|
||||
11: k
|
||||
|
||||
/ab\hdef/
|
||||
abhdef
|
||||
0: abhdef
|
||||
/ab\idef/
|
||||
abidef
|
||||
0: abidef
|
||||
|
||||
/a{0}bc/
|
||||
bc
|
||||
@ -6571,4 +6571,9 @@ No match
|
||||
abc\n
|
||||
No match
|
||||
|
||||
/(.*(.)?)*/
|
||||
abcd
|
||||
0: abcd
|
||||
1:
|
||||
|
||||
/ End of testinput1 /
|
||||
|
563
ext/pcre/pcrelib/testdata/testoutput10
vendored
Normal file
563
ext/pcre/pcrelib/testdata/testoutput10
vendored
Normal file
@ -0,0 +1,563 @@
|
||||
/-- These are a few representative patterns whose lengths and offsets are to be
|
||||
shown when the link size is 2. This is just a doublecheck test to ensure the
|
||||
sizes don't go horribly wrong when something is changed. The pattern contents
|
||||
are all themselves checked in other tests. --/
|
||||
|
||||
/((?i)b)/BM
|
||||
Memory allocation (code space): 21
|
||||
------------------------------------------------------------------
|
||||
0 17 Bra 0
|
||||
3 9 Bra 1
|
||||
8 01 Opt
|
||||
10 NC b
|
||||
12 9 Ket
|
||||
15 00 Opt
|
||||
17 17 Ket
|
||||
20 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s)(.*X|^B)/BM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 9 Bra 1
|
||||
8 Any*
|
||||
10 X
|
||||
12 6 Alt
|
||||
15 ^
|
||||
16 B
|
||||
18 15 Ket
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?s:.*X|^B)/BM
|
||||
Memory allocation (code space): 29
|
||||
------------------------------------------------------------------
|
||||
0 25 Bra 0
|
||||
3 9 Bra 0
|
||||
6 04 Opt
|
||||
8 Any*
|
||||
10 X
|
||||
12 8 Alt
|
||||
15 04 Opt
|
||||
17 ^
|
||||
18 B
|
||||
20 17 Ket
|
||||
23 00 Opt
|
||||
25 25 Ket
|
||||
28 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[[:alnum:]]/BM
|
||||
Memory allocation (code space): 41
|
||||
------------------------------------------------------------------
|
||||
0 37 Bra 0
|
||||
3 ^
|
||||
4 [0-9A-Za-z]
|
||||
37 37 Ket
|
||||
40 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/#/IxMD
|
||||
Memory allocation (code space): 7
|
||||
------------------------------------------------------------------
|
||||
0 3 Bra 0
|
||||
3 3 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: extended
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/a#/IxMD
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: extended
|
||||
First char = 'a'
|
||||
No need char
|
||||
|
||||
/x?+/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
3 x?+
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x++/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
3 x++
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/x{1,3}+/BM
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
3 9 Once
|
||||
6 x
|
||||
8 x{0,2}
|
||||
12 9 Ket
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(x)*+/BM
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 20 Bra 0
|
||||
3 14 Once
|
||||
6 Brazero
|
||||
7 7 Bra 1
|
||||
12 x
|
||||
14 7 KetRmax
|
||||
17 14 Ket
|
||||
20 20 Ket
|
||||
23 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/BM
|
||||
Memory allocation (code space): 120
|
||||
------------------------------------------------------------------
|
||||
0 116 Bra 0
|
||||
3 ^
|
||||
4 109 Bra 1
|
||||
9 7 Bra 2
|
||||
14 a+
|
||||
16 7 Ket
|
||||
19 39 Bra 3
|
||||
24 [ab]+?
|
||||
58 39 Ket
|
||||
61 39 Bra 4
|
||||
66 [bc]+
|
||||
100 39 Ket
|
||||
103 7 Bra 5
|
||||
108 \w*
|
||||
110 7 Ket
|
||||
113 109 Ket
|
||||
116 116 Ket
|
||||
119 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
Memory allocation (code space): 826
|
||||
------------------------------------------------------------------
|
||||
0 822 Bra 0
|
||||
3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
821 \b
|
||||
822 822 Ket
|
||||
825 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
Memory allocation (code space): 816
|
||||
------------------------------------------------------------------
|
||||
0 812 Bra 0
|
||||
3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
811 \b
|
||||
812 812 Ket
|
||||
815 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)b)/BM
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra 0
|
||||
3 18 Bra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
13 3 Recurse
|
||||
16 6 Ket
|
||||
19 b
|
||||
21 18 Ket
|
||||
24 24 Ket
|
||||
27 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(a(?1)+b)/BM
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra 0
|
||||
3 18 Bra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
13 3 Recurse
|
||||
16 6 KetRmax
|
||||
19 b
|
||||
21 18 Ket
|
||||
24 24 Ket
|
||||
27 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/BM
|
||||
Memory allocation (code space): 42
|
||||
------------------------------------------------------------------
|
||||
0 32 Bra 0
|
||||
3 a
|
||||
5 7 Bra 1
|
||||
10 b
|
||||
12 5 Alt
|
||||
15 c
|
||||
17 12 Ket
|
||||
20 d
|
||||
22 7 Bra 2
|
||||
27 e
|
||||
29 7 Ket
|
||||
32 32 Ket
|
||||
35 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/BM
|
||||
Memory allocation (code space): 54
|
||||
------------------------------------------------------------------
|
||||
0 41 Bra 0
|
||||
3 25 Bra 0
|
||||
6 a
|
||||
8 17 Bra 1
|
||||
13 c
|
||||
15 7 Bra 2
|
||||
20 d
|
||||
22 7 Ket
|
||||
25 17 Ket
|
||||
28 25 Ket
|
||||
31 7 Bra 3
|
||||
36 a
|
||||
38 7 Ket
|
||||
41 41 Ket
|
||||
44 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
|
||||
Memory allocation (code space): 43
|
||||
------------------------------------------------------------------
|
||||
0 36 Bra 0
|
||||
3 7 Bra 1
|
||||
8 a
|
||||
10 7 Ket
|
||||
13 Any
|
||||
14 Any
|
||||
15 Any
|
||||
16 \1
|
||||
19 bbb
|
||||
25 6 Once
|
||||
28 3 Recurse
|
||||
31 6 Ket
|
||||
34 d
|
||||
36 36 Ket
|
||||
39 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abc(?C255)de(?C)f/BM
|
||||
Memory allocation (code space): 31
|
||||
------------------------------------------------------------------
|
||||
0 27 Bra 0
|
||||
3 abc
|
||||
9 Callout 255 10 1
|
||||
15 de
|
||||
19 Callout 0 16 1
|
||||
25 f
|
||||
27 27 Ket
|
||||
30 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/abcde/CBM
|
||||
Memory allocation (code space): 53
|
||||
------------------------------------------------------------------
|
||||
0 49 Bra 0
|
||||
3 Callout 255 0 1
|
||||
9 a
|
||||
11 Callout 255 1 1
|
||||
17 b
|
||||
19 Callout 255 2 1
|
||||
25 c
|
||||
27 Callout 255 3 1
|
||||
33 d
|
||||
35 Callout 255 4 1
|
||||
41 e
|
||||
43 Callout 255 5 0
|
||||
49 49 Ket
|
||||
52 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100}/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
3 \x{100}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{1000}/8BM
|
||||
Memory allocation (code space): 11
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 \x{1000}
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{10000}/8BM
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
3 \x{10000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{100000}/8BM
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
3 \x{100000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{1000000}/8BM
|
||||
Memory allocation (code space): 13
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra 0
|
||||
3 \x{1000000}
|
||||
9 9 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{4000000}/8BM
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra 0
|
||||
3 \x{4000000}
|
||||
10 10 Ket
|
||||
13 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{7fffFFFF}/8BM
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra 0
|
||||
3 \x{7fffffff}
|
||||
10 10 Ket
|
||||
13 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{ff}]/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{100}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\x{100}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x80/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
3 \x{80}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\xff/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 A\x{2262}\x{391}.
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 'A'
|
||||
Need char = '.'
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8M
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
3 \x{d55c}\x{ad6d}\x{c5b4}
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 237
|
||||
Need char = 180
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8M
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
3 \x{65e5}\x{672c}\x{8a9e}
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 230
|
||||
Need char = 158
|
||||
|
||||
/[\x{100}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\x{100}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[Z\x{100}]/8BM
|
||||
Memory allocation (code space): 47
|
||||
------------------------------------------------------------------
|
||||
0 43 Bra 0
|
||||
3 [Z\x{100}]
|
||||
43 43 Ket
|
||||
46 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/B8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/B8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^[\QĀ\E-\QŐ\E/B8M
|
||||
Failed: missing terminating ] for character class at offset 15
|
||||
|
||||
/[\p{L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{^L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{^L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[abc\p{L}\x{0660}]/8BM
|
||||
Memory allocation (code space): 50
|
||||
------------------------------------------------------------------
|
||||
0 46 Bra 0
|
||||
3 [a-c\p{L}\x{660}]
|
||||
46 46 Ket
|
||||
49 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{Nd}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\p{Nd}+-]+/8BM
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra 0
|
||||
3 [+\-\p{Nd}]+
|
||||
44 44 Ket
|
||||
47 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iBM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 NC A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8BM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\x{105}-\x{109}]/8iBM
|
||||
Memory allocation (code space): 17
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra 0
|
||||
3 [\x{104}-\x{109}]
|
||||
13 13 Ket
|
||||
16 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/ End of testinput10 /
|
3912
ext/pcre/pcrelib/testdata/testoutput2
vendored
3912
ext/pcre/pcrelib/testdata/testoutput2
vendored
File diff suppressed because it is too large
Load Diff
14
ext/pcre/pcrelib/testdata/testoutput3
vendored
14
ext/pcre/pcrelib/testdata/testoutput3
vendored
@ -146,14 +146,14 @@ No match
|
||||
>>>\xba<<<
|
||||
0: º
|
||||
|
||||
/[[:alpha:]][[:lower:]][[:upper:]]/DLfr_FR
|
||||
/[[:alpha:]][[:lower:]][[:upper:]]/DZLfr_FR
|
||||
------------------------------------------------------------------
|
||||
0 102 Bra 0
|
||||
3 [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff]
|
||||
36 [a-z\xb5\xdf-\xf6\xf8-\xff]
|
||||
69 [A-Z\xc0-\xd6\xd8-\xde]
|
||||
102 102 Ket
|
||||
105 End
|
||||
Bra 0
|
||||
[A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff]
|
||||
[a-z\xb5\xdf-\xf6\xf8-\xff]
|
||||
[A-Z\xc0-\xd6\xd8-\xde]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
|
1053
ext/pcre/pcrelib/testdata/testoutput5
vendored
1053
ext/pcre/pcrelib/testdata/testoutput5
vendored
File diff suppressed because it is too large
Load Diff
156
ext/pcre/pcrelib/testdata/testoutput6
vendored
156
ext/pcre/pcrelib/testdata/testoutput6
vendored
@ -548,73 +548,72 @@ No match
|
||||
WXYZ
|
||||
No match
|
||||
|
||||
/[\p{L}]/D
|
||||
/[\p{L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
[\p{L}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[\p{^L}]/D
|
||||
/[\p{^L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
[\P{L}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[\P{L}]/D
|
||||
/[\P{L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
[\P{L}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[\P{^L}]/D
|
||||
/[\P{^L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
[\p{L}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[abc\p{L}\x{0660}]/8D
|
||||
/[abc\p{L}\x{0660}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
0 46 Bra 0
|
||||
3 [a-c\p{L}\x{660}]
|
||||
46 46 Ket
|
||||
49 End
|
||||
Bra 0
|
||||
[a-c\p{L}\x{660}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[\p{Nd}]/8DM
|
||||
Memory allocation (code space): 15
|
||||
/[\p{Nd}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 [\p{Nd}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
[\p{Nd}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
@ -623,13 +622,12 @@ No need char
|
||||
1234
|
||||
0: 1
|
||||
|
||||
/[\p{Nd}+-]+/8DM
|
||||
Memory allocation (code space): 48
|
||||
/[\p{Nd}+-]+/8DZ
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra 0
|
||||
3 [+\-\p{Nd}]+
|
||||
44 44 Ket
|
||||
47 End
|
||||
Bra 0
|
||||
[+\-\p{Nd}]+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
@ -779,48 +777,48 @@ No match
|
||||
A\x{391}\x{10427}\x{ff3a}\x{1fb8}
|
||||
0: A\x{391}\x{10427}\x{ff3a}\x{1fb8}
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iD
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 NC A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
Bra 0
|
||||
NC A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless utf8
|
||||
First char = 'A' (caseless)
|
||||
No need char
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8D
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
Bra 0
|
||||
A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 'A'
|
||||
Need char = 176
|
||||
|
||||
/AB\x{1fb0}/8D
|
||||
/AB\x{1fb0}/8DZ
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 AB\x{1fb0}
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
AB\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 'A'
|
||||
Need char = 176
|
||||
|
||||
/AB\x{1fb0}/8Di
|
||||
/AB\x{1fb0}/8DZi
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 NC AB\x{1fb0}
|
||||
11 11 Ket
|
||||
14 End
|
||||
Bra 0
|
||||
NC AB\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless utf8
|
||||
@ -857,12 +855,12 @@ Need char = 'B' (caseless)
|
||||
\x{e0}
|
||||
0: \x{e0}
|
||||
|
||||
/[\x{105}-\x{109}]/8iD
|
||||
/[\x{105}-\x{109}]/8iDZ
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra 0
|
||||
3 [\x{104}-\x{109}]
|
||||
13 13 Ket
|
||||
16 End
|
||||
Bra 0
|
||||
[\x{104}-\x{109}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless utf8
|
||||
@ -881,12 +879,12 @@ No match
|
||||
\x{10a}
|
||||
No match
|
||||
|
||||
/[z-\x{100}]/8iD
|
||||
/[z-\x{100}]/8iDZ
|
||||
------------------------------------------------------------------
|
||||
0 20 Bra 0
|
||||
3 [Z\x{39c}\x{178}z-\x{101}]
|
||||
20 20 Ket
|
||||
23 End
|
||||
Bra 0
|
||||
[Z\x{39c}\x{178}z-\x{101}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless utf8
|
||||
@ -919,12 +917,12 @@ No match
|
||||
y
|
||||
No match
|
||||
|
||||
/[z-\x{100}]/8Di
|
||||
/[z-\x{100}]/8DZi
|
||||
------------------------------------------------------------------
|
||||
0 20 Bra 0
|
||||
3 [Z\x{39c}\x{178}z-\x{101}]
|
||||
20 20 Ket
|
||||
23 End
|
||||
Bra 0
|
||||
[Z\x{39c}\x{178}z-\x{101}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: caseless utf8
|
||||
@ -1432,4 +1430,18 @@ of case for anything other than the ASCII letters. /
|
||||
\x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
|
||||
0: \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
|
||||
|
||||
/The next two are special cases where the lengths of the different cases of the
|
||||
same character differ. The first went wrong with heap fram storage; the 2nd
|
||||
was broken in all cases./
|
||||
|
||||
/^\x{023a}+?(\x{0130}+)/8i
|
||||
\x{023a}\x{2c65}\x{0130}
|
||||
0: \x{23a}\x{2c65}\x{130}
|
||||
1: \x{130}
|
||||
|
||||
/^\x{023a}+([^X])/8i
|
||||
\x{023a}\x{2c65}X
|
||||
0: \x{23a}\x{2c65}
|
||||
1: \x{2c65}
|
||||
|
||||
/ End of testinput6 /
|
||||
|
126
ext/pcre/pcrelib/testdata/testoutput7
vendored
126
ext/pcre/pcrelib/testdata/testoutput7
vendored
@ -3039,9 +3039,9 @@ No match
|
||||
abcdefghijk\12S
|
||||
0: abcdefghijk\x0aS
|
||||
|
||||
/ab\hdef/
|
||||
abhdef
|
||||
0: abhdef
|
||||
/ab\idef/
|
||||
abidef
|
||||
0: abidef
|
||||
|
||||
/a{0}bc/
|
||||
bc
|
||||
@ -6952,4 +6952,124 @@ No match
|
||||
\nfoo
|
||||
0: \x0afoo
|
||||
|
||||
/^$/mg<any>
|
||||
abc\r\rxyz
|
||||
0:
|
||||
abc\n\rxyz
|
||||
0:
|
||||
** Failers
|
||||
No match
|
||||
abc\r\nxyz
|
||||
No match
|
||||
|
||||
/^X/m
|
||||
XABC
|
||||
0: X
|
||||
** Failers
|
||||
No match
|
||||
XABC\B
|
||||
No match
|
||||
|
||||
/(?m)^$/<any>g+
|
||||
abc\r\n\r\n
|
||||
0:
|
||||
0+ \x0d\x0a
|
||||
|
||||
/(?m)^$|^\r\n/<any>g+
|
||||
abc\r\n\r\n
|
||||
0: \x0d\x0a
|
||||
0+
|
||||
1:
|
||||
|
||||
/(?m)$/<any>g+
|
||||
abc\r\n\r\n
|
||||
0:
|
||||
0+ \x0d\x0a\x0d\x0a
|
||||
0:
|
||||
0+ \x0d\x0a
|
||||
0:
|
||||
0+
|
||||
|
||||
/(?|(abc)|(xyz))/
|
||||
>abc<
|
||||
0: abc
|
||||
>xyz<
|
||||
0: xyz
|
||||
|
||||
/(x)(?|(abc)|(xyz))(x)/
|
||||
xabcx
|
||||
0: xabcx
|
||||
xxyzx
|
||||
0: xxyzx
|
||||
|
||||
/(x)(?|(abc)(pqr)|(xyz))(x)/
|
||||
xabcpqrx
|
||||
0: xabcpqrx
|
||||
xxyzx
|
||||
0: xxyzx
|
||||
|
||||
/(?|(abc)|(xyz))(?1)/
|
||||
abcabc
|
||||
0: abcabc
|
||||
xyzabc
|
||||
0: xyzabc
|
||||
** Failers
|
||||
No match
|
||||
xyzxyz
|
||||
No match
|
||||
|
||||
/\H\h\V\v/
|
||||
X X\x0a
|
||||
0: X X\x0a
|
||||
X\x09X\x0b
|
||||
0: X\x09X\x0b
|
||||
** Failers
|
||||
No match
|
||||
\xa0 X\x0a
|
||||
No match
|
||||
|
||||
/\H*\h+\V?\v{3,4}/
|
||||
\x09\x20\xa0X\x0a\x0b\x0c\x0d\x0a
|
||||
0: \x09 \xa0X\x0a\x0b\x0c\x0d
|
||||
1: \x09 \xa0X\x0a\x0b\x0c
|
||||
\x09\x20\xa0\x0a\x0b\x0c\x0d\x0a
|
||||
0: \x09 \xa0\x0a\x0b\x0c\x0d
|
||||
1: \x09 \xa0\x0a\x0b\x0c
|
||||
\x09\x20\xa0\x0a\x0b\x0c
|
||||
0: \x09 \xa0\x0a\x0b\x0c
|
||||
** Failers
|
||||
No match
|
||||
\x09\x20\xa0\x0a\x0b
|
||||
No match
|
||||
|
||||
/\H{3,4}/
|
||||
XY ABCDE
|
||||
0: ABCD
|
||||
1: ABC
|
||||
XY PQR ST
|
||||
0: PQR
|
||||
|
||||
/.\h{3,4}./
|
||||
XY AB PQRS
|
||||
0: B P
|
||||
1: B
|
||||
|
||||
/\h*X\h?\H+Y\H?Z/
|
||||
>XNNNYZ
|
||||
0: XNNNYZ
|
||||
> X NYQZ
|
||||
0: X NYQZ
|
||||
** Failers
|
||||
No match
|
||||
>XYZ
|
||||
No match
|
||||
> X NY Z
|
||||
No match
|
||||
|
||||
/\v*X\v?Y\v+Z\V*\x0a\V+\x0b\V{2,3}\x0c/
|
||||
>XY\x0aZ\x0aA\x0bNN\x0c
|
||||
0: XY\x0aZ\x0aA\x0bNN\x0c
|
||||
>\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
|
||||
/ End of testinput7 /
|
||||
|
66
ext/pcre/pcrelib/testdata/testoutput8
vendored
66
ext/pcre/pcrelib/testdata/testoutput8
vendored
@ -1138,4 +1138,70 @@ No match
|
||||
a\r
|
||||
No match
|
||||
|
||||
/\h+\V?\v{3,4}/8
|
||||
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d}
|
||||
1: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}
|
||||
|
||||
/\V?\v{3,4}/8
|
||||
\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
0: X\x{0a}\x{0b}\x{0c}\x{0d}
|
||||
1: X\x{0a}\x{0b}\x{0c}
|
||||
|
||||
/\h+\V?\v{3,4}/8
|
||||
>\x09\x20\x{a0}X\x0a\x0a\x0a<
|
||||
0: \x{09} \x{a0}X\x{0a}\x{0a}\x{0a}
|
||||
|
||||
/\V?\v{3,4}/8
|
||||
>\x09\x20\x{a0}X\x0a\x0a\x0a<
|
||||
0: X\x{0a}\x{0a}\x{0a}
|
||||
|
||||
/\H\h\V\v/8
|
||||
X X\x0a
|
||||
0: X X\x{0a}
|
||||
X\x09X\x0b
|
||||
0: X\x{09}X\x{0b}
|
||||
** Failers
|
||||
No match
|
||||
\x{a0} X\x0a
|
||||
No match
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
|
||||
0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d}
|
||||
1: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
|
||||
0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d}
|
||||
1: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}
|
||||
\x09\x20\x{a0}\x0a\x0b\x0c
|
||||
0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}
|
||||
** Failers
|
||||
No match
|
||||
\x09\x20\x{a0}\x0a\x0b
|
||||
No match
|
||||
|
||||
/\H\h\V\v/8
|
||||
\x{3001}\x{3000}\x{2030}\x{2028}
|
||||
0: \x{3001}\x{3000}\x{2030}\x{2028}
|
||||
X\x{180e}X\x{85}
|
||||
0: X\x{180e}X\x{85}
|
||||
** Failers
|
||||
No match
|
||||
\x{2009} X\x0a
|
||||
No match
|
||||
|
||||
/\H*\h+\V?\v{3,4}/8
|
||||
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
|
||||
0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d}
|
||||
1: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}
|
||||
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
|
||||
0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028}
|
||||
1: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}
|
||||
\x09\x20\x{202f}\x0a\x0b\x0c
|
||||
0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c}
|
||||
** Failers
|
||||
No match
|
||||
\x09\x{200a}\x{a0}\x{2028}\x0b
|
||||
No match
|
||||
|
||||
/ End of testinput 8 /
|
||||
|
8
ext/pcre/pcrelib/testdata/testoutput9
vendored
8
ext/pcre/pcrelib/testdata/testoutput9
vendored
@ -1624,4 +1624,12 @@ No match
|
||||
AXY
|
||||
No match
|
||||
|
||||
/^\x{023a}+?(\x{0130}+)/8i
|
||||
\x{023a}\x{2c65}\x{0130}
|
||||
0: \x{23a}\x{2c65}\x{130}
|
||||
|
||||
/^\x{023a}+([^X])/8i
|
||||
\x{023a}\x{2c65}X
|
||||
0: \x{23a}\x{2c65}
|
||||
|
||||
/ End /
|
||||
|
@ -2,7 +2,7 @@
|
||||
property table. See ucpinternal.h for a description of the layout.
|
||||
This version was made from the Unicode 5.0.0 tables. */
|
||||
|
||||
static cnode ucp_table[] = {
|
||||
static const cnode ucp_table[] = {
|
||||
{ 0x09800000, 0x0000001f },
|
||||
{ 0x09000020, 0x74000000 },
|
||||
{ 0x09800021, 0x54000002 },
|
102
ext/pcre/upgrade-pcre.php
Normal file
102
ext/pcre/upgrade-pcre.php
Normal file
@ -0,0 +1,102 @@
|
||||
<?php
|
||||
|
||||
// Script to upgrade the bundled PCRE library: drop the pcre-x.x.tar.gz (or
// pcre-x.x.tar.bz2) archive into this directory and run the script from here.

$pattern = 'pcre-*.tar.*';
$newpcre = glob($pattern);

// glob() returns false on error; treat that like "no archive found".
if ($newpcre === false) {
	$newpcre = array();
}

if (count($newpcre) > 1) {
	echo "more than one '$pattern' file. aborting\n";
	print_r($newpcre);
	exit;
}

if (count($newpcre) == 0) {
	die("need one '$pattern' file. aborting.\n");
}


$newpcre = $newpcre[0];

// Pick the tar decompression flag from the archive name.
if (strpos($newpcre, 'gz')) {
	$tarflags = 'xfz';
} elseif (strpos($newpcre, 'bz2')) {
	$tarflags = 'xfj';
} else {
	die("file type not recognized: $newpcre\n");
}

// The file name goes through the shell, so quote it; a name containing
// spaces or shell metacharacters must not be split or interpreted.
$status = 0;
system("tar $tarflags " . escapeshellarg($newpcre), $status);

// Do not carry on copying files out of a tree that was not (fully) extracted.
if ($status !== 0) {
	die("failed to extract $newpcre (tar exit status $status)\n");
}

// From here on $newpcre is the name of the extracted directory, i.e. the
// archive name with everything from '.tar' onwards removed.
$newpcre = substr($newpcre, 0, strpos($newpcre, '.tar'));

// Length of the local directory prefix that recurse() strips from each path.
$dirlen = strlen('pcrelib');
|
||||
|
||||
/**
 * Recursively overwrites every file below $path with its counterpart from
 * the extracted PCRE tree named by the global $newpcre.
 *
 * The counterpart path is built by dropping the first $dirlen characters of
 * the local path and prepending $newpcre. If a "<name>.generic" or
 * "<name>.dist" file exists in the new tree, it is used instead of "<name>".
 * Entries starting with '.', CVS directories and *.lo / *.o files are skipped.
 *
 * The script is aborted via die() when a directory cannot be read, when a
 * local file has no counterpart in the new tree, or when a copy fails.
 *
 * @param string $path directory to process (no trailing slash)
 * @return void
 */
function recurse($path)
{
	global $newpcre, $dirlen;

	$entries = scandir($path);
	if ($entries === false) {
		die("cannot read directory $path\n");
	}

	foreach ($entries as $file) {

		if ($file[0] === '.' || $file === 'CVS') continue;
		if (substr_compare($file, '.lo', -3, 3) == 0 || substr_compare($file, '.o', -2, 2) == 0) continue;

		$file = "$path/$file";

		if (is_dir($file)) {
			recurse($file);
			continue;
		}

		echo "processing $file... ";

		$newfile = $newpcre . substr($file, $dirlen);

		if (is_file($tmp = $newfile . '.generic') || is_file($tmp = $newfile . '.dist')) {
			$newfile = $tmp;
		}

		if (!is_file($newfile)) {
			die("$newfile is not available any more\n");
		}

		// Only report success when the file was really replaced.
		if (!copy($newfile, $file)) {
			die("failed to copy $newfile to $file\n");
		}
		echo "OK\n";
	}
}
|
||||
|
||||
|
||||
// Refresh every file we already ship from the new PCRE tree.
recurse('pcrelib');

// recurse() only updates files that already exist locally, so test data files
// that are new in this release have to be added separately: copy everything
// that is present in the new testdata directory but missing from ours.
$dirorig = scandir('pcrelib/testdata');
$dirnew = scandir("$newpcre/testdata");

if ($dirorig === false || $dirnew === false) {
	die("cannot read the testdata directories\n");
}

$diff = array_diff($dirnew, $dirorig);

foreach ($diff as $file) {
	$src = "$newpcre/testdata/$file";

	// scandir() also lists '.', '..' and sub-directories; only copy files.
	if (!is_file($src)) continue;

	$dest = "pcrelib/testdata/$file";
	echo "adding $dest... ";

	if (!copy($src, $dest)) {
		die("failed to copy $src to $dest\n");
	}
	echo "OK\n";
}


// the config.h needs special care
$prepend_config_h = '
#include <php_compat.h>
#undef PACKAGE_NAME
#undef PACKAGE_VERSION
#undef PACKAGE_TARNAME
#undef PACKAGE_STRING

#define SUPPORT_UCP
#define SUPPORT_UTF8


';

$config_h = file_get_contents('pcrelib/config.h');
if ($config_h === false) {
	die("cannot read pcrelib/config.h\n");
}

// Prepend the PHP-specific settings to the config.h copied by recurse().
if (file_put_contents('pcrelib/config.h', $prepend_config_h . $config_h) === false) {
	die("cannot write pcrelib/config.h\n");
}


echo "\nThe End :-)\n\n";
|
||||
|
||||
?>
|
Loading…
Reference in New Issue
Block a user