mirror of
https://github.com/php/php-src.git
synced 2025-01-22 03:34:19 +08:00
Upgrade PCRE lib to 6.7
This commit is contained in:
parent
307b3bcbb4
commit
45debc52ef
@ -5,7 +5,7 @@ ARG_WITH("pcre-regex", "Perl Compatible Regular Expressions", "yes");
|
||||
|
||||
if (PHP_PCRE_REGEX == "yes") {
|
||||
EXTENSION("pcre", "php_pcre.c", PHP_PCRE_REGEX_SHARED,
|
||||
"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DNO_RECURSE -Iext/pcre/pcrelib");
|
||||
"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DNO_RECURSE -Iext/pcre/pcrelib");
|
||||
ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
|
||||
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
|
||||
|
||||
|
@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
if test "$PHP_PCRE_REGEX" = "yes"; then
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -I@ext_srcdir@/pcrelib)
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -I@ext_srcdir@/pcrelib)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
|
||||
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
|
||||
@ -51,7 +51,7 @@ if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
|
||||
AC_DEFINE(HAVE_PCRE, 1, [ ])
|
||||
PHP_ADD_INCLUDE($PCRE_INCDIR)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000)
|
||||
fi
|
||||
PHP_SUBST(PCRE_SHARED_LIBADD)
|
||||
fi
|
||||
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ THE C++ WRAPPER LIBRARY
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2005 Google Inc
|
||||
Copyright (c) 2006 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
||||
|
@ -22,7 +22,7 @@ Email domain: cam.ac.uk
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England. Phone: +44 1223 334714.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2006 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2005, Google Inc.
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -1,6 +1,175 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 6.7 04-Jul-06
|
||||
---------------------
|
||||
|
||||
1. In order to handle tests when input lines are enormously long, pcretest has
|
||||
been re-factored so that it automatically extends its buffers when
|
||||
necessary. The code is crude, but this _is_ just a test program. The
|
||||
default size has been increased from 32K to 50K.
|
||||
|
||||
2. The code in pcre_study() was using the value of the re argument before
|
||||
testing it for NULL. (Of course, in any sensible call of the function, it
|
||||
won't be NULL.)
|
||||
|
||||
3. The memmove() emulation function in pcre_internal.h, which is used on
|
||||
systems that lack both memmove() and bcopy() - that is, hardly ever -
|
||||
was missing a "static" storage class specifier.
|
||||
|
||||
4. When UTF-8 mode was not set, PCRE looped when compiling certain patterns
|
||||
containing an extended class (one that cannot be represented by a bitmap
|
||||
because it contains high-valued characters or Unicode property items, e.g.
|
||||
[\pZ]). Almost always one would set UTF-8 mode when processing such a
|
||||
pattern, but PCRE should not loop if you do not (it no longer does).
|
||||
[Detail: two cases were found: (a) a repeated subpattern containing an
|
||||
extended class; (b) a recursive reference to a subpattern that followed a
|
||||
previous extended class. It wasn't skipping over the extended class
|
||||
correctly when UTF-8 mode was not set.]
|
||||
|
||||
5. A negated single-character class was not being recognized as fixed-length
|
||||
in lookbehind assertions such as (?<=[^f]), leading to an incorrect
|
||||
compile error "lookbehind assertion is not fixed length".
|
||||
|
||||
6. The RunPerlTest auxiliary script was showing an unexpected difference
|
||||
between PCRE and Perl for UTF-8 tests. It turns out that it is hard to
|
||||
write a Perl script that can interpret lines of an input file either as
|
||||
byte characters or as UTF-8, which is what "perltest" was being required to
|
||||
do for the non-UTF-8 and UTF-8 tests, respectively. Essentially what you
|
||||
can't do is switch easily at run time between having the "use utf8;" pragma
|
||||
or not. In the end, I fudged it by using the RunPerlTest script to insert
|
||||
"use utf8;" explicitly for the UTF-8 tests.
|
||||
|
||||
7. In multiline (/m) mode, PCRE was matching ^ after a terminating newline at
|
||||
the end of the subject string, contrary to the documentation and to what
|
||||
Perl does. This was true of both matching functions. Now it matches only at
|
||||
the start of the subject and immediately after *internal* newlines.
|
||||
|
||||
8. A call of pcre_fullinfo() from pcretest to get the option bits was passing
|
||||
a pointer to an int instead of a pointer to an unsigned long int. This
|
||||
caused problems on 64-bit systems.
|
||||
|
||||
9. Applied a patch from the folks at Google to pcrecpp.cc, to fix "another
|
||||
instance of the 'standard' template library not being so standard".
|
||||
|
||||
10. There was no check on the number of named subpatterns nor the maximum
|
||||
length of a subpattern name. The product of these values is used to compute
|
||||
the size of the memory block for a compiled pattern. By supplying a very
|
||||
long subpattern name and a large number of named subpatterns, the size
|
||||
computation could be caused to overflow. This is now prevented by limiting
|
||||
the length of names to 32 characters, and the number of named subpatterns
|
||||
to 10,000.
|
||||
|
||||
11. Subpatterns that are repeated with specific counts have to be replicated in
|
||||
the compiled pattern. The size of memory for this was computed from the
|
||||
length of the subpattern and the repeat count. The latter is limited to
|
||||
65535, but there was no limit on the former, meaning that integer overflow
|
||||
could in principle occur. The compiled length of a repeated subpattern is
|
||||
now limited to 30,000 bytes in order to prevent this.
|
||||
|
||||
12. Added the optional facility to have named substrings with the same name.
|
||||
|
||||
13. Added the ability to use a named substring as a condition, using the
|
||||
Python syntax: (?(name)yes|no). This overloads (?(R)... and names that
|
||||
are numbers (not recommended). Forward references are permitted.
|
||||
|
||||
14. Added forward references in named backreferences (if you see what I mean).
|
||||
|
||||
15. In UTF-8 mode, with the PCRE_DOTALL option set, a quantified dot in the
|
||||
pattern could run off the end of the subject. For example, the pattern
|
||||
"(?s)(.{1,5})"8 did this with the subject "ab".
|
||||
|
||||
16. If PCRE_DOTALL or PCRE_MULTILINE were set, pcre_dfa_exec() behaved as if
|
||||
PCRE_CASELESS was set when matching characters that were quantified with ?
|
||||
or *.
|
||||
|
||||
17. A character class other than a single negated character that had a minimum
|
||||
but no maximum quantifier - for example [ab]{6,} - was not handled
|
||||
correctly by pcre_dfa_exec(). It would match only one character.
|
||||
|
||||
18. A valid (though odd) pattern that looked like a POSIX character
|
||||
class but used an invalid character after [ (for example [[,abc,]]) caused
|
||||
pcre_compile() to give the error "Failed: internal error: code overflow" or
|
||||
in some cases to crash with a glibc free() error. This could even happen if
|
||||
the pattern terminated after [[ but there just happened to be a sequence of
|
||||
letters, a binary zero, and a closing ] in the memory that followed.
|
||||
|
||||
19. Perl's treatment of octal escapes in the range \400 to \777 has changed
|
||||
over the years. Originally (before any Unicode support), just the bottom 8
|
||||
bits were taken. Thus, for example, \500 really meant \100. Nowadays the
|
||||
output from "man perlunicode" includes this:
|
||||
|
||||
The regular expression compiler produces polymorphic opcodes. That
|
||||
is, the pattern adapts to the data and automatically switches to
|
||||
the Unicode character scheme when presented with Unicode data--or
|
||||
instead uses a traditional byte scheme when presented with byte
|
||||
data.
|
||||
|
||||
Sadly, a wide octal escape does not cause a switch, and in a string with
|
||||
no other multibyte characters, these octal escapes are treated as before.
|
||||
Thus, in Perl, the pattern /\500/ actually matches \100 but the pattern
|
||||
/\500|\x{1ff}/ matches \500 or \777 because the whole thing is treated as a
|
||||
Unicode string.
|
||||
|
||||
I have not perpetrated such confusion in PCRE. Up till now, it took just
|
||||
the bottom 8 bits, as in old Perl. I have now made octal escapes with
|
||||
values greater than \377 illegal in non-UTF-8 mode. In UTF-8 mode they
|
||||
translate to the appropriate multibyte character.
|
||||
|
||||
20. Applied some refactoring to reduce the number of warnings from Microsoft
|
||||
and Borland compilers. This has included removing the fudge introduced
|
||||
seven years ago for the OS/2 compiler (see 2.02/2 below) because it caused
|
||||
a warning about an unused variable.
|
||||
|
||||
21. PCRE has not included VT (character 0x0b) in the set of whitespace
|
||||
characters since release 4.0, because Perl (from release 5.004) does not.
|
||||
[Or at least, is documented not to: some releases seem to be in conflict
|
||||
with the documentation.] However, when a pattern was studied with
|
||||
pcre_study() and all its branches started with \s, PCRE still included VT
|
||||
as a possible starting character. Of course, this did no harm; it just
|
||||
caused an unnecessary match attempt.
|
||||
|
||||
22. Removed a now-redundant internal flag bit that recorded the fact that case
|
||||
dependency changed within the pattern. This was once needed for "required
|
||||
byte" processing, but is no longer used. This recovers a now-scarce options
|
||||
bit. Also moved the least significant internal flag bit to the most-
|
||||
significant bit of the word, which was not previously used (hangover from
|
||||
the days when it was an int rather than a uint) to free up another bit for
|
||||
the future.
|
||||
|
||||
23. Added support for CRLF line endings as well as CR and LF. As well as the
|
||||
default being selectable at build time, it can now be changed at runtime
|
||||
via the PCRE_NEWLINE_xxx flags. There are now options for pcregrep to
|
||||
specify that it is scanning data with non-default line endings.
|
||||
|
||||
24. Changed the definition of CXXLINK to make it agree with the definition of
|
||||
LINK in the Makefile, by replacing LDFLAGS with CXXFLAGS.
|
||||
|
||||
25. Applied Ian Taylor's patches to avoid using another stack frame for tail
|
||||
recursions. This makes a big difference to stack usage for some patterns.
|
||||
|
||||
26. If a subpattern containing a named recursion or subroutine reference such
|
||||
as (?P>B) was quantified, for example (xxx(?P>B)){3}, the calculation of
|
||||
the space required for the compiled pattern went wrong and gave too small a
|
||||
value. Depending on the environment, this could lead to "Failed: internal
|
||||
error: code overflow at offset 49" or "glibc detected double free or
|
||||
corruption" errors.
|
||||
|
||||
27. Applied patches from Google (a) to support the new newline modes and (b) to
|
||||
advance over multibyte UTF-8 characters in GlobalReplace.
|
||||
|
||||
28. Change free() to pcre_free() in pcredemo.c. Apparently this makes a
|
||||
difference for some implementation of PCRE in some Windows version.
|
||||
|
||||
29. Added some extra testing facilities to pcretest:
|
||||
|
||||
\q<number> in a data line sets the "match limit" value
|
||||
\Q<number> in a data line sets the "match recursion limit" value
|
||||
-S <number> sets the stack size, where <number> is in megabytes
|
||||
|
||||
The -S option isn't available for Windows.
|
||||
|
||||
|
||||
Version 6.6 06-Feb-06
|
||||
---------------------
|
||||
|
||||
|
@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2005, Google Inc.
|
||||
Copyright (c) 2006, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -1,6 +1,17 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 6.7 04-Jul-06
|
||||
---------------------
|
||||
|
||||
The main additions to this release are the ability to use the same name for
|
||||
multiple sets of parentheses, and support for CRLF line endings in both the
|
||||
library and pcregrep (and in pcretest for testing).
|
||||
|
||||
Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
|
||||
significantly reduced for certain subject strings.
|
||||
|
||||
|
||||
Release 6.5 01-Feb-06
|
||||
---------------------
|
||||
|
||||
|
@ -34,7 +34,7 @@ Documentation for PCRE
|
||||
----------------------
|
||||
|
||||
If you install PCRE in the normal way, you will end up with an installed set of
|
||||
man pages whose names all start with "pcre". The one that is called "pcre"
|
||||
man pages whose names all start with "pcre". The one that is just called "pcre"
|
||||
lists all the others. In addition to these man pages, the PCRE documentation is
|
||||
supplied in two other forms; however, as there is no standard place to install
|
||||
them, they are left in the doc directory of the unpacked source distribution.
|
||||
@ -114,15 +114,17 @@ library. You can read more about them in the pcrebuild man page.
|
||||
. If, in addition to support for UTF-8 character strings, you want to include
|
||||
support for the \P, \p, and \X sequences that recognize Unicode character
|
||||
properties, you must add --enable-unicode-properties to the "configure"
|
||||
command. This adds about 90K to the size of the library (in the form of a
|
||||
command. This adds about 30K to the size of the library (in the form of a
|
||||
property table); only the basic two-letter properties such as Lu are
|
||||
supported.
|
||||
|
||||
. You can build PCRE to recognize either CR or LF as the newline character,
|
||||
instead of whatever your compiler uses for "\n", by adding --newline-is-cr or
|
||||
--newline-is-lf to the "configure" command, respectively. Only do this if you
|
||||
really understand what you are doing. On traditional Unix-like systems, the
|
||||
newline character is LF.
|
||||
. You can build PCRE to recognize either CR or LF or the sequence CRLF as
|
||||
indicating the end of a line. Whatever you specify at build time is the
|
||||
default; the caller of PCRE can change the selection at run time. The default
|
||||
newline indicator is a single LF character (the Unix standard). You can
|
||||
specify the default newline indicator by adding --newline-is-cr or
|
||||
--newline-is-lf or --newline-is-crlf to the "configure" command,
|
||||
respectively.
|
||||
|
||||
. When called via the POSIX interface, PCRE uses malloc() to get additional
|
||||
storage for processing capturing parentheses if there are more than 10 of
|
||||
@ -142,6 +144,16 @@ library. You can read more about them in the pcrebuild man page.
|
||||
pcre_exec() can supply their own value. There is discussion on the pcreapi
|
||||
man page.
|
||||
|
||||
. There is a separate counter that limits the depth of recursive function calls
|
||||
during a matching process. This also has a default of ten million, which is
|
||||
essentially "unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-recursion=500000
|
||||
|
||||
Recursive function calls use up the runtime stack; running out of stack can
|
||||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcrestack man page.
|
||||
|
||||
. The default maximum compiled pattern size is around 64K. You can increase
|
||||
this by adding --with-link-size=3 to the "configure" command. You can
|
||||
increase it even more by setting --with-link-size=4, but this is unlikely
|
||||
@ -165,7 +177,6 @@ library. You can read more about them in the pcrebuild man page.
|
||||
|
||||
The "configure" script builds eight files for the basic C library:
|
||||
|
||||
. pcre.h is the header file for C programs that call PCRE
|
||||
. Makefile is the makefile that builds the library
|
||||
. config.h contains build-time configuration options for the library
|
||||
. pcre-config is a script that shows the settings of "configure" options
|
||||
@ -432,25 +443,24 @@ The distribution should contain the following files:
|
||||
pcre_info.c )
|
||||
pcre_maketables.c )
|
||||
pcre_ord2utf8.c )
|
||||
pcre_printint.c )
|
||||
pcre_refcount.c )
|
||||
pcre_study.c )
|
||||
pcre_tables.c )
|
||||
pcre_try_flipped.c )
|
||||
pcre_ucp_findchar.c )
|
||||
pcre_ucp_searchfuncs.c)
|
||||
pcre_valid_utf8.c )
|
||||
pcre_version.c )
|
||||
pcre_xclass.c )
|
||||
|
||||
ucp_findchar.c )
|
||||
ucp.h ) source for the code that is used for
|
||||
ucpinternal.h ) Unicode property handling
|
||||
ucptable.c )
|
||||
ucptypetable.c )
|
||||
|
||||
pcre.in "source" for the header for the external API; pcre.h
|
||||
is built from this by "configure"
|
||||
pcre_printint.src ) debugging function that is #included in pcretest, and
|
||||
) can also be #included in pcre_compile()
|
||||
|
||||
pcre.h the public PCRE header file
|
||||
pcreposix.h header for the external POSIX wrapper API
|
||||
pcre_internal.h header for internal use
|
||||
ucp.h ) headers concerned with
|
||||
ucpinternal.h ) Unicode property handling
|
||||
config.in template for config.h, which is built by configure
|
||||
|
||||
pcrecpp.h the header file for the C++ wrapper
|
||||
@ -477,8 +487,9 @@ The distribution should contain the following files:
|
||||
RunGrepTest.in template for a Unix shell script for pcregrep tests
|
||||
config.guess ) files used by libtool,
|
||||
config.sub ) used only when building a shared library
|
||||
config.h.in "source" for the config.h header file
|
||||
configure a configuring shell script (built by autoconf)
|
||||
configure.in the autoconf input used to build configure
|
||||
configure.ac the autoconf input used to build configure
|
||||
doc/Tech.Notes notes on the encoding
|
||||
doc/*.3 man page sources for the PCRE functions
|
||||
doc/*.1 man page sources for pcregrep and pcretest
|
||||
@ -506,7 +517,6 @@ The distribution should contain the following files:
|
||||
|
||||
libpcre.def
|
||||
libpcreposix.def
|
||||
pcre.def
|
||||
|
||||
(D) Auxiliary file for VPASCAL
|
||||
|
||||
@ -515,4 +525,4 @@ The distribution should contain the following files:
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
January 2006
|
||||
June 2006
|
||||
|
@ -1,6 +1,9 @@
|
||||
Technical Notes about PCRE
|
||||
--------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE internals.
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
@ -21,13 +24,14 @@ the pattern, as is expected in Unix and Perl-style regular expressions.
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer and subsequently
|
||||
heavily modified for Perl actually compiles the expression twice: once in a
|
||||
dummy mode in order to find out how much store will be needed, and then for
|
||||
real. The execution function operates by backtracking and maximizing (or,
|
||||
optionally, minimizing in Perl) the amount of the subject that matches
|
||||
individual wild portions of the pattern. This is an "NFA algorithm" in Friedl's
|
||||
terminology.
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
@ -43,7 +47,7 @@ then a second pass to do the real compile - which may use a bit less than the
|
||||
predicted amount of store. The idea is that this is going to turn out faster
|
||||
because the first pass is degenerate and the second pass can just store stuff
|
||||
straight into the vector, which it knows is big enough. It does make the
|
||||
compiling functions bigger, of course, but they have got quite big anyway to
|
||||
compiling functions bigger, of course, but they have become quite big anyway to
|
||||
handle all the Perl stuff.
|
||||
|
||||
Traditional matching function
|
||||
@ -63,7 +67,7 @@ pcre_dfa_exec(). This implements a DFA matching algorithm that searches
|
||||
simultaneously for all possible matches that start at one point in the subject
|
||||
string. (Going back to my roots: see Historical Note 1 above.) This function
|
||||
interprets the same compiled pattern data as pcre_exec(); however, not all the
|
||||
facilities are available, and those that are don't always work in quite the
|
||||
facilities are available, and those that are do not always work in quite the
|
||||
same way. See the user documentation for details.
|
||||
|
||||
Format of compiled patterns
|
||||
@ -157,10 +161,12 @@ Match by Unicode property
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by a single byte that encodes the desired property value.
|
||||
Each is followed by two bytes that encode the desired property as a type and a
|
||||
value.
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by two
|
||||
bytes: OP_PROP or OP_NOTPROP and then the desired property value.
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
@ -339,4 +345,4 @@ at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
January 2006
|
||||
June 2006
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -55,9 +55,9 @@ cannot run ./configure. As it now stands, this file need not be edited in that
|
||||
circumstance. */
|
||||
|
||||
#define PCRE_MAJOR 6
|
||||
#define PCRE_MINOR 6
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 06-Feb-2006
|
||||
#define PCRE_DATE 04-Jul-2006
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions
|
||||
when building PCRE. */
|
||||
@ -116,6 +116,10 @@ extern "C" {
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
#define PCRE_DUPNAMES 0x00080000
|
||||
#define PCRE_NEWLINE_CR 0x00100000
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@ -269,6 +273,8 @@ PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
|
@ -42,6 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting internal functions that are not used by other modules. */
|
||||
|
||||
|
||||
#define NLBLOCK cd /* The block containing newline information */
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -190,7 +191,7 @@ static const char *error_texts[] = {
|
||||
"unrecognized character after (?<",
|
||||
/* 25 */
|
||||
"lookbehind assertion is not fixed length",
|
||||
"malformed number after (?(",
|
||||
"malformed number or name after (?(",
|
||||
"conditional group contains more than two branches",
|
||||
"assertion expected after (?(",
|
||||
"(?R or (?digits must be followed by )",
|
||||
@ -210,12 +211,17 @@ static const char *error_texts[] = {
|
||||
"recursive call could loop indefinitely",
|
||||
"unrecognized character after (?P",
|
||||
"syntax error after (?P",
|
||||
"two named groups have the same name",
|
||||
"two named subpatterns have the same name",
|
||||
"invalid UTF-8 string",
|
||||
/* 45 */
|
||||
"support for \\P, \\p, and \\X has not been compiled",
|
||||
"malformed \\P or \\p sequence",
|
||||
"unknown property name after \\P or \\p"
|
||||
"unknown property name after \\P or \\p",
|
||||
"subpattern name is too long (maximum 32 characters)",
|
||||
"too many named subpatterns (maximum 10,000)",
|
||||
/* 50 */
|
||||
"repeated subpattern is too long",
|
||||
"octal value is greater than \\377 (not in UTF-8 mode)"
|
||||
};
|
||||
|
||||
|
||||
@ -460,13 +466,16 @@ else
|
||||
}
|
||||
|
||||
/* \0 always starts an octal number, but we may drop through to here with a
|
||||
larger first octal digit. */
|
||||
larger first octal digit. The original code used just to take the least
|
||||
significant 8 bits of octal numbers (I think this is what early Perls used
|
||||
to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
|
||||
than 3 octal digits. */
|
||||
|
||||
case '0':
|
||||
c -= '0';
|
||||
while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
|
||||
c = c * 8 + *(++ptr) - '0';
|
||||
c &= 255; /* Take least significant 8 bits */
|
||||
if (!utf8 && c > 255) *errorcodeptr = ERR51;
|
||||
break;
|
||||
|
||||
/* \x is complicated. \x{ddd} is a character number which can be greater
|
||||
@ -762,6 +771,48 @@ return p;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find forward referenced named subpattern *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans along a pattern looking for capturing subpatterns, and
|
||||
counting them. If it finds a named pattern that matches the name it is given,
|
||||
it returns its number. This is used for forward references to named
|
||||
subpatterns. We know that if (?P< is encountered, the name will be terminated
|
||||
by '>' because that is checked in the first pass.
|
||||
|
||||
Arguments:
|
||||
pointer current position in the pattern
|
||||
count current count of capturing parens
|
||||
name name to seek
|
||||
namelen name length
|
||||
|
||||
Returns: the number of the named subpattern, or -1 if not found
|
||||
*/
|
||||
|
||||
static int
|
||||
find_named_parens(const uschar *ptr, int count, const uschar *name, int namelen)
|
||||
{
|
||||
const uschar *thisname;
|
||||
for (; *ptr != 0; ptr++)
|
||||
{
|
||||
if (*ptr == '\\' && ptr[1] != 0) { ptr++; continue; }
|
||||
if (*ptr != '(') continue;
|
||||
if (ptr[1] != '?') { count++; continue; }
|
||||
if (ptr[2] == '(') { ptr += 2; continue; }
|
||||
if (ptr[2] != 'P' || ptr[3] != '<') continue;
|
||||
count++;
|
||||
ptr += 4;
|
||||
thisname = ptr;
|
||||
while (*ptr != '>') ptr++;
|
||||
if (namelen == ptr - thisname && strncmp(name, thisname, namelen) == 0)
|
||||
return count;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first significant op code *
|
||||
*************************************************/
|
||||
@ -917,6 +968,7 @@ for (;;)
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
case OP_NOT:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
@ -1031,14 +1083,19 @@ Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
static const uschar *
|
||||
find_bracket(const uschar *code, BOOL utf8, int number)
|
||||
{
|
||||
#ifndef SUPPORT_UTF8
|
||||
utf8 = utf8; /* Stop pedantic compilers complaining */
|
||||
#endif
|
||||
|
||||
for (;;)
|
||||
{
|
||||
register int c = *code;
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
|
||||
/* Handle bracketed group */
|
||||
|
||||
else if (c > OP_BRA)
|
||||
{
|
||||
int n = c - OP_BRA;
|
||||
@ -1046,17 +1103,16 @@ for (;;)
|
||||
if (n == number) return (uschar *)code;
|
||||
code += _pcre_OP_lengths[OP_BRA];
|
||||
}
|
||||
|
||||
/* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
|
||||
that are followed by a character may be followed by a multi-byte character.
|
||||
The length in the table is a minimum, so we have to scan along to skip the
|
||||
extra bytes. All opcodes are less than 128, so we can use relatively
|
||||
efficient code. */
|
||||
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
/* In UTF-8 mode, opcodes that are followed by a character may be followed
|
||||
by a multi-byte character. The length in the table is a minimum, so we have
|
||||
to scan along to skip the extra bytes. All opcodes are less than 128, so we
|
||||
can use relatively efficient code. */
|
||||
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1072,16 +1128,7 @@ for (;;)
|
||||
case OP_MINQUERY:
|
||||
while ((*code & 0xc0) == 0x80) code++;
|
||||
break;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
|
||||
case OP_XCLASS:
|
||||
code += GET(code, 1) + 1;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1105,30 +1152,34 @@ Returns: pointer to the opcode for OP_RECURSE, or NULL if not found
|
||||
static const uschar *
|
||||
find_recurse(const uschar *code, BOOL utf8)
|
||||
{
|
||||
#ifndef SUPPORT_UTF8
|
||||
utf8 = utf8; /* Stop pedantic compilers complaining */
|
||||
#endif
|
||||
|
||||
for (;;)
|
||||
{
|
||||
register int c = *code;
|
||||
if (c == OP_END) return NULL;
|
||||
else if (c == OP_RECURSE) return code;
|
||||
if (c == OP_RECURSE) return code;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
|
||||
/* All bracketed groups have the same length. */
|
||||
|
||||
else if (c > OP_BRA)
|
||||
{
|
||||
code += _pcre_OP_lengths[OP_BRA];
|
||||
}
|
||||
|
||||
/* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
|
||||
that are followed by a character may be followed by a multi-byte character.
|
||||
The length in the table is a minimum, so we have to scan along to skip the
|
||||
extra bytes. All opcodes are less than 128, so we can use relatively
|
||||
efficient code. */
|
||||
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
/* In UTF-8 mode, opcodes that are followed by a character may be followed
|
||||
by a multi-byte character. The length in the table is a minimum, so we have
|
||||
to scan along to skip the extra bytes. All opcodes are less than 128, so we
|
||||
can use relatively efficient code. */
|
||||
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
@ -1144,16 +1195,7 @@ for (;;)
|
||||
case OP_MINQUERY:
|
||||
while ((*code & 0xc0) == 0x80) code++;
|
||||
break;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit
|
||||
map. This includes negated single high-valued characters. The length in
|
||||
the table is zero; the actual length is stored in the compiled code. */
|
||||
|
||||
case OP_XCLASS:
|
||||
code += GET(code, 1) + 1;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1569,7 +1611,6 @@ int greedy_default, greedy_non_default;
|
||||
int firstbyte, reqbyte;
|
||||
int zeroreqbyte, zerofirstbyte;
|
||||
int req_caseopt, reqvary, tempreqvary;
|
||||
int condcount = 0;
|
||||
int options = *optionsptr;
|
||||
int after_manual_callout = 0;
|
||||
register int c;
|
||||
@ -1683,10 +1724,14 @@ for (;; ptr++)
|
||||
if ((cd->ctypes[c] & ctype_space) != 0) continue;
|
||||
if (c == '#')
|
||||
{
|
||||
/* The space before the ; is to avoid a warning on a silly compiler
|
||||
on the Macintosh. */
|
||||
while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
|
||||
if (c != 0) continue; /* Else fall through to handle end of string */
|
||||
while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) break;
|
||||
if (*ptr != 0)
|
||||
{
|
||||
ptr += cd->nllen - 1;
|
||||
continue;
|
||||
}
|
||||
/* Else fall through to handle end of string */
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2851,37 +2896,91 @@ for (;; ptr++)
|
||||
case '(':
|
||||
bravalue = OP_COND; /* Conditional group */
|
||||
|
||||
/* Condition to test for recursion */
|
||||
/* A condition can be a number, referring to a numbered group, a name,
|
||||
referring to a named group, 'R', referring to recursion, or an
|
||||
assertion. There are two unfortunate ambiguities, caused by history.
|
||||
(a) 'R' can be the recursive thing or the name 'R', and (b) a number
|
||||
could be a name that consists of digits. In both cases, we look for a
|
||||
name first; if not found, we try the other cases. If the first
|
||||
character after (?( is a word character, we know the rest up to ) will
|
||||
also be word characters because the syntax was checked in the first
|
||||
pass. */
|
||||
|
||||
if (ptr[1] == 'R')
|
||||
if ((cd->ctypes[ptr[1]] & ctype_word) != 0)
|
||||
{
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
|
||||
int i, namelen;
|
||||
int condref = 0;
|
||||
const uschar *name;
|
||||
uschar *slot = cd->name_table;
|
||||
|
||||
/* This is needed for all successful cases. */
|
||||
|
||||
skipbytes = 3;
|
||||
ptr += 3;
|
||||
}
|
||||
|
||||
/* Condition to test for a numbered subpattern match. We know that
|
||||
if a digit follows ( then there will just be digits until ) because
|
||||
the syntax was checked in the first pass. */
|
||||
/* Read the name, but also get it as a number if it's all digits */
|
||||
|
||||
else if ((digitab[ptr[1]] && ctype_digit) != 0)
|
||||
{
|
||||
int condref; /* Don't amalgamate; some compilers */
|
||||
condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */
|
||||
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
|
||||
if (condref == 0)
|
||||
name = ++ptr;
|
||||
while (*ptr != ')')
|
||||
{
|
||||
*errorcodeptr = ERR35;
|
||||
if (condref >= 0)
|
||||
condref = ((digitab[*ptr] & ctype_digit) != 0)?
|
||||
condref * 10 + *ptr - '0' : -1;
|
||||
ptr++;
|
||||
}
|
||||
namelen = ptr - name;
|
||||
ptr++;
|
||||
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
{
|
||||
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
|
||||
slot += cd->name_entry_size;
|
||||
}
|
||||
|
||||
/* Found a previous named subpattern */
|
||||
|
||||
if (i < cd->names_found)
|
||||
{
|
||||
condref = GET2(slot, 0);
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, condref);
|
||||
}
|
||||
|
||||
/* Search the pattern for a forward reference */
|
||||
|
||||
else if ((i = find_named_parens(ptr, *brackets, name, namelen)) > 0)
|
||||
{
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, i);
|
||||
}
|
||||
|
||||
/* Check for 'R' for recursion */
|
||||
|
||||
else if (namelen == 1 && *name == 'R')
|
||||
{
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
|
||||
}
|
||||
|
||||
/* Check for a subpattern number */
|
||||
|
||||
else if (condref > 0)
|
||||
{
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, condref);
|
||||
}
|
||||
|
||||
/* Either an unidentified subpattern, or a reference to (?(0) */
|
||||
|
||||
else
|
||||
{
|
||||
*errorcodeptr = (condref == 0)? ERR35: ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
ptr++;
|
||||
code[1+LINK_SIZE] = OP_CREF;
|
||||
PUT2(code, 2+LINK_SIZE, condref);
|
||||
skipbytes = 3;
|
||||
}
|
||||
|
||||
/* For conditions that are assertions, we just fall through, having
|
||||
set bravalue above. */
|
||||
|
||||
break;
|
||||
|
||||
case '=': /* Positive lookahead */
|
||||
@ -2953,10 +3052,13 @@ for (;; ptr++)
|
||||
{
|
||||
if (slot[2+namelen] == 0)
|
||||
{
|
||||
*errorcodeptr = ERR43;
|
||||
goto FAILED;
|
||||
if ((options & PCRE_DUPNAMES) == 0)
|
||||
{
|
||||
*errorcodeptr = ERR43;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
crc = -1; /* Current name is substring */
|
||||
else crc = -1; /* Current name is substring */
|
||||
}
|
||||
if (crc < 0)
|
||||
{
|
||||
@ -2989,14 +3091,18 @@ for (;; ptr++)
|
||||
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
|
||||
slot += cd->name_entry_size;
|
||||
}
|
||||
if (i >= cd->names_found)
|
||||
|
||||
if (i < cd->names_found) /* Back reference */
|
||||
{
|
||||
recno = GET2(slot, 0);
|
||||
}
|
||||
else if ((recno = /* Forward back reference */
|
||||
find_named_parens(ptr, *brackets, name, namelen)) <= 0)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
recno = GET2(slot, 0);
|
||||
|
||||
if (type == '>') goto HANDLE_RECURSION; /* A few lines below */
|
||||
|
||||
/* Back reference */
|
||||
@ -3036,9 +3142,8 @@ for (;; ptr++)
|
||||
regex in case it doesn't exist. */
|
||||
|
||||
*code = OP_END;
|
||||
called = (recno == 0)?
|
||||
cd->start_code : find_bracket(cd->start_code, utf8, recno);
|
||||
|
||||
called = (recno == 0)? cd->start_code :
|
||||
find_bracket(cd->start_code, utf8, recno);
|
||||
if (called == NULL)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
@ -3085,6 +3190,7 @@ for (;; ptr++)
|
||||
case '-': optset = &unset; break;
|
||||
|
||||
case 'i': *optset |= PCRE_CASELESS; break;
|
||||
case 'J': *optset |= PCRE_DUPNAMES; break;
|
||||
case 'm': *optset |= PCRE_MULTILINE; break;
|
||||
case 's': *optset |= PCRE_DOTALL; break;
|
||||
case 'x': *optset |= PCRE_EXTENDED; break;
|
||||
@ -3201,7 +3307,7 @@ for (;; ptr++)
|
||||
else if (bravalue == OP_COND)
|
||||
{
|
||||
uschar *tc = code;
|
||||
condcount = 0;
|
||||
int condcount = 0;
|
||||
|
||||
do {
|
||||
condcount++;
|
||||
@ -3906,13 +4012,14 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
|
||||
PCRE_DATA_SCOPE pcre *
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
real_pcre *re;
|
||||
int length = 1 + LINK_SIZE; /* For initial BRA plus length */
|
||||
int c, firstbyte, reqbyte;
|
||||
int c, firstbyte, reqbyte, newline;
|
||||
int bracount = 0;
|
||||
int branch_extra = 0;
|
||||
int branch_newextra;
|
||||
@ -3933,6 +4040,7 @@ uschar *code;
|
||||
const uschar *codestart;
|
||||
const uschar *ptr;
|
||||
compile_data compile_block;
|
||||
compile_data *cd = &compile_block;
|
||||
int brastack[BRASTACK_SIZE];
|
||||
uschar bralenstack[BRASTACK_SIZE];
|
||||
|
||||
@ -3986,18 +4094,42 @@ if ((options & ~PUBLIC_OPTIONS) != 0)
|
||||
/* Set up pointers to the individual character tables */
|
||||
|
||||
if (tables == NULL) tables = _pcre_default_tables;
|
||||
compile_block.lcc = tables + lcc_offset;
|
||||
compile_block.fcc = tables + fcc_offset;
|
||||
compile_block.cbits = tables + cbits_offset;
|
||||
compile_block.ctypes = tables + ctypes_offset;
|
||||
cd->lcc = tables + lcc_offset;
|
||||
cd->fcc = tables + fcc_offset;
|
||||
cd->cbits = tables + cbits_offset;
|
||||
cd->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Handle different types of newline. The two bits give four cases. The current
|
||||
code allows for one- or two-byte sequences. */
|
||||
|
||||
switch (options & PCRE_NEWLINE_CRLF)
|
||||
{
|
||||
default: newline = NEWLINE; break; /* Compile-time default */
|
||||
case PCRE_NEWLINE_CR: newline = '\r'; break;
|
||||
case PCRE_NEWLINE_LF: newline = '\n'; break;
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
}
|
||||
|
||||
if (newline > 255)
|
||||
{
|
||||
cd->nllen = 2;
|
||||
cd->nl[0] = (newline >> 8) & 255;
|
||||
cd->nl[1] = newline & 255;
|
||||
}
|
||||
else
|
||||
{
|
||||
cd->nllen = 1;
|
||||
cd->nl[0] = newline;
|
||||
}
|
||||
|
||||
/* Maximum back reference and backref bitmap. This is updated for numeric
|
||||
references during the first pass, but for named references during the actual
|
||||
compile pass. The bitmap records up to 31 back references to help in deciding
|
||||
whether (.*) can be treated as anchored or not. */
|
||||
|
||||
compile_block.top_backref = 0;
|
||||
compile_block.backref_map = 0;
|
||||
cd->top_backref = 0;
|
||||
cd->backref_map = 0;
|
||||
|
||||
/* Reflect pattern for debugging output */
|
||||
|
||||
@ -4031,14 +4163,16 @@ while ((c = *(++ptr)) != 0)
|
||||
|
||||
if ((options & PCRE_EXTENDED) != 0)
|
||||
{
|
||||
if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
|
||||
if ((cd->ctypes[c] & ctype_space) != 0) continue;
|
||||
if (c == '#')
|
||||
{
|
||||
/* The space before the ; is to avoid a warning on a silly compiler
|
||||
on the Macintosh. */
|
||||
while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
|
||||
if (c == 0) break;
|
||||
continue;
|
||||
while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) break;
|
||||
if (*ptr != 0)
|
||||
{
|
||||
ptr += cd->nllen - 1;
|
||||
continue;
|
||||
}
|
||||
break; /* End loop at end of pattern */
|
||||
}
|
||||
}
|
||||
|
||||
@ -4128,9 +4262,9 @@ while ((c = *(++ptr)) != 0)
|
||||
if (c <= -ESC_REF)
|
||||
{
|
||||
int refnum = -c - ESC_REF;
|
||||
compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;
|
||||
if (refnum > compile_block.top_backref)
|
||||
compile_block.top_backref = refnum;
|
||||
cd->backref_map |= (refnum < 32)? (1 << refnum) : 1;
|
||||
if (refnum > cd->top_backref)
|
||||
cd->top_backref = refnum;
|
||||
length += 2; /* For single back reference */
|
||||
if (ptr[1] == '{' && is_counted_repeat(ptr+2))
|
||||
{
|
||||
@ -4284,7 +4418,9 @@ while ((c = *(++ptr)) != 0)
|
||||
/* Check the syntax for POSIX stuff. The bits we actually handle are
|
||||
checked during the real compile phase. */
|
||||
|
||||
else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))
|
||||
else if (*ptr == '[' &&
|
||||
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
|
||||
check_posix_syntax(ptr, &ptr, cd))
|
||||
{
|
||||
ptr++;
|
||||
class_optcount = 10; /* Make sure > 1 */
|
||||
@ -4517,6 +4653,61 @@ while ((c = *(++ptr)) != 0)
|
||||
ptr += 2;
|
||||
break;
|
||||
|
||||
/* Named subpatterns are an extension copied from Python */
|
||||
|
||||
case 'P':
|
||||
ptr += 3;
|
||||
|
||||
/* Handle the definition of a named subpattern */
|
||||
|
||||
if (*ptr == '<')
|
||||
{
|
||||
const uschar *p; /* Don't amalgamate; some compilers */
|
||||
p = ++ptr; /* grumble at autoincrement in declaration */
|
||||
while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
|
||||
if (*ptr != '>')
|
||||
{
|
||||
errorcode = ERR42;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
name_count++;
|
||||
if (name_count > MAX_NAME_COUNT)
|
||||
{
|
||||
errorcode = ERR49;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
if (ptr - p > max_name_size)
|
||||
{
|
||||
max_name_size = (ptr - p);
|
||||
if (max_name_size > MAX_NAME_SIZE)
|
||||
{
|
||||
errorcode = ERR48;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
}
|
||||
capturing = TRUE; /* Named parentheses are always capturing */
|
||||
break; /* Go handle capturing parentheses */
|
||||
}
|
||||
|
||||
/* Handle back references and recursive calls to named subpatterns */
|
||||
|
||||
if (*ptr == '=' || *ptr == '>')
|
||||
{
|
||||
length += 3 + 3*LINK_SIZE; /* Allow for the automatic "once" */
|
||||
while ((cd->ctypes[*(++ptr)] & ctype_word) != 0);
|
||||
if (*ptr != ')')
|
||||
{
|
||||
errorcode = ERR42;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
goto RECURSE_CHECK_QUANTIFIED;
|
||||
}
|
||||
|
||||
/* Unknown character after (?P */
|
||||
|
||||
errorcode = ERR41;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
|
||||
/* (?R) specifies a recursive call to the regex, which is an extension
|
||||
to provide the facility which can be obtained by (?p{perl-code}) in
|
||||
Perl 5.6. In Perl 5.8 this has become (??{perl-code}).
|
||||
@ -4542,8 +4733,10 @@ while ((c = *(++ptr)) != 0)
|
||||
|
||||
/* If this item is quantified, it will get wrapped inside brackets so
|
||||
as to use the code for quantified brackets. We jump down and use the
|
||||
code that handles this for real brackets. */
|
||||
code that handles this for real brackets. Come here from code for
|
||||
named recursions/subroutines. */
|
||||
|
||||
RECURSE_CHECK_QUANTIFIED:
|
||||
if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')
|
||||
{
|
||||
length += 2 + 2 * LINK_SIZE; /* to make bracketed */
|
||||
@ -4567,48 +4760,6 @@ while ((c = *(++ptr)) != 0)
|
||||
length += 2 + 2*LINK_SIZE;
|
||||
continue;
|
||||
|
||||
/* Named subpatterns are an extension copied from Python */
|
||||
|
||||
case 'P':
|
||||
ptr += 3;
|
||||
|
||||
/* Handle the definition of a named subpattern */
|
||||
|
||||
if (*ptr == '<')
|
||||
{
|
||||
const uschar *p; /* Don't amalgamate; some compilers */
|
||||
p = ++ptr; /* grumble at autoincrement in declaration */
|
||||
while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;
|
||||
if (*ptr != '>')
|
||||
{
|
||||
errorcode = ERR42;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
name_count++;
|
||||
if (ptr - p > max_name_size) max_name_size = (ptr - p);
|
||||
capturing = TRUE; /* Named parentheses are always capturing */
|
||||
break;
|
||||
}
|
||||
|
||||
/* Handle back references and recursive calls to named subpatterns */
|
||||
|
||||
if (*ptr == '=' || *ptr == '>')
|
||||
{
|
||||
length += 2 + 2*LINK_SIZE; /* Allow for the automatic "once" */
|
||||
while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
|
||||
if (*ptr != ')')
|
||||
{
|
||||
errorcode = ERR42;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Unknown character after (?P */
|
||||
|
||||
errorcode = ERR41;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
|
||||
/* Lookbehinds are in Perl from version 5.005 */
|
||||
|
||||
case '<':
|
||||
@ -4624,19 +4775,17 @@ while ((c = *(++ptr)) != 0)
|
||||
|
||||
/* Conditionals are in Perl from version 5.005. The bracket must either
|
||||
be followed by a number (for bracket reference) or by an assertion
|
||||
group, or (a PCRE extension) by 'R' for a recursion test. */
|
||||
group. PCRE extends this by allowing a name to reference a named group;
|
||||
unfortunately, previously 'R' was implemented for a recursion test.
|
||||
When this is compiled, we look for the named group 'R' first. At this
|
||||
point we just do a basic syntax check. */
|
||||
|
||||
case '(':
|
||||
if (ptr[3] == 'R' && ptr[4] == ')')
|
||||
if ((cd->ctypes[ptr[3]] & ctype_word) != 0)
|
||||
{
|
||||
ptr += 4;
|
||||
length += 3;
|
||||
}
|
||||
else if ((digitab[ptr[3]] & ctype_digit) != 0)
|
||||
{
|
||||
ptr += 4;
|
||||
length += 3;
|
||||
while ((digitab[*ptr] & ctype_digit) != 0) ptr++;
|
||||
while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
|
||||
if (*ptr != ')')
|
||||
{
|
||||
errorcode = ERR26;
|
||||
@ -4675,6 +4824,11 @@ while ((c = *(++ptr)) != 0)
|
||||
*optset |= PCRE_CASELESS;
|
||||
continue;
|
||||
|
||||
case 'J':
|
||||
*optset |= PCRE_DUPNAMES;
|
||||
options |= PCRE_JCHANGED; /* Record that it changed */
|
||||
continue;
|
||||
|
||||
case 'm':
|
||||
*optset |= PCRE_MULTILINE;
|
||||
continue;
|
||||
@ -4740,16 +4894,13 @@ while ((c = *(++ptr)) != 0)
|
||||
will lead to an over-estimate on the length, but this shouldn't
|
||||
matter very much. We also have to allow for resetting options at
|
||||
the start of any alternations, which we do by setting
|
||||
branch_newextra to 2. Finally, we record whether the case-dependent
|
||||
flag ever changes within the regex. This is used by the "required
|
||||
character" code. */
|
||||
branch_newextra to 2. */
|
||||
|
||||
case ':':
|
||||
if (((set|unset) & PCRE_IMS) != 0)
|
||||
{
|
||||
length += 4;
|
||||
branch_newextra = 2;
|
||||
if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
|
||||
}
|
||||
goto END_OPTIONS;
|
||||
|
||||
@ -4829,6 +4980,12 @@ while ((c = *(++ptr)) != 0)
|
||||
{
|
||||
duplength = length - brastack[--brastackptr];
|
||||
branch_extra = bralenstack[brastackptr];
|
||||
/* This is a paranoid check to stop integer overflow later on */
|
||||
if (duplength > MAX_DUPLENGTH)
|
||||
{
|
||||
errorcode = ERR50;
|
||||
goto PCRE_ERROR_RETURN;
|
||||
}
|
||||
}
|
||||
else duplength = 0;
|
||||
|
||||
@ -4933,7 +5090,8 @@ if (length > MAX_PATTERN_SIZE)
|
||||
}
|
||||
|
||||
/* Compute the size of data block needed and get it, either from malloc or
|
||||
externally provided function. */
|
||||
externally provided function. Integer overflow should no longer be possible
|
||||
because nowadays we limit the maximum value of name_count and max_name_size. */
|
||||
|
||||
size = length + sizeof(real_pcre) + name_count * (max_name_size + 3);
|
||||
re = (real_pcre *)(pcre_malloc)(size);
|
||||
@ -4963,14 +5121,14 @@ re->nullpad = NULL;
|
||||
/* The starting points of the name/number translation table and of the code are
|
||||
passed around in the compile data block. */
|
||||
|
||||
compile_block.names_found = 0;
|
||||
compile_block.name_entry_size = max_name_size + 3;
|
||||
compile_block.name_table = (uschar *)re + re->name_table_offset;
|
||||
codestart = compile_block.name_table + re->name_entry_size * re->name_count;
|
||||
compile_block.start_code = codestart;
|
||||
compile_block.start_pattern = (const uschar *)pattern;
|
||||
compile_block.req_varyopt = 0;
|
||||
compile_block.nopartial = FALSE;
|
||||
cd->names_found = 0;
|
||||
cd->name_entry_size = max_name_size + 3;
|
||||
cd->name_table = (uschar *)re + re->name_table_offset;
|
||||
codestart = cd->name_table + re->name_entry_size * re->name_count;
|
||||
cd->start_code = codestart;
|
||||
cd->start_pattern = (const uschar *)pattern;
|
||||
cd->req_varyopt = 0;
|
||||
cd->nopartial = FALSE;
|
||||
|
||||
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
||||
error, errorcode will be set non-zero, so we don't need to look at the result
|
||||
@ -4981,11 +5139,11 @@ code = (uschar *)codestart;
|
||||
*code = OP_BRA;
|
||||
bracount = 0;
|
||||
(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,
|
||||
&errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block);
|
||||
&errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd);
|
||||
re->top_bracket = bracount;
|
||||
re->top_backref = compile_block.top_backref;
|
||||
re->top_backref = cd->top_backref;
|
||||
|
||||
if (compile_block.nopartial) re->options |= PCRE_NOPARTIAL;
|
||||
if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
|
||||
|
||||
/* If not reached end of pattern on success, there's an excess bracket. */
|
||||
|
||||
@ -5031,7 +5189,7 @@ start with ^. and also when all branches start with .* for non-DOTALL matches.
|
||||
if ((options & PCRE_ANCHORED) == 0)
|
||||
{
|
||||
int temp_options = options;
|
||||
if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map))
|
||||
if (is_anchored(codestart, &temp_options, 0, cd->backref_map))
|
||||
re->options |= PCRE_ANCHORED;
|
||||
else
|
||||
{
|
||||
@ -5041,10 +5199,10 @@ if ((options & PCRE_ANCHORED) == 0)
|
||||
{
|
||||
int ch = firstbyte & 255;
|
||||
re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
|
||||
compile_block.fcc[ch] == ch)? ch : firstbyte;
|
||||
cd->fcc[ch] == ch)? ch : firstbyte;
|
||||
re->options |= PCRE_FIRSTSET;
|
||||
}
|
||||
else if (is_startline(codestart, 0, compile_block.backref_map))
|
||||
else if (is_startline(codestart, 0, cd->backref_map))
|
||||
re->options |= PCRE_STARTLINE;
|
||||
}
|
||||
}
|
||||
@ -5058,7 +5216,7 @@ if (reqbyte >= 0 &&
|
||||
{
|
||||
int ch = reqbyte & 255;
|
||||
re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
|
||||
compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
|
||||
cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
|
||||
re->options |= PCRE_REQCHSET;
|
||||
}
|
||||
|
||||
@ -5072,11 +5230,10 @@ printf("Length = %d top_bracket = %d top_backref = %d\n",
|
||||
|
||||
if (re->options != 0)
|
||||
{
|
||||
printf("%s%s%s%s%s%s%s%s%s%s\n",
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",
|
||||
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
|
||||
((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
|
||||
((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
|
||||
((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
|
||||
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
|
||||
((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
|
||||
|
@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
|
||||
possible. There are also some static supporting functions. */
|
||||
|
||||
|
||||
#define NLBLOCK md /* The block containing newline information */
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@ -275,7 +275,7 @@ typedef struct heapframe {
|
||||
long int Xims;
|
||||
eptrblock *Xeptrb;
|
||||
int Xflags;
|
||||
int Xrdepth;
|
||||
unsigned int Xrdepth;
|
||||
|
||||
/* Function local variables */
|
||||
|
||||
@ -374,16 +374,16 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
|
||||
static int
|
||||
match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
|
||||
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
|
||||
int flags, int rdepth)
|
||||
int flags, unsigned int rdepth)
|
||||
{
|
||||
/* These variables do not need to be preserved over recursion in this function,
|
||||
so they can be ordinary variables in all cases. Mark them with "register"
|
||||
because they are used a lot in loops. */
|
||||
|
||||
register int rrc; /* Returns from recursive calls */
|
||||
register int i; /* Used for loops not involving calls to RMATCH() */
|
||||
register int c; /* Character values not kept over RMATCH() calls */
|
||||
register BOOL utf8; /* Local copy of UTF-8 flag for speed */
|
||||
register int rrc; /* Returns from recursive calls */
|
||||
register int i; /* Used for loops not involving calls to RMATCH() */
|
||||
register unsigned int c; /* Character values not kept over RMATCH() calls */
|
||||
register BOOL utf8; /* Local copy of UTF-8 flag for speed */
|
||||
|
||||
/* When recursion is not being used, all "local" variables that have to be
|
||||
preserved over calls to RMATCH() are part of a "frame" which is obtained from
|
||||
@ -527,6 +527,13 @@ prop_fail_result = 0;
|
||||
prop_test_variable = NULL;
|
||||
#endif
|
||||
|
||||
/* This label is used for tail recursion, which is used in a few cases even
|
||||
when NO_RECURSE is not defined, in order to reduce the amount of stack that is
|
||||
used. Thanks to Ian Taylor for noticing this possibility and sending the
|
||||
original patch. */
|
||||
|
||||
TAIL_RECURSE:
|
||||
|
||||
/* OK, now we can get on with the real code of the function. Recursive calls
|
||||
are specified by the macro RMATCH and RRETURN is used to return. When
|
||||
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
|
||||
@ -542,7 +549,12 @@ if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
|
||||
if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
|
||||
|
||||
original_ims = ims; /* Save for resetting on ')' */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
utf8 = md->utf8; /* Local copy of the flag */
|
||||
#else
|
||||
utf8 = FALSE;
|
||||
#endif
|
||||
|
||||
/* At the start of a bracketed group, add the current subject pointer to the
|
||||
stack of such pointers, to be re-instated at the end of the group when we hit
|
||||
@ -642,21 +654,38 @@ for (;;)
|
||||
{
|
||||
case OP_BRA: /* Non-capturing bracket: optimized */
|
||||
DPRINTF(("start bracket 0\n"));
|
||||
do
|
||||
|
||||
/* Loop for all the alternatives */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* When we get to the final alternative within the brackets, we would
|
||||
return the result of a recursive call to match() whatever happened. We
|
||||
can reduce stack usage by turning this into a tail recursion. */
|
||||
|
||||
if (ecode[GET(ecode, 1)] != OP_ALT)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_isgroup;
|
||||
DPRINTF(("bracket 0 tail recursion\n"));
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
|
||||
/* For non-final alternatives, continue the loop for a NOMATCH result;
|
||||
otherwise return. */
|
||||
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
|
||||
match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
DPRINTF(("bracket 0 failed\n"));
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never reaches here. */
|
||||
|
||||
/* Conditional group: compilation checked that there are no more than
|
||||
two branches. If the condition is false, skipping the first branch takes us
|
||||
past the end if there is only one branch, but that's OK because that is
|
||||
exactly what going to the ket would do. */
|
||||
exactly what going to the ket would do. As there is only one branch to be
|
||||
obeyed, we can use tail recursion to avoid using another stack frame. */
|
||||
|
||||
case OP_COND:
|
||||
if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */
|
||||
@ -665,10 +694,9 @@ for (;;)
|
||||
condition = (offset == CREF_RECURSE * 2)?
|
||||
(md->recursive != NULL) :
|
||||
(offset < offset_top && md->offset_vector[offset] >= 0);
|
||||
RMATCH(rrc, eptr, ecode + (condition?
|
||||
(LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),
|
||||
offset_top, md, ims, eptrb, match_isgroup);
|
||||
RRETURN(rrc);
|
||||
ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));
|
||||
flags = match_isgroup;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
@ -688,9 +716,13 @@ for (;;)
|
||||
RRETURN(rrc); /* Need braces because of following else */
|
||||
}
|
||||
else ecode += GET(ecode, 1);
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
|
||||
match_isgroup);
|
||||
RRETURN(rrc);
|
||||
|
||||
/* We are now at the branch that is to be obeyed. As there is only one,
|
||||
we can use tail recursion to avoid using another stack frame. */
|
||||
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_isgroup;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never reaches here */
|
||||
|
||||
@ -945,71 +977,72 @@ for (;;)
|
||||
the end of a normal bracket, leaving the subject pointer. */
|
||||
|
||||
case OP_ONCE:
|
||||
prev = ecode;
|
||||
saved_eptr = eptr;
|
||||
|
||||
do
|
||||
{
|
||||
prev = ecode;
|
||||
saved_eptr = eptr;
|
||||
|
||||
do
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
|
||||
eptrb, match_isgroup);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
|
||||
/* If hit the end of the group (which could be repeated), fail */
|
||||
|
||||
if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Continue as from after the assertion, updating the offsets high water
|
||||
mark, since extracts may have been taken. */
|
||||
|
||||
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
|
||||
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
|
||||
/* For a non-repeating ket, just continue at this level. This also
|
||||
happens for a repeating ket if no characters were matched in the group.
|
||||
This is the forcible breaking of infinite loops as implemented in Perl
|
||||
5.005. If there is an options reset, it will get obeyed in the normal
|
||||
course of events. */
|
||||
|
||||
if (*ecode == OP_KET || eptr == saved_eptr)
|
||||
{
|
||||
ecode += 1+LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. We need to reset any options
|
||||
that changed within the bracket before re-running it, so check the next
|
||||
opcode. */
|
||||
|
||||
if (ecode[1+LINK_SIZE] == OP_OPT)
|
||||
{
|
||||
ims = (ims & ~PCRE_IMS) | ecode[4];
|
||||
DPRINTF(("ims set to %02lx at group repeat\n", ims));
|
||||
}
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
{
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
}
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
|
||||
eptrb, match_isgroup);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
while (*ecode == OP_ALT);
|
||||
|
||||
/* If hit the end of the group (which could be repeated), fail */
|
||||
|
||||
if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Continue as from after the assertion, updating the offsets high water
|
||||
mark, since extracts may have been taken. */
|
||||
|
||||
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
|
||||
|
||||
offset_top = md->end_offset_top;
|
||||
eptr = md->end_match_ptr;
|
||||
|
||||
/* For a non-repeating ket, just continue at this level. This also
|
||||
happens for a repeating ket if no characters were matched in the group.
|
||||
This is the forcible breaking of infinite loops as implemented in Perl
|
||||
5.005. If there is an options reset, it will get obeyed in the normal
|
||||
course of events. */
|
||||
|
||||
if (*ecode == OP_KET || eptr == saved_eptr)
|
||||
{
|
||||
ecode += 1+LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. The second "call" of match()
|
||||
uses tail recursion, to avoid using another stack frame. We need to reset
|
||||
any options that changed within the bracket before re-running it, so
|
||||
check the next opcode. */
|
||||
|
||||
if (ecode[1+LINK_SIZE] == OP_OPT)
|
||||
{
|
||||
ims = (ims & ~PCRE_IMS) | ecode[4];
|
||||
DPRINTF(("ims set to %02lx at group repeat\n", ims));
|
||||
}
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode = prev;
|
||||
flags = match_isgroup;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
{
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
/* An alternation is the end of a branch; scan along to find the end of the
|
||||
bracketed group and go to there. */
|
||||
@ -1053,114 +1086,114 @@ for (;;)
|
||||
case OP_KET:
|
||||
case OP_KETRMIN:
|
||||
case OP_KETRMAX:
|
||||
prev = ecode - GET(ecode, 1);
|
||||
saved_eptr = eptrb->epb_saved_eptr;
|
||||
|
||||
/* Back up the stack of bracket start pointers. */
|
||||
|
||||
eptrb = eptrb->epb_prev;
|
||||
|
||||
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
|
||||
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
|
||||
*prev == OP_ONCE)
|
||||
{
|
||||
prev = ecode - GET(ecode, 1);
|
||||
saved_eptr = eptrb->epb_saved_eptr;
|
||||
md->end_match_ptr = eptr; /* For ONCE */
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
|
||||
/* Back up the stack of bracket start pointers. */
|
||||
/* In all other cases except a conditional group we have to check the
|
||||
group number back at the start and if necessary complete handling an
|
||||
extraction by setting the offsets and bumping the high water mark. */
|
||||
|
||||
eptrb = eptrb->epb_prev;
|
||||
if (*prev != OP_COND)
|
||||
{
|
||||
number = *prev - OP_BRA;
|
||||
|
||||
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
|
||||
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
|
||||
*prev == OP_ONCE)
|
||||
{
|
||||
md->end_match_ptr = eptr; /* For ONCE */
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
/* For extended extraction brackets (large number), we have to fish out
|
||||
the number from a dummy opcode at the start. */
|
||||
|
||||
/* In all other cases except a conditional group we have to check the
|
||||
group number back at the start and if necessary complete handling an
|
||||
extraction by setting the offsets and bumping the high water mark. */
|
||||
|
||||
if (*prev != OP_COND)
|
||||
{
|
||||
number = *prev - OP_BRA;
|
||||
|
||||
/* For extended extraction brackets (large number), we have to fish out
|
||||
the number from a dummy opcode at the start. */
|
||||
|
||||
if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
|
||||
offset = number << 1;
|
||||
if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
|
||||
offset = number << 1;
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("end bracket %d", number);
|
||||
printf("\n");
|
||||
printf("end bracket %d", number);
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
/* Test for a numbered group. This includes groups called as a result
|
||||
of recursion. Note that whole-pattern recursion is coded as a recurse
|
||||
into group 0, so it won't be picked up here. Instead, we catch it when
|
||||
the OP_END is reached. */
|
||||
/* Test for a numbered group. This includes groups called as a result
|
||||
of recursion. Note that whole-pattern recursion is coded as a recurse
|
||||
into group 0, so it won't be picked up here. Instead, we catch it when
|
||||
the OP_END is reached. */
|
||||
|
||||
if (number > 0)
|
||||
if (number > 0)
|
||||
{
|
||||
md->capture_last = number;
|
||||
if (offset >= md->offset_max) md->offset_overflow = TRUE; else
|
||||
{
|
||||
md->capture_last = number;
|
||||
if (offset >= md->offset_max) md->offset_overflow = TRUE; else
|
||||
{
|
||||
md->offset_vector[offset] =
|
||||
md->offset_vector[md->offset_end - number];
|
||||
md->offset_vector[offset+1] = eptr - md->start_subject;
|
||||
if (offset_top <= offset) offset_top = offset + 2;
|
||||
}
|
||||
|
||||
/* Handle a recursively called group. Restore the offsets
|
||||
appropriately and continue from after the call. */
|
||||
|
||||
if (md->recursive != NULL && md->recursive->group_num == number)
|
||||
{
|
||||
recursion_info *rec = md->recursive;
|
||||
DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
|
||||
md->recursive = rec->prevrec;
|
||||
md->start_match = rec->save_start;
|
||||
memcpy(md->offset_vector, rec->offset_save,
|
||||
rec->saved_max * sizeof(int));
|
||||
ecode = rec->after_call;
|
||||
ims = original_ims;
|
||||
break;
|
||||
}
|
||||
md->offset_vector[offset] =
|
||||
md->offset_vector[md->offset_end - number];
|
||||
md->offset_vector[offset+1] = eptr - md->start_subject;
|
||||
if (offset_top <= offset) offset_top = offset + 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Reset the value of the ims flags, in case they got changed during
|
||||
the group. */
|
||||
/* Handle a recursively called group. Restore the offsets
|
||||
appropriately and continue from after the call. */
|
||||
|
||||
ims = original_ims;
|
||||
DPRINTF(("ims reset to %02lx\n", ims));
|
||||
|
||||
/* For a non-repeating ket, just continue at this level. This also
|
||||
happens for a repeating ket if no characters were matched in the group.
|
||||
This is the forcible breaking of infinite loops as implemented in Perl
|
||||
5.005. If there is an options reset, it will get obeyed in the normal
|
||||
course of events. */
|
||||
|
||||
if (*ecode == OP_KET || eptr == saved_eptr)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. */
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
{
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (md->recursive != NULL && md->recursive->group_num == number)
|
||||
{
|
||||
recursion_info *rec = md->recursive;
|
||||
DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
|
||||
md->recursive = rec->prevrec;
|
||||
md->start_match = rec->save_start;
|
||||
memcpy(md->offset_vector, rec->offset_save,
|
||||
rec->saved_max * sizeof(int));
|
||||
ecode = rec->after_call;
|
||||
ims = original_ims;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Reset the value of the ims flags, in case they got changed during
|
||||
the group. */
|
||||
|
||||
ims = original_ims;
|
||||
DPRINTF(("ims reset to %02lx\n", ims));
|
||||
|
||||
/* For a non-repeating ket, just continue at this level. This also
|
||||
happens for a repeating ket if no characters were matched in the group.
|
||||
This is the forcible breaking of infinite loops as implemented in Perl
|
||||
5.005. If there is an options reset, it will get obeyed in the normal
|
||||
course of events. */
|
||||
|
||||
if (*ecode == OP_KET || eptr == saved_eptr)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. In the second case, we can use
|
||||
tail recursion to avoid using another stack frame. */
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode = prev;
|
||||
flags = match_isgroup;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
{
|
||||
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
/* Start of subject unless notbol, or after internal newline if multiline */
|
||||
|
||||
@ -1168,7 +1201,10 @@ for (;;)
|
||||
if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
if ((ims & PCRE_MULTILINE) != 0)
|
||||
{
|
||||
if (eptr != md->start_subject && eptr[-1] != NEWLINE)
|
||||
if (eptr != md->start_subject &&
|
||||
(eptr == md->end_subject ||
|
||||
eptr < md->start_subject + md->nllen ||
|
||||
!IS_NEWLINE(eptr - md->nllen)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
ecode++;
|
||||
break;
|
||||
@ -1196,7 +1232,7 @@ for (;;)
|
||||
if ((ims & PCRE_MULTILINE) != 0)
|
||||
{
|
||||
if (eptr < md->end_subject)
|
||||
{ if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }
|
||||
{ if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
|
||||
else
|
||||
{ if (md->noteol) RRETURN(MATCH_NOMATCH); }
|
||||
ecode++;
|
||||
@ -1207,14 +1243,14 @@ for (;;)
|
||||
if (md->noteol) RRETURN(MATCH_NOMATCH);
|
||||
if (!md->endonly)
|
||||
{
|
||||
if (eptr < md->end_subject - 1 ||
|
||||
(eptr == md->end_subject - 1 && *eptr != NEWLINE))
|
||||
if (eptr != md->end_subject &&
|
||||
(eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
ecode++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* ... else fall through */
|
||||
/* ... else fall through for endonly */
|
||||
|
||||
/* End of subject assertion (\z) */
|
||||
|
||||
@ -1226,8 +1262,9 @@ for (;;)
|
||||
/* End of subject or ending \n assertion (\Z) */
|
||||
|
||||
case OP_EODN:
|
||||
if (eptr < md->end_subject - 1 ||
|
||||
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);
|
||||
if (eptr != md->end_subject &&
|
||||
(eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
ecode++;
|
||||
break;
|
||||
|
||||
@ -1280,13 +1317,14 @@ for (;;)
|
||||
/* Match a single character type; inline for speed */
|
||||
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
{
|
||||
if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
#endif
|
||||
ecode++;
|
||||
break;
|
||||
|
||||
@ -2573,8 +2611,11 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
|
||||
((ims & PCRE_DOTALL) == 0 &&
|
||||
eptr <= md->end_subject - md->nllen &&
|
||||
IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
break;
|
||||
@ -2659,7 +2700,11 @@ for (;;)
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
{
|
||||
for (i = 1; i <= min; i++)
|
||||
if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);
|
||||
{
|
||||
if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
eptr++;
|
||||
}
|
||||
}
|
||||
else eptr += min;
|
||||
break;
|
||||
@ -2829,13 +2874,15 @@ for (;;)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (fi >= max || eptr >= md->end_subject ||
|
||||
(ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
|
||||
eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
GETCHARINC(c, eptr);
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
|
||||
case OP_ANY: /* This is the DOTALL case */
|
||||
break;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
@ -2884,12 +2931,15 @@ for (;;)
|
||||
{
|
||||
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (fi >= max || eptr >= md->end_subject ||
|
||||
((ims & PCRE_DOTALL) == 0 &&
|
||||
eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
c = *eptr++;
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
|
||||
case OP_ANY: /* This is the DOTALL case */
|
||||
break;
|
||||
|
||||
case OP_ANYBYTE:
|
||||
@ -3075,9 +3125,9 @@ for (;;)
|
||||
{
|
||||
case OP_ANY:
|
||||
|
||||
/* Special code is required for UTF8, but when the maximum is unlimited
|
||||
we don't need it, so we repeat the non-UTF8 code. This is probably
|
||||
worth it, because .* is quite a common idiom. */
|
||||
/* Special code is required for UTF8, but when the maximum is
|
||||
unlimited we don't need it, so we repeat the non-UTF8 code. This is
|
||||
probably worth it, because .* is quite a common idiom. */
|
||||
|
||||
if (max < INT_MAX)
|
||||
{
|
||||
@ -3085,7 +3135,9 @@ for (;;)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || *eptr == NEWLINE) break;
|
||||
if (eptr >= md->end_subject ||
|
||||
(eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
|
||||
break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
@ -3094,6 +3146,7 @@ for (;;)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
@ -3108,7 +3161,9 @@ for (;;)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || *eptr == NEWLINE) break;
|
||||
if (eptr >= md->end_subject ||
|
||||
(eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
|
||||
break;
|
||||
eptr++;
|
||||
}
|
||||
break;
|
||||
@ -3222,7 +3277,9 @@ for (;;)
|
||||
{
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject || *eptr == NEWLINE) break;
|
||||
if (eptr >= md->end_subject ||
|
||||
(eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
|
||||
break;
|
||||
eptr++;
|
||||
}
|
||||
break;
|
||||
@ -3419,7 +3476,8 @@ int rc, resetcount, ocount;
|
||||
int first_byte = -1;
|
||||
int req_byte = -1;
|
||||
int req_byte2 = -1;
|
||||
unsigned long int ims = 0;
|
||||
int newline;
|
||||
unsigned long int ims;
|
||||
BOOL using_temporary_offsets = FALSE;
|
||||
BOOL anchored;
|
||||
BOOL startline;
|
||||
@ -3427,6 +3485,7 @@ BOOL firstline;
|
||||
BOOL first_byte_caseless = FALSE;
|
||||
BOOL req_byte_caseless = FALSE;
|
||||
match_data match_block;
|
||||
match_data *md = &match_block;
|
||||
const uschar *tables;
|
||||
const uschar *start_bits = NULL;
|
||||
USPTR start_match = (USPTR)subject + start_offset;
|
||||
@ -3451,9 +3510,9 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
|
||||
the default values. */
|
||||
|
||||
study = NULL;
|
||||
match_block.match_limit = MATCH_LIMIT;
|
||||
match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;
|
||||
match_block.callout_data = NULL;
|
||||
md->match_limit = MATCH_LIMIT;
|
||||
md->match_limit_recursion = MATCH_LIMIT_RECURSION;
|
||||
md->callout_data = NULL;
|
||||
|
||||
/* The table pointer is always in native byte order. */
|
||||
|
||||
@ -3465,11 +3524,11 @@ if (extra_data != NULL)
|
||||
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
|
||||
match_block.match_limit = extra_data->match_limit;
|
||||
md->match_limit = extra_data->match_limit;
|
||||
if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
|
||||
match_block.match_limit_recursion = extra_data->match_limit_recursion;
|
||||
md->match_limit_recursion = extra_data->match_limit_recursion;
|
||||
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
|
||||
match_block.callout_data = extra_data->callout_data;
|
||||
md->callout_data = extra_data->callout_data;
|
||||
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
|
||||
}
|
||||
|
||||
@ -3499,39 +3558,64 @@ firstline = (re->options & PCRE_FIRSTLINE) != 0;
|
||||
|
||||
/* The code starts after the real_pcre block and the capture name table. */
|
||||
|
||||
match_block.start_code = (const uschar *)external_re + re->name_table_offset +
|
||||
md->start_code = (const uschar *)external_re + re->name_table_offset +
|
||||
re->name_count * re->name_entry_size;
|
||||
|
||||
match_block.start_subject = (USPTR)subject;
|
||||
match_block.start_offset = start_offset;
|
||||
match_block.end_subject = match_block.start_subject + length;
|
||||
end_subject = match_block.end_subject;
|
||||
md->start_subject = (USPTR)subject;
|
||||
md->start_offset = start_offset;
|
||||
md->end_subject = md->start_subject + length;
|
||||
end_subject = md->end_subject;
|
||||
|
||||
match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
||||
match_block.utf8 = (re->options & PCRE_UTF8) != 0;
|
||||
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
||||
md->utf8 = (re->options & PCRE_UTF8) != 0;
|
||||
|
||||
match_block.notbol = (options & PCRE_NOTBOL) != 0;
|
||||
match_block.noteol = (options & PCRE_NOTEOL) != 0;
|
||||
match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
|
||||
match_block.partial = (options & PCRE_PARTIAL) != 0;
|
||||
match_block.hitend = FALSE;
|
||||
md->notbol = (options & PCRE_NOTBOL) != 0;
|
||||
md->noteol = (options & PCRE_NOTEOL) != 0;
|
||||
md->notempty = (options & PCRE_NOTEMPTY) != 0;
|
||||
md->partial = (options & PCRE_PARTIAL) != 0;
|
||||
md->hitend = FALSE;
|
||||
|
||||
match_block.recursive = NULL; /* No recursion at top level */
|
||||
md->recursive = NULL; /* No recursion at top level */
|
||||
|
||||
match_block.lcc = tables + lcc_offset;
|
||||
match_block.ctypes = tables + ctypes_offset;
|
||||
md->lcc = tables + lcc_offset;
|
||||
md->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Handle different types of newline. The two bits give four cases. If nothing
|
||||
is set at run time, whatever was used at compile time applies. */
|
||||
|
||||
switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
|
||||
PCRE_NEWLINE_CRLF)
|
||||
{
|
||||
default: newline = NEWLINE; break; /* Compile-time default */
|
||||
case PCRE_NEWLINE_CR: newline = '\r'; break;
|
||||
case PCRE_NEWLINE_LF: newline = '\n'; break;
|
||||
case PCRE_NEWLINE_CR+
|
||||
PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
|
||||
}
|
||||
|
||||
if (newline > 255)
|
||||
{
|
||||
md->nllen = 2;
|
||||
md->nl[0] = (newline >> 8) & 255;
|
||||
md->nl[1] = newline & 255;
|
||||
}
|
||||
else
|
||||
{
|
||||
md->nllen = 1;
|
||||
md->nl[0] = newline;
|
||||
}
|
||||
|
||||
/* Partial matching is supported only for a restricted set of regexes at the
|
||||
moment. */
|
||||
|
||||
if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)
|
||||
if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
|
||||
return PCRE_ERROR_BADPARTIAL;
|
||||
|
||||
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
|
||||
back the character offset. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
|
||||
if (md->utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
|
||||
{
|
||||
if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
|
||||
return PCRE_ERROR_BADUTF8;
|
||||
@ -3563,17 +3647,17 @@ ocount = offsetcount - (offsetcount % 3);
|
||||
if (re->top_backref > 0 && re->top_backref >= ocount/3)
|
||||
{
|
||||
ocount = re->top_backref * 3 + 3;
|
||||
match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
|
||||
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
|
||||
if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
using_temporary_offsets = TRUE;
|
||||
DPRINTF(("Got memory to hold back references\n"));
|
||||
}
|
||||
else match_block.offset_vector = offsets;
|
||||
else md->offset_vector = offsets;
|
||||
|
||||
match_block.offset_end = ocount;
|
||||
match_block.offset_max = (2*ocount)/3;
|
||||
match_block.offset_overflow = FALSE;
|
||||
match_block.capture_last = -1;
|
||||
md->offset_end = ocount;
|
||||
md->offset_max = (2*ocount)/3;
|
||||
md->offset_overflow = FALSE;
|
||||
md->capture_last = -1;
|
||||
|
||||
/* Compute the minimum number of offsets that we need to reset each time. Doing
|
||||
this makes a huge difference to execution time when there aren't many brackets
|
||||
@ -3586,9 +3670,9 @@ if (resetcount > offsetcount) resetcount = ocount;
|
||||
never be used unless previously set, but they get saved and restored, and so we
|
||||
initialize them to avoid reading uninitialized locations. */
|
||||
|
||||
if (match_block.offset_vector != NULL)
|
||||
if (md->offset_vector != NULL)
|
||||
{
|
||||
register int *iptr = match_block.offset_vector + ocount;
|
||||
register int *iptr = md->offset_vector + ocount;
|
||||
register int *iend = iptr - resetcount/2 + 1;
|
||||
while (--iptr >= iend) *iptr = -1;
|
||||
}
|
||||
@ -3605,7 +3689,7 @@ if (!anchored)
|
||||
{
|
||||
first_byte = re->first_byte & 255;
|
||||
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
|
||||
first_byte = match_block.lcc[first_byte];
|
||||
first_byte = md->lcc[first_byte];
|
||||
}
|
||||
else
|
||||
if (!startline && study != NULL &&
|
||||
@ -3632,9 +3716,9 @@ do
|
||||
|
||||
/* Reset the maximum number of extractions we might see. */
|
||||
|
||||
if (match_block.offset_vector != NULL)
|
||||
if (md->offset_vector != NULL)
|
||||
{
|
||||
register int *iptr = match_block.offset_vector;
|
||||
register int *iptr = md->offset_vector;
|
||||
register int *iend = iptr + resetcount;
|
||||
while (iptr < iend) *iptr++ = -1;
|
||||
}
|
||||
@ -3648,7 +3732,7 @@ do
|
||||
if (firstline)
|
||||
{
|
||||
USPTR t = start_match;
|
||||
while (t < save_end_subject && *t != '\n') t++;
|
||||
while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
@ -3658,20 +3742,22 @@ do
|
||||
{
|
||||
if (first_byte_caseless)
|
||||
while (start_match < end_subject &&
|
||||
match_block.lcc[*start_match] != first_byte)
|
||||
md->lcc[*start_match] != first_byte)
|
||||
start_match++;
|
||||
else
|
||||
while (start_match < end_subject && *start_match != first_byte)
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* Or to just after \n for a multiline match if possible */
|
||||
/* Or to just after a linebreak for a multiline match if possible */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > match_block.start_subject + start_offset)
|
||||
if (start_match >= md->start_subject + md->nllen +
|
||||
start_offset)
|
||||
{
|
||||
while (start_match < end_subject && start_match[-1] != NEWLINE)
|
||||
while (start_match <= end_subject &&
|
||||
!IS_NEWLINE(start_match - md->nllen))
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
@ -3693,7 +3779,7 @@ do
|
||||
|
||||
#ifdef DEBUG /* Sigh. Some compilers never learn. */
|
||||
printf(">>>> Match against: ");
|
||||
pchars(start_match, end_subject - start_match, TRUE, &match_block);
|
||||
pchars(start_match, end_subject - start_match, TRUE, md);
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
@ -3715,7 +3801,7 @@ do
|
||||
|
||||
if (req_byte >= 0 &&
|
||||
end_subject - start_match < REQ_BYTE_MAX &&
|
||||
!match_block.partial)
|
||||
!md->partial)
|
||||
{
|
||||
register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
|
||||
|
||||
@ -3759,11 +3845,10 @@ do
|
||||
those back references that we can. In this case there need not be overflow
|
||||
if certain parts of the pattern were not used. */
|
||||
|
||||
match_block.start_match = start_match;
|
||||
match_block.match_call_count = 0;
|
||||
md->start_match = start_match;
|
||||
md->match_call_count = 0;
|
||||
|
||||
rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
|
||||
match_isgroup, 0);
|
||||
rc = match(start_match, md->start_code, 2, md, ims, NULL, match_isgroup, 0);
|
||||
|
||||
/* When the result is no match, if the subject's first character was a
|
||||
newline and the PCRE_FIRSTLINE option is set, break (which will return
|
||||
@ -3774,10 +3859,13 @@ do
|
||||
|
||||
if (rc == MATCH_NOMATCH)
|
||||
{
|
||||
if (firstline && *start_match == NEWLINE) break;
|
||||
if (firstline &&
|
||||
start_match <= md->end_subject - md->nllen &&
|
||||
IS_NEWLINE(start_match))
|
||||
break;
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (match_block.utf8)
|
||||
if (md->utf8)
|
||||
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
|
||||
start_match++;
|
||||
#endif
|
||||
@ -3797,23 +3885,23 @@ do
|
||||
{
|
||||
if (offsetcount >= 4)
|
||||
{
|
||||
memcpy(offsets + 2, match_block.offset_vector + 2,
|
||||
memcpy(offsets + 2, md->offset_vector + 2,
|
||||
(offsetcount - 2) * sizeof(int));
|
||||
DPRINTF(("Copied offsets from temporary memory\n"));
|
||||
}
|
||||
if (match_block.end_offset_top > offsetcount)
|
||||
match_block.offset_overflow = TRUE;
|
||||
if (md->end_offset_top > offsetcount)
|
||||
md->offset_overflow = TRUE;
|
||||
|
||||
DPRINTF(("Freeing temporary memory\n"));
|
||||
(pcre_free)(match_block.offset_vector);
|
||||
(pcre_free)(md->offset_vector);
|
||||
}
|
||||
|
||||
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
|
||||
rc = md->offset_overflow? 0 : md->end_offset_top/2;
|
||||
|
||||
if (offsetcount < 2) rc = 0; else
|
||||
{
|
||||
offsets[0] = start_match - match_block.start_subject;
|
||||
offsets[1] = match_block.end_match_ptr - match_block.start_subject;
|
||||
offsets[0] = start_match - md->start_subject;
|
||||
offsets[1] = md->end_match_ptr - md->start_subject;
|
||||
}
|
||||
|
||||
DPRINTF((">>>> returning %d\n", rc));
|
||||
@ -3827,10 +3915,10 @@ while (!anchored && start_match <= end_subject);
|
||||
if (using_temporary_offsets)
|
||||
{
|
||||
DPRINTF(("Freeing temporary memory\n"));
|
||||
(pcre_free)(match_block.offset_vector);
|
||||
(pcre_free)(md->offset_vector);
|
||||
}
|
||||
|
||||
if (match_block.partial && match_block.hitend)
|
||||
if (md->partial && md->hitend)
|
||||
{
|
||||
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
|
||||
return PCRE_ERROR_PARTIAL;
|
||||
|
@ -50,8 +50,8 @@ for these functions came from Scott Wimer. */
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the two extraction functions below, as well
|
||||
as being generally available.
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@ -93,6 +93,113 @@ return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable, *lastentry;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0)
|
||||
{
|
||||
uschar *first = entry;
|
||||
uschar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
{
|
||||
const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||
{
|
||||
int n = (entry[0] << 8) + entry[1];
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return (first[0] << 8) + first[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
@ -142,7 +249,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name.
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@ -168,7 +276,7 @@ int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
@ -299,7 +407,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store.
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@ -324,9 +433,10 @@ int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -118,6 +118,14 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/* PCRE is able to support 3 different kinds of newline (CR, LF, CRLF). The
|
||||
following macro is used to package up testing for newlines. NLBLOCK is defined
|
||||
in the various modules to indicate in which datablock the parameters exist. */
|
||||
|
||||
#define IS_NEWLINE(p) \
|
||||
((p)[0] == NLBLOCK->nl[0] && \
|
||||
(NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer can be replaced
|
||||
with a custom type. This makes it possible, for example, to allow pcre_exec()
|
||||
to process subject strings that are discontinuous by using a smart pointer
|
||||
@ -164,7 +172,7 @@ case in PCRE. */
|
||||
#if HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
void *
|
||||
static void *
|
||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||
{
|
||||
size_t i;
|
||||
@ -377,16 +385,17 @@ Standard C system should have one. */
|
||||
|
||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||
|
||||
/* Private options flags start at the most significant end of the four bytes,
|
||||
but skip the top bit so we can use ints for convenience without getting tangled
|
||||
with negative values. The public options defined in pcre.h start at the least
|
||||
significant end. Make sure they don't overlap! */
|
||||
/* Private options flags start at the most significant end of the four bytes.
|
||||
The public options defined in pcre.h start at the least significant end. Make
|
||||
sure they don't overlap! The bits are getting a bit scarce now -- when we run
|
||||
out, there is a dummy word in the structure that could be used for the private
|
||||
bits. */
|
||||
|
||||
#define PCRE_NOPARTIAL 0x80000000 /* can't use partial with this regex */
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||
#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */
|
||||
#define PCRE_JCHANGED 0x08000000 /* j option changes within regex */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
@ -398,15 +407,17 @@ time, run time, or study time, respectively. */
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL)
|
||||
PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \
|
||||
PCRE_NEWLINE_LF)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
@ -534,7 +545,7 @@ enum {
|
||||
OP_DOLL, /* 20 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 21 Match one character, casefully */
|
||||
OP_CHARNC, /* 22 Match one character, caselessly */
|
||||
OP_NOT, /* 23 Match anything but the following char */
|
||||
OP_NOT, /* 23 Match one character, not the following one */
|
||||
|
||||
OP_STAR, /* 24 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
|
||||
@ -714,7 +725,8 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@ -778,6 +790,8 @@ typedef struct compile_data {
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
} compile_data;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
@ -802,11 +816,11 @@ typedef struct recursion_info {
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre.c, close to the match() function, so that it is easy to keep it in step
|
||||
with any changes of local variable. However, the pointer to the current frame
|
||||
must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure.
|
||||
NOTE: This isn't used for a "normal" compilation of pcre. */
|
||||
pcre_exec.c, close to the match() function, so that it is easy to keep it in
|
||||
step with any changes of local variable. However, the pointer to the current
|
||||
frame must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure. NOTE:
|
||||
This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
@ -820,6 +834,8 @@ typedef struct match_data {
|
||||
int *offset_vector; /* Offset vector */
|
||||
int offset_end; /* One past the end */
|
||||
int offset_max; /* The maximum usable for return data */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
BOOL offset_overflow; /* Set if too many extractions */
|
||||
@ -853,6 +869,8 @@ typedef struct dfa_match_data {
|
||||
const uschar *tables; /* Character tables */
|
||||
int moptions; /* Match options */
|
||||
int poptions; /* Pattern options */
|
||||
int nllen; /* 1 or 2 for newline string length */
|
||||
uschar nl[4]; /* Newline string */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
} dfa_match_data;
|
||||
|
||||
@ -926,7 +944,7 @@ sense, but are not part of the PCRE public API. */
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findprop(const int, int *, int *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern int _pcre_ucp_othercase(const int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
@ -130,7 +130,9 @@ for (i = 0; i < 256; i++)
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
@ -111,9 +111,9 @@ for (i = _pcre_utt_size; i >= 0; i--)
|
||||
}
|
||||
return (i >= 0)? _pcre_utt[i].name : "??";
|
||||
#else
|
||||
ptype = ptype; /* Avoid compiler warning */
|
||||
pvalue = pvalue;
|
||||
return "??";
|
||||
/* It gets harder and harder to shut off unwanted compiler warnings. */
|
||||
ptype = ptype * pvalue;
|
||||
return (ptype == pvalue)? "??" : "??";
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -182,32 +182,26 @@ for(;;)
|
||||
break;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
break;
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARNC:
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
break;
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
|
@ -95,6 +95,13 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
{
|
||||
register int c;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
/* The following comment and code was inserted in January 1999. In May 2006,
|
||||
when it was observed to cause compiler warnings about unused values, I took it
|
||||
out again. If anybody is still using OS/2, they will have to put it back
|
||||
manually. */
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
@ -102,6 +109,8 @@ disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
/* ========================================================================= */
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
@ -159,7 +168,11 @@ do
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
@ -215,15 +228,29 @@ do
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@ -277,14 +304,28 @@ do
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
@ -408,10 +449,9 @@ uschar start_bits[32];
|
||||
pcre_extra *extra;
|
||||
pcre_study_data *study;
|
||||
const uschar *tables;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
uschar *code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
uschar *code;
|
||||
compile_data compile_block;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
|
||||
*errorptr = NULL;
|
||||
|
||||
@ -427,6 +467,9 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
@ -62,8 +62,8 @@ Arguments:
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
static long int
|
||||
byteflip(long int value, int n)
|
||||
static unsigned long int
|
||||
byteflip(unsigned long int value, int n)
|
||||
{
|
||||
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
|
@ -79,7 +79,7 @@ Returns: the character type category
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ucp_findprop(const int c, int *type_ptr, int *script_ptr)
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
||||
|
@ -332,6 +332,30 @@ bool RE::Replace(const StringPiece& rewrite,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
|
||||
// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
|
||||
static int NewlineMode(int pcre_options) {
|
||||
// TODO: if we can make it threadsafe, cache this var
|
||||
int newline_mode = 0;
|
||||
/* if (newline_mode) return newline_mode; */ // do this once it's cached
|
||||
if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
|
||||
newline_mode = (pcre_options &
|
||||
(PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
|
||||
} else {
|
||||
int newline;
|
||||
pcre_config(PCRE_CONFIG_NEWLINE, &newline);
|
||||
if (newline == 10)
|
||||
newline_mode = PCRE_NEWLINE_LF;
|
||||
else if (newline == 13)
|
||||
newline_mode = PCRE_NEWLINE_CR;
|
||||
else if (newline == 3338)
|
||||
newline_mode = PCRE_NEWLINE_CRLF;
|
||||
else
|
||||
assert("" == "Unexpected return value from pcre_config(NEWLINE)");
|
||||
}
|
||||
return newline_mode;
|
||||
}
|
||||
|
||||
int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
string *str) const {
|
||||
int count = 0;
|
||||
@ -350,9 +374,27 @@ int RE::GlobalReplace(const StringPiece& rewrite,
|
||||
if (matchstart == matchend && matchstart == lastend) {
|
||||
// advance one character if we matched an empty string at the same
|
||||
// place as the last match occurred
|
||||
if (start < static_cast<int>(str->length()))
|
||||
out.push_back((*str)[start]);
|
||||
start++;
|
||||
matchend = start + 1;
|
||||
// If the current char is CR and we're in CRLF mode, skip LF too.
|
||||
// Note it's better to call pcre_fullinfo() than to examine
|
||||
// all_options(), since options_ could have changed between
|
||||
// compile-time and now, but this is simpler and safe enough.
|
||||
if (start+1 < static_cast<int>(str->length()) &&
|
||||
(*str)[start] == '\r' && (*str)[start+1] == '\n' &&
|
||||
NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
|
||||
matchend++;
|
||||
}
|
||||
// We also need to advance more than one char if we're in utf8 mode.
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (options_.utf8()) {
|
||||
while (matchend < static_cast<int>(str->length()) &&
|
||||
((*str)[matchend] & 0xc0) == 0x80)
|
||||
matchend++;
|
||||
}
|
||||
#endif
|
||||
if (matchend <= static_cast<int>(str->length()))
|
||||
out.append(*str, start, matchend - start);
|
||||
start = matchend;
|
||||
} else {
|
||||
out.append(*str, start, matchstart - start);
|
||||
Rewrite(&out, rewrite, *str, vec, matches);
|
||||
|
@ -32,6 +32,7 @@
|
||||
// TODO: Test extractions for PartialMatch/Consume
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include "config.h"
|
||||
#include "pcrecpp.h"
|
||||
@ -259,17 +260,71 @@ static void TestReplace() {
|
||||
"aaaaa",
|
||||
"bbaaaaa",
|
||||
"bbabbabbabbabbabb" },
|
||||
{ "b*",
|
||||
"bb",
|
||||
"aa\naa\n",
|
||||
"bbaa\naa\n",
|
||||
"bbabbabb\nbbabbabb\nbb" },
|
||||
{ "b*",
|
||||
"bb",
|
||||
"aa\raa\r",
|
||||
"bbaa\raa\r",
|
||||
"bbabbabb\rbbabbabb\rbb" },
|
||||
{ "b*",
|
||||
"bb",
|
||||
"aa\r\naa\r\n",
|
||||
"bbaa\r\naa\r\n",
|
||||
"bbabbabb\r\nbbabbabb\r\nbb" },
|
||||
#ifdef SUPPORT_UTF8
|
||||
{ "b*",
|
||||
"bb",
|
||||
"\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
|
||||
"bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
|
||||
"bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
|
||||
{ "b*",
|
||||
"bb",
|
||||
"\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
|
||||
"bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
|
||||
("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
|
||||
"bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
|
||||
#endif
|
||||
{ "", NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
const bool support_utf8 = true;
|
||||
#else
|
||||
const bool support_utf8 = false;
|
||||
#endif
|
||||
|
||||
for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
|
||||
RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
|
||||
assert(re.error().empty());
|
||||
string one(t->original);
|
||||
CHECK(RE(t->regexp).Replace(t->rewrite, &one));
|
||||
CHECK(re.Replace(t->rewrite, &one));
|
||||
CHECK_EQ(one, t->single);
|
||||
string all(t->original);
|
||||
CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
|
||||
CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
|
||||
CHECK_EQ(all, t->global);
|
||||
}
|
||||
|
||||
// One final test: test \r\n replacement when we're not in CRLF mode
|
||||
{
|
||||
RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
|
||||
assert(re.error().empty());
|
||||
string all("aa\r\naa\r\n");
|
||||
CHECK(re.GlobalReplace("bb", &all) > 0);
|
||||
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
|
||||
}
|
||||
{
|
||||
RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
|
||||
assert(re.error().empty());
|
||||
string all("aa\r\naa\r\n");
|
||||
CHECK(re.GlobalReplace("bb", &all) > 0);
|
||||
CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
|
||||
}
|
||||
// TODO: test what happens when no PCRE_NEWLINE_* flag is set.
|
||||
// Alas, the answer depends on how pcre was compiled.
|
||||
}
|
||||
|
||||
static void TestExtract() {
|
||||
|
@ -117,7 +117,7 @@ if (rc < 0)
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
free(re); /* Release memory used for the compiled pattern */
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -223,8 +223,8 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
||||
|
||||
if (!find_all)
|
||||
{
|
||||
free(re); /* Release the memory used for the compiled pattern */
|
||||
return 0; /* Finish unless -g was given */
|
||||
pcre_free(re); /* Release the memory used for the compiled pattern */
|
||||
return 0; /* Finish unless -g was given */
|
||||
}
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
@ -276,7 +276,7 @@ for (;;)
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
free(re); /* Release memory used for the compiled pattern */
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -317,7 +317,7 @@ for (;;)
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
free(re); /* Release memory used for the compiled pattern */
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#define VERSION "4.2 09-Jan-2006"
|
||||
#define VERSION "4.3 01-Jun-2006"
|
||||
#define MAX_PATTERN_COUNT 100
|
||||
|
||||
#if BUFSIZ > 8192
|
||||
@ -100,10 +100,14 @@ static const char *jfriedl_prefix = "";
|
||||
static const char *jfriedl_postfix = "";
|
||||
#endif
|
||||
|
||||
static int endlinebyte = '\n'; /* Last byte of endline sequence */
|
||||
static int endlineextra = 0; /* Extra bytes for endline sequence */
|
||||
|
||||
static char *colour_string = (char *)"1;31";
|
||||
static char *colour_option = NULL;
|
||||
static char *dee_option = NULL;
|
||||
static char *DEE_option = NULL;
|
||||
static char *newline = NULL;
|
||||
static char *pattern_filename = NULL;
|
||||
static char *stdin_name = (char *)"(standard input)";
|
||||
static char *locale = NULL;
|
||||
@ -185,6 +189,7 @@ static option_item optionlist[] = {
|
||||
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
|
||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
|
||||
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
|
||||
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
|
||||
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
|
||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||
@ -493,8 +498,9 @@ if (after_context > 0 && lastmatchnumber > 0)
|
||||
char *pp = lastmatchrestart;
|
||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
||||
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
||||
while (*pp != '\n') pp++;
|
||||
fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
|
||||
while (*pp != endlinebyte) pp++;
|
||||
fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
|
||||
stdout);
|
||||
lastmatchrestart = pp + 1;
|
||||
}
|
||||
hyphenpending = TRUE;
|
||||
@ -566,7 +572,7 @@ while (ptr < endptr)
|
||||
that any match is constrained to be in the first line. */
|
||||
|
||||
linelength = 0;
|
||||
while (t < endptr && *t++ != '\n') linelength++;
|
||||
while (t < endptr && *t++ != endlinebyte) linelength++;
|
||||
length = multiline? endptr - ptr : linelength;
|
||||
|
||||
|
||||
@ -705,7 +711,7 @@ while (ptr < endptr)
|
||||
|
||||
while (p < ptr && linecount < after_context)
|
||||
{
|
||||
while (*p != '\n') p++;
|
||||
while (*p != endlinebyte) p++;
|
||||
p++;
|
||||
linecount++;
|
||||
}
|
||||
@ -719,8 +725,9 @@ while (ptr < endptr)
|
||||
char *pp = lastmatchrestart;
|
||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
||||
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
|
||||
while (*pp != '\n') pp++;
|
||||
fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
|
||||
while (*pp != endlinebyte) pp++;
|
||||
fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
|
||||
(1 + endlineextra), stdout);
|
||||
lastmatchrestart = pp + 1;
|
||||
}
|
||||
if (lastmatchrestart != ptr) hyphenpending = TRUE;
|
||||
@ -748,7 +755,7 @@ while (ptr < endptr)
|
||||
{
|
||||
linecount++;
|
||||
p--;
|
||||
while (p > buffer && p[-1] != '\n') p--;
|
||||
while (p > buffer && p[-1] != endlinebyte) p--;
|
||||
}
|
||||
|
||||
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
|
||||
@ -759,8 +766,8 @@ while (ptr < endptr)
|
||||
char *pp = p;
|
||||
if (printname != NULL) fprintf(stdout, "%s-", printname);
|
||||
if (number) fprintf(stdout, "%d-", linenumber - linecount--);
|
||||
while (*pp != '\n') pp++;
|
||||
fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */
|
||||
while (*pp != endlinebyte) pp++;
|
||||
fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
|
||||
p = pp + 1;
|
||||
}
|
||||
}
|
||||
@ -777,14 +784,14 @@ while (ptr < endptr)
|
||||
/* In multiline mode, we want to print to the end of the line in which
|
||||
the end of the matched string is found, so we adjust linelength and the
|
||||
line number appropriately. Because the PCRE_FIRSTLINE option is set, the
|
||||
start of the match will always be before the first \n character. */
|
||||
start of the match will always be before the first newline sequence. */
|
||||
|
||||
if (multiline)
|
||||
{
|
||||
char *endmatch = ptr + offsets[1];
|
||||
t = ptr;
|
||||
while (t < endmatch) { if (*t++ == '\n') linenumber++; }
|
||||
while (endmatch < endptr && *endmatch != '\n') endmatch++;
|
||||
while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
|
||||
while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
|
||||
linelength = endmatch - ptr;
|
||||
}
|
||||
|
||||
@ -1206,7 +1213,7 @@ return FALSE;
|
||||
*************************************************/
|
||||
|
||||
/* When the -F option has been used, each string may be a list of strings,
|
||||
separated by newlines. They will be matched literally.
|
||||
separated by line breaks. They will be matched literally.
|
||||
|
||||
Arguments:
|
||||
pattern the pattern string
|
||||
@ -1227,10 +1234,10 @@ if ((process_options & PO_FIXED_STRINGS) != 0)
|
||||
char buffer[MBUFTHIRD];
|
||||
for(;;)
|
||||
{
|
||||
char *p = strchr(pattern, '\n');
|
||||
char *p = strchr(pattern, endlinebyte);
|
||||
if (p == NULL)
|
||||
return compile_single_pattern(pattern, options, filename, count);
|
||||
sprintf(buffer, "%.*s", p - pattern, pattern);
|
||||
sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
|
||||
pattern = p + 1;
|
||||
if (!compile_single_pattern(buffer, options, filename, count))
|
||||
return FALSE;
|
||||
@ -1260,6 +1267,16 @@ char *patterns[MAX_PATTERN_COUNT];
|
||||
const char *locale_from = "--locale";
|
||||
const char *error;
|
||||
|
||||
/* Set the default line ending value from the default in the PCRE library. */
|
||||
|
||||
(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
|
||||
switch(i)
|
||||
{
|
||||
default: newline = (char *)"lf"; break;
|
||||
case '\r': newline = (char *)"cr"; break;
|
||||
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
|
||||
}
|
||||
|
||||
/* Process the options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
@ -1543,6 +1560,28 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
|
||||
}
|
||||
}
|
||||
|
||||
/* Interpret the newline type; the default settings are Unix-like. */
|
||||
|
||||
if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
|
||||
{
|
||||
pcre_options |= PCRE_NEWLINE_CR;
|
||||
endlinebyte = '\r';
|
||||
}
|
||||
else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
|
||||
{
|
||||
pcre_options |= PCRE_NEWLINE_LF;
|
||||
}
|
||||
else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
|
||||
{
|
||||
pcre_options |= PCRE_NEWLINE_CRLF;
|
||||
endlineextra = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* Interpret the text values for -d and -D */
|
||||
|
||||
if (dee_option != NULL)
|
||||
|
@ -77,7 +77,7 @@ static const int eint[] = {
|
||||
REG_ASSERT, /* internal error: code overflow */
|
||||
REG_BADPAT, /* unrecognized character after (?< */
|
||||
REG_BADPAT, /* lookbehind assertion is not fixed length */
|
||||
REG_BADPAT, /* malformed number after (?( */
|
||||
REG_BADPAT, /* malformed number or name after (?( */
|
||||
REG_BADPAT, /* conditional group contains more than two branches */
|
||||
REG_BADPAT, /* assertion expected after (?( */
|
||||
REG_BADPAT, /* (?R or (?digits must be followed by ) */
|
||||
@ -94,11 +94,15 @@ static const int eint[] = {
|
||||
REG_BADPAT, /* recursive call could loop indefinitely */
|
||||
REG_BADPAT, /* unrecognized character after (?P */
|
||||
REG_BADPAT, /* syntax error after (?P */
|
||||
REG_BADPAT, /* two named groups have the same name */
|
||||
REG_BADPAT, /* two named subpatterns have the same name */
|
||||
REG_BADPAT, /* invalid UTF-8 string */
|
||||
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
|
||||
REG_BADPAT, /* malformed \P or \p sequence */
|
||||
REG_BADPAT /* unknown property name after \P or \p */
|
||||
REG_BADPAT, /* unknown property name after \P or \p */
|
||||
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
|
||||
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
|
||||
REG_BADPAT, /* repeated subpattern is too long */
|
||||
REG_BADPAT /* octal value is greater than \377 (not in UTF-8 mode) */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <locale.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#define PCRE_SPY /* For Win32 build, import data, not export */
|
||||
|
||||
/* We include pcre_internal.h because we need the internal info for displaying
|
||||
@ -101,11 +105,6 @@ function (define NOINFOCHECK). */
|
||||
|
||||
#define LOOPREPEAT 500000
|
||||
|
||||
#define BUFFER_SIZE 30000
|
||||
#define PBUFFER_SIZE BUFFER_SIZE
|
||||
#define DBUFFER_SIZE BUFFER_SIZE
|
||||
|
||||
|
||||
/* Static variables */
|
||||
|
||||
static FILE *outfile;
|
||||
@ -119,10 +118,95 @@ static int show_malloc;
|
||||
static int use_utf8;
|
||||
static size_t gotten_store;
|
||||
|
||||
/* The buffers grow automatically if very long input lines are encountered. */
|
||||
|
||||
static int buffer_size = 50000;
|
||||
static uschar *buffer = NULL;
|
||||
static uschar *dbuffer = NULL;
|
||||
static uschar *pbuffer = NULL;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read or extend an input line *
|
||||
*************************************************/
|
||||
|
||||
/* Input lines are read into buffer, but both patterns and data lines can be
|
||||
continued over multiple input lines. In addition, if the buffer fills up, we
|
||||
want to automatically expand it so as to be able to handle extremely large
|
||||
lines that are needed for certain stress tests. When the input buffer is
|
||||
expanded, the other two buffers must also be expanded likewise, and the
|
||||
contents of pbuffer, which are a copy of the input for callouts, must be
|
||||
preserved (for when expansion happens for a data line). This is not the most
|
||||
optimal way of handling this, but hey, this is just a test program!
|
||||
|
||||
Arguments:
|
||||
f the file to read
|
||||
start where in buffer to start (this *must* be within buffer)
|
||||
|
||||
Returns: pointer to the start of new data
|
||||
could be a copy of start, or could be moved
|
||||
NULL if no data read and EOF reached
|
||||
*/
|
||||
|
||||
static uschar *
|
||||
extend_inputline(FILE *f, uschar *start)
|
||||
{
|
||||
uschar *here = start;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int rlen = buffer_size - (here - buffer);
|
||||
if (rlen > 1000)
|
||||
{
|
||||
int dlen;
|
||||
if (fgets((char *)here, rlen, f) == NULL)
|
||||
return (here == start)? NULL : start;
|
||||
dlen = (int)strlen((char *)here);
|
||||
if (dlen > 0 && here[dlen - 1] == '\n') return start;
|
||||
here += dlen;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
int new_buffer_size = 2*buffer_size;
|
||||
uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
|
||||
uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
|
||||
uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
|
||||
|
||||
if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
memcpy(new_buffer, buffer, buffer_size);
|
||||
memcpy(new_pbuffer, pbuffer, buffer_size);
|
||||
|
||||
buffer_size = new_buffer_size;
|
||||
|
||||
start = new_buffer + (start - buffer);
|
||||
here = new_buffer + (here - buffer);
|
||||
|
||||
free(buffer);
|
||||
free(dbuffer);
|
||||
free(pbuffer);
|
||||
|
||||
buffer = new_buffer;
|
||||
dbuffer = new_dbuffer;
|
||||
pbuffer = new_pbuffer;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL; /* Control never gets here */
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read number from string *
|
||||
*************************************************/
|
||||
@ -159,19 +243,19 @@ return(result);
|
||||
and returns the value of the character.
|
||||
|
||||
Argument:
|
||||
buffer a pointer to the byte vector
|
||||
vptr a pointer to an int to receive the value
|
||||
utf8bytes a pointer to the byte vector
|
||||
vptr a pointer to an int to receive the value
|
||||
|
||||
Returns: > 0 => the number of bytes consumed
|
||||
-6 to 0 => malformed UTF-8 character at offset = (-return)
|
||||
Returns: > 0 => the number of bytes consumed
|
||||
-6 to 0 => malformed UTF-8 character at offset = (-return)
|
||||
*/
|
||||
|
||||
#if !defined NOUTF8
|
||||
|
||||
static int
|
||||
utf82ord(unsigned char *buffer, int *vptr)
|
||||
utf82ord(unsigned char *utf8bytes, int *vptr)
|
||||
{
|
||||
int c = *buffer++;
|
||||
int c = *utf8bytes++;
|
||||
int d = c;
|
||||
int i, j, s;
|
||||
|
||||
@ -191,7 +275,7 @@ d = (c & utf8_table3[i]) << s;
|
||||
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
c = *buffer++;
|
||||
c = *utf8bytes++;
|
||||
if ((c & 0xc0) != 0x80) return -(j+1);
|
||||
s -= 6;
|
||||
d |= (c & 0x3f) << s;
|
||||
@ -222,24 +306,24 @@ and encodes it as a UTF-8 character in 0 to 6 bytes.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 bytes long
|
||||
utf8bytes pointer to buffer for result - at least 6 bytes long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
static int
|
||||
ord2utf8(int cvalue, uschar *buffer)
|
||||
ord2utf8(int cvalue, uschar *utf8bytes)
|
||||
{
|
||||
register int i, j;
|
||||
for (i = 0; i < utf8_table1_size; i++)
|
||||
if (cvalue <= utf8_table1[i]) break;
|
||||
buffer += i;
|
||||
utf8bytes += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
*utf8bytes-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = utf8_table2[i] | cvalue;
|
||||
*utf8bytes = utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
@ -461,8 +545,8 @@ if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
|
||||
* Byte flipping function *
|
||||
*************************************************/
|
||||
|
||||
static long int
|
||||
byteflip(long int value, int n)
|
||||
static unsigned long int
|
||||
byteflip(unsigned long int value, int n)
|
||||
{
|
||||
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
@ -525,6 +609,32 @@ return count;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check newline indicator *
|
||||
*************************************************/
|
||||
|
||||
/* This is used both at compile and run-time to check for <xxx> escapes, where
|
||||
xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
|
||||
|
||||
Arguments:
|
||||
p points after the leading '<'
|
||||
f file for error message
|
||||
|
||||
Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
|
||||
*/
|
||||
|
||||
static int
|
||||
check_newline(uschar *p, FILE *f)
|
||||
{
|
||||
if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
|
||||
if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
|
||||
if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
|
||||
fprintf(f, "Unknown newline type at: <%s\n", p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Main Program *
|
||||
*************************************************/
|
||||
@ -553,16 +663,23 @@ int debug = 0;
|
||||
int done = 0;
|
||||
int all_use_dfa = 0;
|
||||
int yield = 0;
|
||||
int stack_size;
|
||||
|
||||
unsigned char *buffer;
|
||||
unsigned char *dbuffer;
|
||||
/* These vectors store, end-to-end, a list of captured substring names. Assume
|
||||
that 1024 is plenty long enough for the few names we'll be testing. */
|
||||
|
||||
uschar copynames[1024];
|
||||
uschar getnames[1024];
|
||||
|
||||
uschar *copynamesptr;
|
||||
uschar *getnamesptr;
|
||||
|
||||
/* Get buffers from malloc() so that Electric Fence will check their misuse
|
||||
when I am debugging. */
|
||||
when I am debugging. They grow automatically when very long lines are read. */
|
||||
|
||||
buffer = (unsigned char *)malloc(BUFFER_SIZE);
|
||||
dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
|
||||
pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
|
||||
buffer = (unsigned char *)malloc(buffer_size);
|
||||
dbuffer = (unsigned char *)malloc(buffer_size);
|
||||
pbuffer = (unsigned char *)malloc(buffer_size);
|
||||
|
||||
/* The outfile variable is static so that new_malloc can use it. The _setmode()
|
||||
stuff is some magic that I don't understand, but which apparently does good
|
||||
@ -596,6 +713,28 @@ while (argc > 1 && argv[op][0] == '-')
|
||||
op++;
|
||||
argc--;
|
||||
}
|
||||
else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
|
||||
((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
|
||||
*endptr == 0))
|
||||
{
|
||||
#ifdef _WIN32
|
||||
printf("PCRE: -S not supported on this OS\n");
|
||||
exit(1);
|
||||
#else
|
||||
int rc;
|
||||
struct rlimit rlim;
|
||||
getrlimit(RLIMIT_STACK, &rlim);
|
||||
rlim.rlim_cur = stack_size * 1024 * 1024;
|
||||
rc = setrlimit(RLIMIT_STACK, &rlim);
|
||||
if (rc != 0)
|
||||
{
|
||||
printf("PCRE: setrlimit() failed with error %d\n", rc);
|
||||
exit(1);
|
||||
}
|
||||
op++;
|
||||
argc--;
|
||||
#endif
|
||||
}
|
||||
#if !defined NOPOSIX
|
||||
else if (strcmp(argv[op], "-p") == 0) posix = 1;
|
||||
#endif
|
||||
@ -609,7 +748,8 @@ while (argc > 1 && argv[op][0] == '-')
|
||||
(void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
|
||||
printf(" %sUnicode properties support\n", rc? "" : "No ");
|
||||
(void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
|
||||
printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
|
||||
printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
|
||||
(rc == '\n')? "LF" : "CRLF");
|
||||
(void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
|
||||
printf(" Internal link size = %d\n", rc);
|
||||
(void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
|
||||
@ -625,7 +765,7 @@ while (argc > 1 && argv[op][0] == '-')
|
||||
else
|
||||
{
|
||||
printf("** Unknown or malformed option %s\n", argv[op]);
|
||||
printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
|
||||
printf("Usage: pcretest [options] [<input> [<output>]]\n");
|
||||
printf(" -C show PCRE compile-time options and exit\n");
|
||||
printf(" -d debug: show compiled code; implies -i\n");
|
||||
#if !defined NODFA
|
||||
@ -637,6 +777,7 @@ while (argc > 1 && argv[op][0] == '-')
|
||||
#if !defined NOPOSIX
|
||||
printf(" -p use POSIX interface\n");
|
||||
#endif
|
||||
printf(" -S <n> set stack size to <n> megabytes\n");
|
||||
printf(" -s output store (memory) used information\n"
|
||||
" -t time compilation and execution\n");
|
||||
yield = 1;
|
||||
@ -723,7 +864,7 @@ while (!done)
|
||||
use_utf8 = 0;
|
||||
|
||||
if (infile == stdin) printf(" re> ");
|
||||
if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
|
||||
if (extend_inputline(infile, buffer) == NULL) break;
|
||||
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
|
||||
fflush(outfile);
|
||||
|
||||
@ -735,7 +876,7 @@ while (!done)
|
||||
|
||||
if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
|
||||
{
|
||||
unsigned long int magic;
|
||||
unsigned long int magic, get_options;
|
||||
uschar sbuf[8];
|
||||
FILE *f;
|
||||
|
||||
@ -783,8 +924,8 @@ while (!done)
|
||||
|
||||
/* Need to know if UTF-8 for printing data strings */
|
||||
|
||||
new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
|
||||
use_utf8 = (options & PCRE_UTF8) != 0;
|
||||
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
|
||||
use_utf8 = (get_options & PCRE_UTF8) != 0;
|
||||
|
||||
/* Now see if there is any following study data */
|
||||
|
||||
@ -838,16 +979,8 @@ while (!done)
|
||||
pp++;
|
||||
}
|
||||
if (*pp != 0) break;
|
||||
|
||||
len = BUFFER_SIZE - (pp - buffer);
|
||||
if (len < 256)
|
||||
{
|
||||
fprintf(outfile, "** Expression too long - missing delimiter?\n");
|
||||
goto SKIP_DATA;
|
||||
}
|
||||
|
||||
if (infile == stdin) printf(" > ");
|
||||
if (fgets((char *)pp, len, infile) == NULL)
|
||||
if ((pp = extend_inputline(infile, pp)) == NULL)
|
||||
{
|
||||
fprintf(outfile, "** Unexpected EOF\n");
|
||||
done = 1;
|
||||
@ -893,6 +1026,7 @@ while (!done)
|
||||
case 'F': do_flip = 1; break;
|
||||
case 'G': do_G = 1; break;
|
||||
case 'I': do_showinfo = 1; break;
|
||||
case 'J': options |= PCRE_DUPNAMES; break;
|
||||
case 'M': log_store = 1; break;
|
||||
case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
|
||||
|
||||
@ -927,6 +1061,15 @@ while (!done)
|
||||
*pp = 0;
|
||||
break;
|
||||
|
||||
case '<':
|
||||
{
|
||||
int x = check_newline(pp, outfile);
|
||||
if (x == 0) goto SKIP_DATA;
|
||||
options |= x;
|
||||
while (*pp++ != '>');
|
||||
}
|
||||
break;
|
||||
|
||||
case '\r': /* So that it works in Windows */
|
||||
case '\n':
|
||||
case ' ':
|
||||
@ -961,7 +1104,7 @@ while (!done)
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
(void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
|
||||
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
|
||||
fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
|
||||
goto SKIP_DATA;
|
||||
}
|
||||
@ -1002,7 +1145,7 @@ while (!done)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
|
||||
if (extend_inputline(infile, buffer) == NULL)
|
||||
{
|
||||
done = 1;
|
||||
goto CONTINUE;
|
||||
@ -1163,13 +1306,13 @@ while (!done)
|
||||
if (do_flip)
|
||||
{
|
||||
all_options = byteflip(all_options, sizeof(all_options));
|
||||
}
|
||||
}
|
||||
|
||||
if ((all_options & PCRE_NOPARTIAL) != 0)
|
||||
fprintf(outfile, "Partial matching not supported\n");
|
||||
|
||||
if (get_options == 0) fprintf(outfile, "No options\n");
|
||||
else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
|
||||
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
|
||||
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
|
||||
@ -1181,14 +1324,30 @@ while (!done)
|
||||
((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
|
||||
((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
|
||||
((get_options & PCRE_UTF8) != 0)? " utf8" : "",
|
||||
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
|
||||
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
|
||||
((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
|
||||
|
||||
if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
|
||||
fprintf(outfile, "Case state changes\n");
|
||||
switch (get_options & PCRE_NEWLINE_CRLF)
|
||||
{
|
||||
case PCRE_NEWLINE_CR:
|
||||
fprintf(outfile, "Forced newline sequence: CR\n");
|
||||
break;
|
||||
|
||||
case PCRE_NEWLINE_LF:
|
||||
fprintf(outfile, "Forced newline sequence: LF\n");
|
||||
break;
|
||||
|
||||
case PCRE_NEWLINE_CRLF:
|
||||
fprintf(outfile, "Forced newline sequence: CRLF\n");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (first_char == -1)
|
||||
{
|
||||
fprintf(outfile, "First char at start or follows \\n\n");
|
||||
fprintf(outfile, "First char at start or follows newline\n");
|
||||
}
|
||||
else if (first_char < 0)
|
||||
{
|
||||
@ -1343,6 +1502,12 @@ while (!done)
|
||||
|
||||
options = 0;
|
||||
|
||||
*copynames = 0;
|
||||
*getnames = 0;
|
||||
|
||||
copynamesptr = copynames;
|
||||
getnamesptr = getnames;
|
||||
|
||||
pcre_callout = callout;
|
||||
first_callout = 1;
|
||||
callout_extra = 0;
|
||||
@ -1351,15 +1516,24 @@ while (!done)
|
||||
callout_fail_id = -1;
|
||||
show_malloc = 0;
|
||||
|
||||
if (infile == stdin) printf("data> ");
|
||||
if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
|
||||
{
|
||||
done = 1;
|
||||
goto CONTINUE;
|
||||
}
|
||||
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
|
||||
if (extra != NULL) extra->flags &=
|
||||
~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
|
||||
|
||||
len = 0;
|
||||
for (;;)
|
||||
{
|
||||
if (infile == stdin) printf("data> ");
|
||||
if (extend_inputline(infile, buffer + len) == NULL)
|
||||
{
|
||||
if (len > 0) break;
|
||||
done = 1;
|
||||
goto CONTINUE;
|
||||
}
|
||||
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
|
||||
len = (int)strlen((char *)buffer);
|
||||
if (buffer[len-1] == '\n') break;
|
||||
}
|
||||
|
||||
len = (int)strlen((char *)buffer);
|
||||
while (len > 0 && isspace(buffer[len-1])) len--;
|
||||
buffer[len] = 0;
|
||||
if (len == 0) break;
|
||||
@ -1389,6 +1563,17 @@ while (!done)
|
||||
c -= '0';
|
||||
while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
|
||||
c = c * 8 + *p++ - '0';
|
||||
|
||||
#if !defined NOUTF8
|
||||
if (use_utf8 && c > 255)
|
||||
{
|
||||
unsigned char buff8[8];
|
||||
int ii, utn;
|
||||
utn = ord2utf8(c, buff8);
|
||||
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
|
||||
c = buff8[ii]; /* Last byte */
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
@ -1450,14 +1635,14 @@ while (!done)
|
||||
}
|
||||
else if (isalnum(*p))
|
||||
{
|
||||
uschar name[256];
|
||||
uschar *npp = name;
|
||||
uschar *npp = copynamesptr;
|
||||
while (isalnum(*p)) *npp++ = *p++;
|
||||
*npp++ = 0;
|
||||
*npp = 0;
|
||||
n = pcre_get_stringnumber(re, (char *)name);
|
||||
n = pcre_get_stringnumber(re, (char *)copynamesptr);
|
||||
if (n < 0)
|
||||
fprintf(outfile, "no parentheses with name \"%s\"\n", name);
|
||||
else copystrings |= 1 << n;
|
||||
fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
|
||||
copynamesptr = npp;
|
||||
}
|
||||
else if (*p == '+')
|
||||
{
|
||||
@ -1518,14 +1703,14 @@ while (!done)
|
||||
}
|
||||
else if (isalnum(*p))
|
||||
{
|
||||
uschar name[256];
|
||||
uschar *npp = name;
|
||||
uschar *npp = getnamesptr;
|
||||
while (isalnum(*p)) *npp++ = *p++;
|
||||
*npp++ = 0;
|
||||
*npp = 0;
|
||||
n = pcre_get_stringnumber(re, (char *)name);
|
||||
n = pcre_get_stringnumber(re, (char *)getnamesptr);
|
||||
if (n < 0)
|
||||
fprintf(outfile, "no parentheses with name \"%s\"\n", name);
|
||||
else getstrings |= 1 << n;
|
||||
fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
|
||||
getnamesptr = npp;
|
||||
}
|
||||
continue;
|
||||
|
||||
@ -1564,6 +1749,28 @@ while (!done)
|
||||
options |= PCRE_PARTIAL;
|
||||
continue;
|
||||
|
||||
case 'Q':
|
||||
while(isdigit(*p)) n = n * 10 + *p++ - '0';
|
||||
if (extra == NULL)
|
||||
{
|
||||
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
|
||||
extra->flags = 0;
|
||||
}
|
||||
extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
||||
extra->match_limit_recursion = n;
|
||||
continue;
|
||||
|
||||
case 'q':
|
||||
while(isdigit(*p)) n = n * 10 + *p++ - '0';
|
||||
if (extra == NULL)
|
||||
{
|
||||
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
|
||||
extra->flags = 0;
|
||||
}
|
||||
extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
|
||||
extra->match_limit = n;
|
||||
continue;
|
||||
|
||||
#if !defined NODFA
|
||||
case 'R':
|
||||
options |= PCRE_DFA_RESTART;
|
||||
@ -1581,6 +1788,15 @@ while (!done)
|
||||
case '?':
|
||||
options |= PCRE_NO_UTF8_CHECK;
|
||||
continue;
|
||||
|
||||
case '<':
|
||||
{
|
||||
int x = check_newline(p, outfile);
|
||||
if (x == 0) goto NEXT_DATA;
|
||||
options |= x;
|
||||
while (*p++ != '>');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
*q++ = c;
|
||||
}
|
||||
@ -1611,7 +1827,7 @@ while (!done)
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
(void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
|
||||
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
|
||||
fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
|
||||
}
|
||||
else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
|
||||
@ -1690,7 +1906,7 @@ while (!done)
|
||||
extra->flags = 0;
|
||||
}
|
||||
|
||||
count = check_match_limit(re, extra, bptr, len, start_offset,
|
||||
(void)check_match_limit(re, extra, bptr, len, start_offset,
|
||||
options|g_notempty, use_offsets, use_size_offsets,
|
||||
PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
|
||||
PCRE_ERROR_MATCHLIMIT, "match()");
|
||||
@ -1778,7 +1994,7 @@ while (!done)
|
||||
{
|
||||
if ((copystrings & (1 << i)) != 0)
|
||||
{
|
||||
char copybuffer[16];
|
||||
char copybuffer[256];
|
||||
int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
|
||||
i, copybuffer, sizeof(copybuffer));
|
||||
if (rc < 0)
|
||||
@ -1788,6 +2004,19 @@ while (!done)
|
||||
}
|
||||
}
|
||||
|
||||
for (copynamesptr = copynames;
|
||||
*copynamesptr != 0;
|
||||
copynamesptr += (int)strlen((char*)copynamesptr) + 1)
|
||||
{
|
||||
char copybuffer[256];
|
||||
int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
|
||||
count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
|
||||
if (rc < 0)
|
||||
fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
|
||||
else
|
||||
fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
|
||||
}
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
if ((getstrings & (1 << i)) != 0)
|
||||
@ -1800,12 +2029,27 @@ while (!done)
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
|
||||
/* free((void *)substring); */
|
||||
pcre_free_substring(substring);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (getnamesptr = getnames;
|
||||
*getnamesptr != 0;
|
||||
getnamesptr += (int)strlen((char*)getnamesptr) + 1)
|
||||
{
|
||||
const char *substring;
|
||||
int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
|
||||
count, (char *)getnamesptr, &substring);
|
||||
if (rc < 0)
|
||||
fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
|
||||
else
|
||||
{
|
||||
fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
|
||||
pcre_free_substring(substring);
|
||||
}
|
||||
}
|
||||
|
||||
if (getlist)
|
||||
{
|
||||
const char **stringlist;
|
||||
@ -1905,6 +2149,8 @@ while (!done)
|
||||
len -= use_offsets[1];
|
||||
}
|
||||
} /* End of loop for /g and /G */
|
||||
|
||||
NEXT_DATA: continue;
|
||||
} /* End of loop for data lines */
|
||||
|
||||
CONTINUE:
|
||||
|
7
ext/pcre/pcrelib/testdata/grepinputx
vendored
7
ext/pcre/pcrelib/testdata/grepinputx
vendored
@ -39,4 +39,11 @@ eighteen
|
||||
nineteen
|
||||
twenty
|
||||
|
||||
Here follows some CR/LF/CRLF test data.
|
||||
|
||||
abc
|
||||
def
|
||||
ghi
|
||||
jkl
|
||||
|
||||
This is the last line of this file.
|
||||
|
41
ext/pcre/pcrelib/testdata/testinput1
vendored
41
ext/pcre/pcrelib/testdata/testinput1
vendored
@ -1474,11 +1474,11 @@
|
||||
/(abc)\323/
|
||||
abc\xd3
|
||||
|
||||
/(abc)\500/
|
||||
/(abc)\100/
|
||||
abc\x40
|
||||
abc\100
|
||||
|
||||
/(abc)\5000/
|
||||
/(abc)\1000/
|
||||
abc\x400
|
||||
abc\x40\x30
|
||||
abc\1000
|
||||
@ -3847,4 +3847,41 @@
|
||||
** Failers
|
||||
abcddefg
|
||||
|
||||
/(?<![^f]oo)(bar)/
|
||||
foobarX
|
||||
** Failers
|
||||
boobarX
|
||||
|
||||
/(?<![^f])X/
|
||||
offX
|
||||
** Failers
|
||||
onyX
|
||||
|
||||
/(?<=[^f])X/
|
||||
onyX
|
||||
** Failers
|
||||
offX
|
||||
|
||||
/^/mg
|
||||
a\nb\nc\n
|
||||
\
|
||||
|
||||
/(?<=C\n)^/mg
|
||||
A\nC\nC\n
|
||||
|
||||
/(?:(?(1)a|b)(X))+/
|
||||
bXaX
|
||||
|
||||
/(?:(?(1)\1a|b)(X|Y))+/
|
||||
bXXaYYaY
|
||||
bXYaXXaX
|
||||
|
||||
/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
|
||||
bXXaYYaY
|
||||
|
||||
/[[,abc,]+]/
|
||||
abc]
|
||||
a,b]
|
||||
[a,b,c]
|
||||
|
||||
/ End of testinput1 /
|
||||
|
155
ext/pcre/pcrelib/testdata/testinput2
vendored
155
ext/pcre/pcrelib/testdata/testinput2
vendored
@ -733,7 +733,7 @@
|
||||
Ab
|
||||
AB
|
||||
|
||||
/[\200-\410]/
|
||||
/[\200-\110]/
|
||||
|
||||
/^(?(0)f|b)oo/
|
||||
|
||||
@ -1490,4 +1490,157 @@
|
||||
|
||||
/\x{0000ff}/
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/J
|
||||
a1b\CA
|
||||
a2b\CA
|
||||
** Failers
|
||||
a1b\CZ\CA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)/J
|
||||
ab\CA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd/J
|
||||
ab\CA
|
||||
cd\CA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
|
||||
cdefgh\CA
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/J
|
||||
a1b\GA
|
||||
a2b\GA
|
||||
** Failers
|
||||
a1b\GZ\GA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)/J
|
||||
ab\GA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd/J
|
||||
ab\GA
|
||||
cd\GA
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
|
||||
cdefgh\GA
|
||||
|
||||
/(?J)^((?P<A>a1)|(?P<A>a2)b)/
|
||||
a1b\CA
|
||||
a2b\CA
|
||||
|
||||
/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<A>d)/
|
||||
|
||||
/ In this next test, J is not set at the outer level; consequently it isn't
|
||||
set in the pattern's options; consequently pcre_get_named_substring() produces
|
||||
a random value. /x
|
||||
|
||||
/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<C>d)/
|
||||
a bc d\CA\CB\CC
|
||||
|
||||
/^(?P<A>a)?(?(A)a|b)/
|
||||
aabc
|
||||
bc
|
||||
** Failers
|
||||
abc
|
||||
|
||||
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/
|
||||
bXaX
|
||||
|
||||
/(?:(?(2y)a|b)(X))+/
|
||||
|
||||
/(?:(?(ZA)a|b)(?P<ZZ>X))+/
|
||||
|
||||
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/
|
||||
bbXaaX
|
||||
|
||||
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/
|
||||
(b)\\Xa\\X
|
||||
|
||||
/(?P<ABC/
|
||||
|
||||
/(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
|
||||
bXXaYYaY
|
||||
bXYaXXaX
|
||||
|
||||
/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
|
||||
bXXaYYaY
|
||||
|
||||
/\777/
|
||||
|
||||
/\s*,\s*/S
|
||||
\x0b,\x0b
|
||||
\x0c,\x0d
|
||||
|
||||
/^abc/m
|
||||
xyz\nabc
|
||||
xyz\nabc\<lf>
|
||||
xyz\r\nabc\<lf>
|
||||
xyz\rabc\<cr>
|
||||
xyz\r\nabc\<crlf>
|
||||
** Failers
|
||||
xyz\nabc\<cr>
|
||||
xyz\r\nabc\<cr>
|
||||
xyz\nabc\<crlf>
|
||||
xyz\rabc\<crlf>
|
||||
xyz\rabc\<lf>
|
||||
|
||||
/abc$/m
|
||||
xyzabc
|
||||
xyzabc\n
|
||||
xyzabc\npqr
|
||||
xyzabc\r\<cr>
|
||||
xyzabc\rpqr\<cr>
|
||||
xyzabc\r\n\<crlf>
|
||||
xyzabc\r\npqr\<crlf>
|
||||
** Failers
|
||||
xyzabc\r
|
||||
xyzabc\rpqr
|
||||
xyzabc\r\n
|
||||
xyzabc\r\npqr
|
||||
|
||||
/^abc/m<cr>
|
||||
xyz\rabcdef
|
||||
xyz\nabcdef\<lf>
|
||||
** Failers
|
||||
xyz\nabcdef
|
||||
|
||||
/^abc/m<lf>
|
||||
xyz\nabcdef
|
||||
xyz\rabcdef\<cr>
|
||||
** Failers
|
||||
xyz\rabcdef
|
||||
|
||||
/^abc/m<crlf>
|
||||
xyz\r\nabcdef
|
||||
xyz\rabcdef\<cr>
|
||||
** Failers
|
||||
xyz\rabcdef
|
||||
|
||||
/^abc/m<bad>
|
||||
|
||||
/abc/
|
||||
xyz\rabc\<bad>
|
||||
abc
|
||||
|
||||
/.*/
|
||||
abc\ndef
|
||||
abc\rdef
|
||||
abc\r\ndef
|
||||
\<cr>abc\ndef
|
||||
\<cr>abc\rdef
|
||||
\<cr>abc\r\ndef
|
||||
\<crlf>abc\ndef
|
||||
\<crlf>abc\rdef
|
||||
\<crlf>abc\r\ndef
|
||||
|
||||
/\w+(.)(.)?def/s
|
||||
abc\ndef
|
||||
abc\rdef
|
||||
abc\r\ndef
|
||||
|
||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+
|
||||
/* this is a C style comment */\M
|
||||
|
||||
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/
|
||||
|
||||
/ End of testinput2 /
|
||||
|
7
ext/pcre/pcrelib/testdata/testinput4
vendored
7
ext/pcre/pcrelib/testdata/testinput4
vendored
@ -510,7 +510,14 @@
|
||||
/^\x{85}$/8i
|
||||
\x{85}
|
||||
|
||||
/^ሴ/8
|
||||
ሴ
|
||||
|
||||
/^\ሴ/8
|
||||
ሴ
|
||||
|
||||
"(?s)(.{1,5})"8
|
||||
abcdefg
|
||||
ab
|
||||
|
||||
/ End of testinput4 /
|
||||
|
6
ext/pcre/pcrelib/testdata/testinput5
vendored
6
ext/pcre/pcrelib/testdata/testinput5
vendored
@ -265,4 +265,10 @@
|
||||
|
||||
/^\ሴ/8D
|
||||
|
||||
/\777/I
|
||||
|
||||
/\777/8I
|
||||
\x{1ff}
|
||||
\777
|
||||
|
||||
/ End of testinput5 /
|
||||
|
9
ext/pcre/pcrelib/testdata/testinput6
vendored
9
ext/pcre/pcrelib/testdata/testinput6
vendored
@ -738,4 +738,13 @@
|
||||
\x{1c5}XY
|
||||
AXY
|
||||
|
||||
/^(\p{Z}[^\p{C}\p{Z}]+)*$/
|
||||
\xa0!
|
||||
|
||||
/^[\pL](abc)(?1)/
|
||||
AabcabcYZ
|
||||
|
||||
/([\pL]=(abc))*X/
|
||||
L=abcX
|
||||
|
||||
/ End of testinput6 /
|
||||
|
100
ext/pcre/pcrelib/testdata/testinput7
vendored
100
ext/pcre/pcrelib/testdata/testinput7
vendored
@ -1909,11 +1909,11 @@
|
||||
/(abc)\323/
|
||||
abc\xd3
|
||||
|
||||
/(abc)\500/
|
||||
/(abc)\100/
|
||||
abc\x40
|
||||
abc\100
|
||||
|
||||
/(abc)\5000/
|
||||
/(abc)\1000/
|
||||
abc\x400
|
||||
abc\x40\x30
|
||||
abc\1000
|
||||
@ -4019,4 +4019,100 @@
|
||||
123\P
|
||||
4\P\R
|
||||
|
||||
/^/mg
|
||||
a\nb\nc\n
|
||||
\
|
||||
|
||||
/(?<=C\n)^/mg
|
||||
A\nC\nC\n
|
||||
|
||||
/(?s)A?B/
|
||||
AB
|
||||
aB
|
||||
|
||||
/(?s)A*B/
|
||||
AB
|
||||
aB
|
||||
|
||||
/(?m)A?B/
|
||||
AB
|
||||
aB
|
||||
|
||||
/(?m)A*B/
|
||||
AB
|
||||
aB
|
||||
|
||||
/Content-Type\x3A[^\r\n]{6,}/
|
||||
Content-Type:xxxxxyyy
|
||||
|
||||
/Content-Type\x3A[^\r\n]{6,}z/
|
||||
Content-Type:xxxxxyyyz
|
||||
|
||||
/Content-Type\x3A[^a]{6,}/
|
||||
Content-Type:xxxyyy
|
||||
|
||||
/Content-Type\x3A[^a]{6,}z/
|
||||
Content-Type:xxxyyyz
|
||||
|
||||
/^abc/m
|
||||
xyz\nabc
|
||||
xyz\nabc\<lf>
|
||||
xyz\r\nabc\<lf>
|
||||
xyz\rabc\<cr>
|
||||
xyz\r\nabc\<crlf>
|
||||
** Failers
|
||||
xyz\nabc\<cr>
|
||||
xyz\r\nabc\<cr>
|
||||
xyz\nabc\<crlf>
|
||||
xyz\rabc\<crlf>
|
||||
xyz\rabc\<lf>
|
||||
|
||||
/abc$/m
|
||||
xyzabc
|
||||
xyzabc\n
|
||||
xyzabc\npqr
|
||||
xyzabc\r\<cr>
|
||||
xyzabc\rpqr\<cr>
|
||||
xyzabc\r\n\<crlf>
|
||||
xyzabc\r\npqr\<crlf>
|
||||
** Failers
|
||||
xyzabc\r
|
||||
xyzabc\rpqr
|
||||
xyzabc\r\n
|
||||
xyzabc\r\npqr
|
||||
|
||||
/^abc/m<cr>
|
||||
xyz\rabcdef
|
||||
xyz\nabcdef\<lf>
|
||||
** Failers
|
||||
xyz\nabcdef
|
||||
|
||||
/^abc/m<lf>
|
||||
xyz\nabcdef
|
||||
xyz\rabcdef\<cr>
|
||||
** Failers
|
||||
xyz\rabcdef
|
||||
|
||||
/^abc/m<crlf>
|
||||
xyz\r\nabcdef
|
||||
xyz\rabcdef\<cr>
|
||||
** Failers
|
||||
xyz\rabcdef
|
||||
|
||||
/.*/
|
||||
abc\ndef
|
||||
abc\rdef
|
||||
abc\r\ndef
|
||||
\<cr>abc\ndef
|
||||
\<cr>abc\rdef
|
||||
\<cr>abc\r\ndef
|
||||
\<crlf>abc\ndef
|
||||
\<crlf>abc\rdef
|
||||
\<crlf>abc\r\ndef
|
||||
|
||||
/\w+(.)(.)?def/s
|
||||
abc\ndef
|
||||
abc\rdef
|
||||
abc\r\ndef
|
||||
|
||||
/ End of testinput7 /
|
||||
|
76
ext/pcre/pcrelib/testdata/testoutput1
vendored
76
ext/pcre/pcrelib/testdata/testoutput1
vendored
@ -2127,7 +2127,7 @@ No match
|
||||
0: abc\xd3
|
||||
1: abc
|
||||
|
||||
/(abc)\500/
|
||||
/(abc)\100/
|
||||
abc\x40
|
||||
0: abc@
|
||||
1: abc
|
||||
@ -2135,7 +2135,7 @@ No match
|
||||
0: abc@
|
||||
1: abc
|
||||
|
||||
/(abc)\5000/
|
||||
/(abc)\1000/
|
||||
abc\x400
|
||||
0: abc@0
|
||||
1: abc
|
||||
@ -6282,4 +6282,76 @@ No match
|
||||
abcddefg
|
||||
No match
|
||||
|
||||
/(?<![^f]oo)(bar)/
|
||||
foobarX
|
||||
0: bar
|
||||
1: bar
|
||||
** Failers
|
||||
No match
|
||||
boobarX
|
||||
No match
|
||||
|
||||
/(?<![^f])X/
|
||||
offX
|
||||
0: X
|
||||
** Failers
|
||||
No match
|
||||
onyX
|
||||
No match
|
||||
|
||||
/(?<=[^f])X/
|
||||
onyX
|
||||
0: X
|
||||
** Failers
|
||||
No match
|
||||
offX
|
||||
No match
|
||||
|
||||
/^/mg
|
||||
a\nb\nc\n
|
||||
0:
|
||||
0:
|
||||
0:
|
||||
\
|
||||
0:
|
||||
|
||||
/(?<=C\n)^/mg
|
||||
A\nC\nC\n
|
||||
0:
|
||||
|
||||
/(?:(?(1)a|b)(X))+/
|
||||
bXaX
|
||||
0: bXaX
|
||||
1: X
|
||||
|
||||
/(?:(?(1)\1a|b)(X|Y))+/
|
||||
bXXaYYaY
|
||||
0: bXXaYYaY
|
||||
1: Y
|
||||
bXYaXXaX
|
||||
0: bX
|
||||
1: X
|
||||
|
||||
/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
|
||||
bXXaYYaY
|
||||
0: bX
|
||||
1:
|
||||
2:
|
||||
3:
|
||||
4:
|
||||
5:
|
||||
6:
|
||||
7:
|
||||
8:
|
||||
9:
|
||||
10: X
|
||||
|
||||
/[[,abc,]+]/
|
||||
abc]
|
||||
0: abc]
|
||||
a,b]
|
||||
0: a,b]
|
||||
[a,b,c]
|
||||
0: [a,b,c]
|
||||
|
||||
/ End of testinput1 /
|
||||
|
578
ext/pcre/pcrelib/testdata/testoutput2
vendored
578
ext/pcre/pcrelib/testdata/testoutput2
vendored
@ -115,14 +115,14 @@ Failed: unrecognized character after (? at offset 2
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'b'
|
||||
|
||||
/.*?b/
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'b'
|
||||
|
||||
/cat|dog|elephant/
|
||||
@ -326,7 +326,7 @@ No need char
|
||||
Capturing subpattern count = 3
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
defabc
|
||||
0: defabc
|
||||
@ -517,7 +517,6 @@ No need char
|
||||
/(^b|(?i)^d)/
|
||||
Capturing subpattern count = 1
|
||||
Options: anchored
|
||||
Case state changes
|
||||
No first char
|
||||
No need char
|
||||
|
||||
@ -552,13 +551,13 @@ Starting byte set: b c x y
|
||||
/(^a|^b)/m
|
||||
Capturing subpattern count = 1
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
|
||||
/(?i)(^a|^b)/m
|
||||
Capturing subpattern count = 1
|
||||
Options: caseless multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
|
||||
/(a)(?(1)a|b|c)/
|
||||
@ -568,13 +567,13 @@ Failed: conditional group contains more than two branches at offset 13
|
||||
Failed: conditional group contains more than two branches at offset 12
|
||||
|
||||
/(?(1a)/
|
||||
Failed: malformed number after (?( at offset 4
|
||||
Failed: reference to non-existent subpattern at offset 6
|
||||
|
||||
/(?(?i))/
|
||||
Failed: assertion expected after (?( at offset 3
|
||||
|
||||
/(?(abc))/
|
||||
Failed: assertion expected after (?( at offset 3
|
||||
Failed: reference to non-existent subpattern at offset 7
|
||||
|
||||
/(?(?<ab))/
|
||||
Failed: unrecognized character after (?< at offset 5
|
||||
@ -592,7 +591,6 @@ Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Partial matching not supported
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'b' (caseless)
|
||||
Need char = 'h' (caseless)
|
||||
|
||||
@ -609,7 +607,6 @@ Need char = 'h' (caseless)
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 1
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'b' (caseless)
|
||||
No need char
|
||||
Study returned NULL
|
||||
@ -618,7 +615,6 @@ Study returned NULL
|
||||
Capturing subpattern count = 1
|
||||
Partial matching not supported
|
||||
No options
|
||||
Case state changes
|
||||
No first char
|
||||
No need char
|
||||
Starting byte set: C a b c d
|
||||
@ -664,7 +660,7 @@ No need char
|
||||
/^abc/m
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
|
||||
@ -721,7 +717,7 @@ No match
|
||||
/^(?<=foo\n)bar/m
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'r'
|
||||
foo\nbarbar
|
||||
0: bar
|
||||
@ -737,7 +733,7 @@ No match
|
||||
/(?>^abc)/m
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
abc
|
||||
0: abc
|
||||
@ -782,7 +778,6 @@ No match
|
||||
/(?<=ab(?i)x|y|z)/
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Case state changes
|
||||
No first char
|
||||
No need char
|
||||
|
||||
@ -790,7 +785,7 @@ No need char
|
||||
Capturing subpattern count = 2
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
alphabetabcd
|
||||
0: alphabetabcd
|
||||
@ -803,7 +798,6 @@ No need char
|
||||
/(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'Z'
|
||||
Need char = 'Z'
|
||||
abxyZZ
|
||||
@ -966,7 +960,7 @@ Failed: unrecognized character after (? at offset 3
|
||||
Failed: unrecognized character after (? at offset 3
|
||||
|
||||
/(?(1?)a|b)/
|
||||
Failed: malformed number after (?( at offset 4
|
||||
Failed: malformed number or name after (?( at offset 4
|
||||
|
||||
/(?(1)a|b|c)/
|
||||
Failed: conditional group contains more than two branches at offset 10
|
||||
@ -1021,7 +1015,7 @@ No need char
|
||||
abcdefghijklmnopqrstuvwxyz\C1
|
||||
0: abcdefghijklmnopqrst
|
||||
1: abcdefghijklmnopqrst
|
||||
copy substring 1 failed -6
|
||||
1C abcdefghijklmnopqrst (20)
|
||||
abcdefghijklmnopqrstuvwxyz\G1
|
||||
0: abcdefghijklmnopqrst
|
||||
1: abcdefghijklmnopqrst
|
||||
@ -1054,7 +1048,7 @@ No need char
|
||||
abcdefghijklmnopqrstuvwxyz\C1\G1\L
|
||||
0: abcdefghijklmnop
|
||||
1: abcdefghijklmnop
|
||||
copy substring 1 failed -6
|
||||
1C abcdefghijklmnop (16)
|
||||
1G abcdefghijklmnop (16)
|
||||
0L abcdefghijklmnop
|
||||
1L abcdefghijklmnop
|
||||
@ -1128,7 +1122,7 @@ Need char = 'd'
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'X'
|
||||
|
||||
/.*X/Ds
|
||||
@ -1161,7 +1155,7 @@ Need char = 'X'
|
||||
Capturing subpattern count = 1
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
|
||||
/(.*X|^B)/Ds
|
||||
@ -1221,7 +1215,7 @@ No need char
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
|
||||
/\Biss\B/+
|
||||
@ -1306,7 +1300,7 @@ No need char
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 's'
|
||||
abciss\nxyzisspqr
|
||||
0: abciss
|
||||
@ -1365,7 +1359,7 @@ No need char
|
||||
/^ab\n/mg+
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 10
|
||||
ab\nab\ncd
|
||||
0: ab\x0a
|
||||
@ -2223,7 +2217,6 @@ No need char
|
||||
/((?-i)[[:lower:]])[[:lower:]]/i
|
||||
Capturing subpattern count = 1
|
||||
Options: caseless
|
||||
Case state changes
|
||||
No first char
|
||||
No need char
|
||||
ab
|
||||
@ -2240,11 +2233,11 @@ No match
|
||||
AB
|
||||
No match
|
||||
|
||||
/[\200-\410]/
|
||||
/[\200-\110]/
|
||||
Failed: range out of order in character class at offset 9
|
||||
|
||||
/^(?(0)f|b)oo/
|
||||
Failed: invalid condition (?(0) at offset 5
|
||||
Failed: invalid condition (?(0) at offset 6
|
||||
|
||||
/This one's here because of the large output vector needed/
|
||||
Capturing subpattern count = 0
|
||||
@ -2761,7 +2754,6 @@ No need char
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'a'
|
||||
Need char = 'b' (caseless)
|
||||
ab
|
||||
@ -2787,7 +2779,6 @@ No match
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 1
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'a'
|
||||
Need char = 'b' (caseless)
|
||||
ab
|
||||
@ -3370,7 +3361,7 @@ No need char
|
||||
Capturing subpattern count = 1
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
|
||||
/(.*)\d+\1/Is
|
||||
@ -3393,7 +3384,7 @@ Capturing subpattern count = 2
|
||||
Max back reference = 2
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'z'
|
||||
|
||||
/((.*))\d+\1/I
|
||||
@ -3430,7 +3421,6 @@ Need char = 'z' (caseless)
|
||||
/(?=abc)(?i).xyz/I
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'a'
|
||||
Need char = 'z' (caseless)
|
||||
|
||||
@ -3553,7 +3543,7 @@ Need char = 'b'
|
||||
/^a/mI
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows \n
|
||||
First char at start or follows newline
|
||||
Need char = 'a'
|
||||
abcde
|
||||
0: a
|
||||
@ -3580,7 +3570,6 @@ Starting byte set: A B a b
|
||||
/[ab](?i)cd/IS
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
Case state changes
|
||||
No first char
|
||||
Need char = 'd' (caseless)
|
||||
Starting byte set: a b
|
||||
@ -4503,12 +4492,12 @@ No first char
|
||||
Need char = 'z'
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\M
|
||||
Minimum match() limit = 8
|
||||
Minimum match() recursion limit = 7
|
||||
Minimum match() recursion limit = 6
|
||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
aaaaaaaaaaaaaz\M
|
||||
Minimum match() limit = 32768
|
||||
Minimum match() recursion limit = 43
|
||||
Minimum match() recursion limit = 42
|
||||
No match
|
||||
|
||||
/(aaa(?C1)bbb|ab)/
|
||||
@ -4555,18 +4544,19 @@ Need char = 'h'
|
||||
1: cd
|
||||
2: gh
|
||||
1C cd (2)
|
||||
2G gh (2)
|
||||
G gh (2) two
|
||||
abcdefgh\Cone\Ctwo
|
||||
0: abcdefgh
|
||||
1: cd
|
||||
2: gh
|
||||
1C cd (2)
|
||||
2C gh (2)
|
||||
C cd (2) one
|
||||
C gh (2) two
|
||||
abcdefgh\Cthree
|
||||
no parentheses with name "three"
|
||||
0: abcdefgh
|
||||
1: cd
|
||||
2: gh
|
||||
copy substring three failed -7
|
||||
|
||||
/(?P<Tes>)(?P<Test>)/D
|
||||
------------------------------------------------------------------
|
||||
@ -4616,18 +4606,18 @@ Need char = 'a'
|
||||
0: zzaa
|
||||
1: zz
|
||||
2: aa
|
||||
1C zz (2)
|
||||
C zz (2) Z
|
||||
zzaa\CA
|
||||
0: zzaa
|
||||
1: zz
|
||||
2: aa
|
||||
2C aa (2)
|
||||
C aa (2) A
|
||||
|
||||
/(?P<x>eks)(?P<x>eccs)/
|
||||
Failed: two named groups have the same name at offset 16
|
||||
Failed: two named subpatterns have the same name at offset 16
|
||||
|
||||
/(?P<abc>abc(?P<def>def)(?P<abc>xyz))/
|
||||
Failed: two named groups have the same name at offset 31
|
||||
Failed: two named subpatterns have the same name at offset 31
|
||||
|
||||
"\[((?P<elem>\d+)(,(?P>elem))*)\]"
|
||||
Capturing subpattern count = 3
|
||||
@ -5769,7 +5759,6 @@ Failed: number too big in {} quantifier at offset 15
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
No options
|
||||
Case state changes
|
||||
First char = 'a' (caseless)
|
||||
Need char = 'B'
|
||||
abcdefghijklAkB
|
||||
@ -6059,6 +6048,505 @@ No options
|
||||
First char = 255
|
||||
No need char
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/
|
||||
Failed: two named subpatterns have the same name at offset 18
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/J
|
||||
Capturing subpattern count = 3
|
||||
Named capturing subpatterns:
|
||||
A 2
|
||||
A 3
|
||||
Options: anchored dupnames
|
||||
No first char
|
||||
No need char
|
||||
a1b\CA
|
||||
0: a1
|
||||
1: a1
|
||||
2: a1
|
||||
C a1 (2) A
|
||||
a2b\CA
|
||||
0: a2b
|
||||
1: a2b
|
||||
2: <unset>
|
||||
3: a2
|
||||
C a2 (2) A
|
||||
** Failers
|
||||
No match
|
||||
a1b\CZ\CA
|
||||
no parentheses with name "Z"
|
||||
0: a1
|
||||
1: a1
|
||||
2: a1
|
||||
copy substring Z failed -7
|
||||
C a1 (2) A
|
||||
|
||||
/^(?P<A>a)(?P<A>b)/J
|
||||
Capturing subpattern count = 2
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
Options: anchored dupnames
|
||||
No first char
|
||||
No need char
|
||||
ab\CA
|
||||
0: ab
|
||||
1: a
|
||||
2: b
|
||||
C a (1) A
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd/J
|
||||
Capturing subpattern count = 2
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
Options: dupnames
|
||||
No first char
|
||||
No need char
|
||||
ab\CA
|
||||
0: ab
|
||||
1: a
|
||||
2: b
|
||||
C a (1) A
|
||||
cd\CA
|
||||
0: cd
|
||||
copy substring A failed -7
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
|
||||
Capturing subpattern count = 4
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
A 3
|
||||
A 4
|
||||
Options: dupnames
|
||||
No first char
|
||||
No need char
|
||||
cdefgh\CA
|
||||
0: cdefgh
|
||||
1: <unset>
|
||||
2: <unset>
|
||||
3: ef
|
||||
4: gh
|
||||
C ef (2) A
|
||||
|
||||
/^((?P<A>a1)|(?P<A>a2)b)/J
|
||||
Capturing subpattern count = 3
|
||||
Named capturing subpatterns:
|
||||
A 2
|
||||
A 3
|
||||
Options: anchored dupnames
|
||||
No first char
|
||||
No need char
|
||||
a1b\GA
|
||||
0: a1
|
||||
1: a1
|
||||
2: a1
|
||||
G a1 (2) A
|
||||
a2b\GA
|
||||
0: a2b
|
||||
1: a2b
|
||||
2: <unset>
|
||||
3: a2
|
||||
G a2 (2) A
|
||||
** Failers
|
||||
No match
|
||||
a1b\GZ\GA
|
||||
no parentheses with name "Z"
|
||||
0: a1
|
||||
1: a1
|
||||
2: a1
|
||||
copy substring Z failed -7
|
||||
G a1 (2) A
|
||||
|
||||
/^(?P<A>a)(?P<A>b)/J
|
||||
Capturing subpattern count = 2
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
Options: anchored dupnames
|
||||
No first char
|
||||
No need char
|
||||
ab\GA
|
||||
0: ab
|
||||
1: a
|
||||
2: b
|
||||
G a (1) A
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd/J
|
||||
Capturing subpattern count = 2
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
Options: dupnames
|
||||
No first char
|
||||
No need char
|
||||
ab\GA
|
||||
0: ab
|
||||
1: a
|
||||
2: b
|
||||
G a (1) A
|
||||
cd\GA
|
||||
0: cd
|
||||
copy substring A failed -7
|
||||
|
||||
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
|
||||
Capturing subpattern count = 4
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
A 2
|
||||
A 3
|
||||
A 4
|
||||
Options: dupnames
|
||||
No first char
|
||||
No need char
|
||||
cdefgh\GA
|
||||
0: cdefgh
|
||||
1: <unset>
|
||||
2: <unset>
|
||||
3: ef
|
||||
4: gh
|
||||
G ef (2) A
|
||||
|
||||
/(?J)^((?P<A>a1)|(?P<A>a2)b)/
|
||||
Capturing subpattern count = 3
|
||||
Named capturing subpatterns:
|
||||
A 2
|
||||
A 3
|
||||
Options: anchored dupnames
|
||||
No first char
|
||||
No need char
|
||||
a1b\CA
|
||||
0: a1
|
||||
1: a1
|
||||
2: a1
|
||||
C a1 (2) A
|
||||
a2b\CA
|
||||
0: a2b
|
||||
1: a2b
|
||||
2: <unset>
|
||||
3: a2
|
||||
C a2 (2) A
|
||||
|
||||
/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<A>d)/
|
||||
Failed: two named subpatterns have the same name at offset 38
|
||||
|
||||
/ In this next test, J is not set at the outer level; consequently it isn't
|
||||
set in the pattern's options; consequently pcre_get_named_substring() produces
|
||||
a random value. /x
|
||||
Capturing subpattern count = 1
|
||||
Options: extended
|
||||
First char = 'I'
|
||||
Need char = 'e'
|
||||
|
||||
/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<C>d)/
|
||||
Capturing subpattern count = 4
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
B 2
|
||||
B 3
|
||||
C 4
|
||||
Options: anchored
|
||||
No first char
|
||||
No need char
|
||||
a bc d\CA\CB\CC
|
||||
0: a bc d
|
||||
1: a
|
||||
2: b
|
||||
3: c
|
||||
4: d
|
||||
C a (1) A
|
||||
C b (1) B
|
||||
C d (1) C
|
||||
|
||||
/^(?P<A>a)?(?(A)a|b)/
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
Options: anchored
|
||||
No first char
|
||||
No need char
|
||||
aabc
|
||||
0: aa
|
||||
1: a
|
||||
bc
|
||||
0: b
|
||||
** Failers
|
||||
No match
|
||||
abc
|
||||
No match
|
||||
|
||||
/(?:(?(ZZ)a|b)(?P<ZZ>X))+/
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
No options
|
||||
No first char
|
||||
Need char = 'X'
|
||||
bXaX
|
||||
0: bXaX
|
||||
1: X
|
||||
|
||||
/(?:(?(2y)a|b)(X))+/
|
||||
Failed: reference to non-existent subpattern at offset 9
|
||||
|
||||
/(?:(?(ZA)a|b)(?P<ZZ>X))+/
|
||||
Failed: reference to non-existent subpattern at offset 9
|
||||
|
||||
/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
No options
|
||||
No first char
|
||||
Need char = 'X'
|
||||
bbXaaX
|
||||
0: bbXaaX
|
||||
1: X
|
||||
|
||||
/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
ZZ 1
|
||||
No options
|
||||
No first char
|
||||
Need char = 'X'
|
||||
(b)\\Xa\\X
|
||||
0: (b)\Xa\X
|
||||
1: X
|
||||
|
||||
/(?P<ABC/
|
||||
Failed: syntax error after (?P at offset 7
|
||||
|
||||
/(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
|
||||
Capturing subpattern count = 1
|
||||
Max back reference = 1
|
||||
Named capturing subpatterns:
|
||||
A 1
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
bXXaYYaY
|
||||
0: bXXaYYaY
|
||||
1: Y
|
||||
bXYaXXaX
|
||||
0: bX
|
||||
1: X
|
||||
|
||||
/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
|
||||
Capturing subpattern count = 10
|
||||
Max back reference = 10
|
||||
Named capturing subpatterns:
|
||||
A 10
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
bXXaYYaY
|
||||
0: bXXaYYaY
|
||||
1:
|
||||
2:
|
||||
3:
|
||||
4:
|
||||
5:
|
||||
6:
|
||||
7:
|
||||
8:
|
||||
9:
|
||||
10: Y
|
||||
|
||||
/\777/
|
||||
Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
|
||||
|
||||
/\s*,\s*/S
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
No first char
|
||||
Need char = ','
|
||||
Starting byte set: \x09 \x0a \x0c \x0d \x20 ,
|
||||
\x0b,\x0b
|
||||
0: ,
|
||||
\x0c,\x0d
|
||||
0: \x0c,\x0d
|
||||
|
||||
/^abc/m
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
xyz\nabc
|
||||
0: abc
|
||||
xyz\nabc\<lf>
|
||||
0: abc
|
||||
xyz\r\nabc\<lf>
|
||||
0: abc
|
||||
xyz\rabc\<cr>
|
||||
0: abc
|
||||
xyz\r\nabc\<crlf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\nabc\<cr>
|
||||
No match
|
||||
xyz\r\nabc\<cr>
|
||||
No match
|
||||
xyz\nabc\<crlf>
|
||||
No match
|
||||
xyz\rabc\<crlf>
|
||||
No match
|
||||
xyz\rabc\<lf>
|
||||
No match
|
||||
|
||||
/abc$/m
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
First char = 'a'
|
||||
Need char = 'c'
|
||||
xyzabc
|
||||
0: abc
|
||||
xyzabc\n
|
||||
0: abc
|
||||
xyzabc\npqr
|
||||
0: abc
|
||||
xyzabc\r\<cr>
|
||||
0: abc
|
||||
xyzabc\rpqr\<cr>
|
||||
0: abc
|
||||
xyzabc\r\n\<crlf>
|
||||
0: abc
|
||||
xyzabc\r\npqr\<crlf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyzabc\r
|
||||
No match
|
||||
xyzabc\rpqr
|
||||
No match
|
||||
xyzabc\r\n
|
||||
No match
|
||||
xyzabc\r\npqr
|
||||
No match
|
||||
|
||||
/^abc/m<cr>
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
Forced newline sequence: CR
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
xyz\rabcdef
|
||||
0: abc
|
||||
xyz\nabcdef\<lf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\nabcdef
|
||||
No match
|
||||
|
||||
/^abc/m<lf>
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
Forced newline sequence: LF
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
xyz\nabcdef
|
||||
0: abc
|
||||
xyz\rabcdef\<cr>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\rabcdef
|
||||
No match
|
||||
|
||||
/^abc/m<crlf>
|
||||
Capturing subpattern count = 0
|
||||
Options: multiline
|
||||
Forced newline sequence: CRLF
|
||||
First char at start or follows newline
|
||||
Need char = 'c'
|
||||
xyz\r\nabcdef
|
||||
0: abc
|
||||
xyz\rabcdef\<cr>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\rabcdef
|
||||
No match
|
||||
|
||||
/^abc/m<bad>
|
||||
Unknown newline type at: <bad>
|
||||
|
||||
|
||||
/abc/
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
First char = 'a'
|
||||
Need char = 'c'
|
||||
xyz\rabc\<bad>
|
||||
Unknown newline type at: <bad>
|
||||
abc
|
||||
0: abc
|
||||
|
||||
/.*/
|
||||
Capturing subpattern count = 0
|
||||
Partial matching not supported
|
||||
No options
|
||||
First char at start or follows newline
|
||||
No need char
|
||||
abc\ndef
|
||||
0: abc
|
||||
abc\rdef
|
||||
0: abc\x0ddef
|
||||
abc\r\ndef
|
||||
0: abc\x0d
|
||||
\<cr>abc\ndef
|
||||
0: abc\x0adef
|
||||
\<cr>abc\rdef
|
||||
0: abc
|
||||
\<cr>abc\r\ndef
|
||||
0: abc
|
||||
\<crlf>abc\ndef
|
||||
0: abc\x0adef
|
||||
\<crlf>abc\rdef
|
||||
0: abc\x0ddef
|
||||
\<crlf>abc\r\ndef
|
||||
0: abc
|
||||
|
||||
/\w+(.)(.)?def/s
|
||||
Capturing subpattern count = 2
|
||||
Partial matching not supported
|
||||
Options: dotall
|
||||
No first char
|
||||
Need char = 'f'
|
||||
abc\ndef
|
||||
0: abc\x0adef
|
||||
1: \x0a
|
||||
abc\rdef
|
||||
0: abc\x0ddef
|
||||
1: \x0d
|
||||
abc\r\ndef
|
||||
0: abc\x0d\x0adef
|
||||
1: \x0d
|
||||
2: \x0a
|
||||
|
||||
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+
|
||||
Capturing subpattern count = 1
|
||||
Partial matching not supported
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
/* this is a C style comment */\M
|
||||
Minimum match() limit = 120
|
||||
Minimum match() recursion limit = 6
|
||||
0: /* this is a C style comment */
|
||||
1: /* this is a C style comment */
|
||||
|
||||
/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/
|
||||
Capturing subpattern count = 1
|
||||
Named capturing subpatterns:
|
||||
B 1
|
||||
No options
|
||||
No first char
|
||||
Need char = '.'
|
||||
|
||||
/ End of testinput2 /
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
|
12
ext/pcre/pcrelib/testdata/testoutput4
vendored
12
ext/pcre/pcrelib/testdata/testoutput4
vendored
@ -898,8 +898,20 @@ No match
|
||||
\x{85}
|
||||
0: \x{85}
|
||||
|
||||
/^ሴ/8
|
||||
ሴ
|
||||
0: \x{1234}
|
||||
|
||||
/^\ሴ/8
|
||||
ሴ
|
||||
0: \x{1234}
|
||||
|
||||
"(?s)(.{1,5})"8
|
||||
abcdefg
|
||||
0: abcde
|
||||
1: abcde
|
||||
ab
|
||||
0: ab
|
||||
1: ab
|
||||
|
||||
/ End of testinput4 /
|
||||
|
13
ext/pcre/pcrelib/testdata/testoutput5
vendored
13
ext/pcre/pcrelib/testdata/testoutput5
vendored
@ -1107,4 +1107,17 @@ Options: anchored utf8
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/\777/I
|
||||
Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
|
||||
|
||||
/\777/8I
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 199
|
||||
Need char = 191
|
||||
\x{1ff}
|
||||
0: \x{1ff}
|
||||
\777
|
||||
0: \x{1ff}
|
||||
|
||||
/ End of testinput5 /
|
||||
|
16
ext/pcre/pcrelib/testdata/testoutput6
vendored
16
ext/pcre/pcrelib/testdata/testoutput6
vendored
@ -1394,4 +1394,20 @@ No match
|
||||
AXY
|
||||
No match
|
||||
|
||||
/^(\p{Z}[^\p{C}\p{Z}]+)*$/
|
||||
\xa0!
|
||||
0: \xa0!
|
||||
1: \xa0!
|
||||
|
||||
/^[\pL](abc)(?1)/
|
||||
AabcabcYZ
|
||||
0: Aabcabc
|
||||
1: abc
|
||||
|
||||
/([\pL]=(abc))*X/
|
||||
L=abcX
|
||||
0: L=abcX
|
||||
1: L=abc
|
||||
2: abc
|
||||
|
||||
/ End of testinput6 /
|
||||
|
210
ext/pcre/pcrelib/testdata/testoutput7
vendored
210
ext/pcre/pcrelib/testdata/testoutput7
vendored
@ -3004,13 +3004,13 @@ No match
|
||||
abc\xd3
|
||||
0: abc\xd3
|
||||
|
||||
/(abc)\500/
|
||||
/(abc)\100/
|
||||
abc\x40
|
||||
0: abc@
|
||||
abc\100
|
||||
0: abc@
|
||||
|
||||
/(abc)\5000/
|
||||
/(abc)\1000/
|
||||
abc\x400
|
||||
0: abc@0
|
||||
abc\x40\x30
|
||||
@ -6523,4 +6523,210 @@ Partial match: 123
|
||||
4\P\R
|
||||
0: 4
|
||||
|
||||
/^/mg
|
||||
a\nb\nc\n
|
||||
0:
|
||||
0:
|
||||
0:
|
||||
\
|
||||
0:
|
||||
|
||||
/(?<=C\n)^/mg
|
||||
A\nC\nC\n
|
||||
0:
|
||||
|
||||
/(?s)A?B/
|
||||
AB
|
||||
0: AB
|
||||
aB
|
||||
0: B
|
||||
|
||||
/(?s)A*B/
|
||||
AB
|
||||
0: AB
|
||||
aB
|
||||
0: B
|
||||
|
||||
/(?m)A?B/
|
||||
AB
|
||||
0: AB
|
||||
aB
|
||||
0: B
|
||||
|
||||
/(?m)A*B/
|
||||
AB
|
||||
0: AB
|
||||
aB
|
||||
0: B
|
||||
|
||||
/Content-Type\x3A[^\r\n]{6,}/
|
||||
Content-Type:xxxxxyyy
|
||||
0: Content-Type:xxxxxyyy
|
||||
1: Content-Type:xxxxxyy
|
||||
2: Content-Type:xxxxxy
|
||||
|
||||
/Content-Type\x3A[^\r\n]{6,}z/
|
||||
Content-Type:xxxxxyyyz
|
||||
0: Content-Type:xxxxxyyyz
|
||||
|
||||
/Content-Type\x3A[^a]{6,}/
|
||||
Content-Type:xxxyyy
|
||||
0: Content-Type:xxxyyy
|
||||
|
||||
/Content-Type\x3A[^a]{6,}z/
|
||||
Content-Type:xxxyyyz
|
||||
0: Content-Type:xxxyyyz
|
||||
|
||||
/^abc/m
|
||||
xyz\nabc
|
||||
0: abc
|
||||
xyz\nabc\<lf>
|
||||
0: abc
|
||||
xyz\r\nabc\<lf>
|
||||
0: abc
|
||||
xyz\rabc\<cr>
|
||||
0: abc
|
||||
xyz\r\nabc\<crlf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\nabc\<cr>
|
||||
No match
|
||||
xyz\r\nabc\<cr>
|
||||
No match
|
||||
xyz\nabc\<crlf>
|
||||
No match
|
||||
xyz\rabc\<crlf>
|
||||
No match
|
||||
xyz\rabc\<lf>
|
||||
No match
|
||||
|
||||
/abc$/m
|
||||
xyzabc
|
||||
0: abc
|
||||
xyzabc\n
|
||||
0: abc
|
||||
xyzabc\npqr
|
||||
0: abc
|
||||
xyzabc\r\<cr>
|
||||
0: abc
|
||||
xyzabc\rpqr\<cr>
|
||||
0: abc
|
||||
xyzabc\r\n\<crlf>
|
||||
0: abc
|
||||
xyzabc\r\npqr\<crlf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyzabc\r
|
||||
No match
|
||||
xyzabc\rpqr
|
||||
No match
|
||||
xyzabc\r\n
|
||||
No match
|
||||
xyzabc\r\npqr
|
||||
No match
|
||||
|
||||
/^abc/m<cr>
|
||||
xyz\rabcdef
|
||||
0: abc
|
||||
xyz\nabcdef\<lf>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\nabcdef
|
||||
No match
|
||||
|
||||
/^abc/m<lf>
|
||||
xyz\nabcdef
|
||||
0: abc
|
||||
xyz\rabcdef\<cr>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\rabcdef
|
||||
No match
|
||||
|
||||
/^abc/m<crlf>
|
||||
xyz\r\nabcdef
|
||||
0: abc
|
||||
xyz\rabcdef\<cr>
|
||||
0: abc
|
||||
** Failers
|
||||
No match
|
||||
xyz\rabcdef
|
||||
No match
|
||||
|
||||
/.*/
|
||||
abc\ndef
|
||||
0: abc
|
||||
1: ab
|
||||
2: a
|
||||
3:
|
||||
abc\rdef
|
||||
0: abc\x0ddef
|
||||
1: abc\x0dde
|
||||
2: abc\x0dd
|
||||
3: abc\x0d
|
||||
4: abc
|
||||
5: ab
|
||||
6: a
|
||||
7:
|
||||
abc\r\ndef
|
||||
0: abc\x0d
|
||||
1: abc
|
||||
2: ab
|
||||
3: a
|
||||
4:
|
||||
\<cr>abc\ndef
|
||||
0: abc\x0adef
|
||||
1: abc\x0ade
|
||||
2: abc\x0ad
|
||||
3: abc\x0a
|
||||
4: abc
|
||||
5: ab
|
||||
6: a
|
||||
7:
|
||||
\<cr>abc\rdef
|
||||
0: abc
|
||||
1: ab
|
||||
2: a
|
||||
3:
|
||||
\<cr>abc\r\ndef
|
||||
0: abc
|
||||
1: ab
|
||||
2: a
|
||||
3:
|
||||
\<crlf>abc\ndef
|
||||
0: abc\x0adef
|
||||
1: abc\x0ade
|
||||
2: abc\x0ad
|
||||
3: abc\x0a
|
||||
4: abc
|
||||
5: ab
|
||||
6: a
|
||||
7:
|
||||
\<crlf>abc\rdef
|
||||
0: abc\x0ddef
|
||||
1: abc\x0dde
|
||||
2: abc\x0dd
|
||||
3: abc\x0d
|
||||
4: abc
|
||||
5: ab
|
||||
6: a
|
||||
7:
|
||||
\<crlf>abc\r\ndef
|
||||
0: abc
|
||||
1: ab
|
||||
2: a
|
||||
3:
|
||||
|
||||
/\w+(.)(.)?def/s
|
||||
abc\ndef
|
||||
0: abc\x0adef
|
||||
abc\rdef
|
||||
0: abc\x0ddef
|
||||
abc\r\ndef
|
||||
0: abc\x0d\x0adef
|
||||
|
||||
/ End of testinput7 /
|
||||
|
Loading…
Reference in New Issue
Block a user