mirror of
https://github.com/php/php-src.git
synced 2024-11-25 19:05:31 +08:00
Upgrade to version 3.92.
This commit is contained in:
parent
b4348434be
commit
c65c182693
@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
if test "$PHP_PCRE_REGEX" = "yes"; then
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/maketables.c pcrelib/get.c pcrelib/study.c pcrelib/pcre.c php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -I@ext_srcdir@/pcrelib)
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/maketables.c pcrelib/get.c pcrelib/study.c pcrelib/pcre.c php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -DLINK_SIZE=2 -I@ext_srcdir@/pcrelib)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
|
||||
else
|
||||
@ -49,7 +49,7 @@ if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
|
||||
AC_DEFINE(HAVE_PCRE, 1, [ ])
|
||||
PHP_ADD_INCLUDE($PCRE_INCDIR)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DSUPPORT_UTF8)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -DLINK_SIZE=2)
|
||||
fi
|
||||
fi
|
||||
PHP_SUBST(PCRE_SHARED_LIBADD)
|
||||
|
@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
if test "$PHP_PCRE_REGEX" = "yes"; then
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/maketables.c pcrelib/get.c pcrelib/study.c pcrelib/pcre.c php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -I@ext_srcdir@/pcrelib)
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/maketables.c pcrelib/get.c pcrelib/study.c pcrelib/pcre.c php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -DLINK_SIZE=2 -I@ext_srcdir@/pcrelib)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
|
||||
else
|
||||
@ -49,7 +49,7 @@ if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
|
||||
AC_DEFINE(HAVE_PCRE, 1, [ ])
|
||||
PHP_ADD_INCLUDE($PCRE_INCDIR)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DSUPPORT_UTF8)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DSUPPORT_UTF8 -DLINK_SIZE=2)
|
||||
fi
|
||||
fi
|
||||
PHP_SUBST(PCRE_SHARED_LIBADD)
|
||||
|
@ -1,7 +1,214 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 3.0 02-Jan-02
|
||||
Version 4.00 ....
|
||||
-----------------
|
||||
|
||||
1. If a comment in an extended regex that started immediately after a meta-item
|
||||
extended to the end of string, PCRE compiled incorrect data. This could lead to
|
||||
all kinds of weird effects. Example: /#/ was bad; /()#/ was bad; /a#/ was not.
|
||||
|
||||
2. Moved to autoconf 2.53 and libtool 1.4.2.
|
||||
|
||||
3. Perl 5.8 no longer needs "use utf8" for doing UTF-8 things. Consequently,
|
||||
the special perltest8 script is no longer needed - all the tests can be run
|
||||
from a single perltest script.
|
||||
|
||||
4. From 5.004, Perl has not included the VT character (0x0b) in the set defined
|
||||
by \s. It has now been removed in PCRE. This means it isn't recognized as
|
||||
whitespace in /x regexes too, which is the same as Perl. Note that the POSIX
|
||||
class [:space:] *does* include VT, thereby creating a mess.
|
||||
|
||||
5. Added the class [:blank:] (a GNU extension from Perl 5.8) to match only
|
||||
space and tab.
|
||||
|
||||
6. Perl 5.005 was a long time ago. It's time to amalgamate the tests that use
|
||||
its new features into the main test script, reducing the number of scripts.
|
||||
|
||||
7. Perl 5.8 has changed the meaning of patterns like /a(?i)b/. Earlier
|
||||
versions were backward compatible, and made the (?i) apply to the whole
|
||||
pattern, as if /i were given. Now it behaves more logically, and applies the
|
||||
option setting only to what follows. PCRE has been changed to follow suit.
|
||||
However, if it finds option settings right at the start of the pattern, it
|
||||
extracts them into the global options, as before. Thus, they show up in the
|
||||
info data.
|
||||
|
||||
8. Added support for the \Q...\E escape sequence. Characters in between are
|
||||
treated as literals. This is slightly different from Perl in that $ and @ are
|
||||
also handled as literals inside the quotes. In Perl, they will cause variable
|
||||
interpolation. Note the following examples:
|
||||
|
||||
Pattern PCRE matches Perl matches
|
||||
|
||||
\Qabc$xyz\E abc$xyz abc followed by the contents of $xyz
|
||||
\Qabc\$xyz\E abc\$xyz abc\$xyz
|
||||
\Qabc\E\$\Qxyz\E abc$xyz abc$xyz
|
||||
|
||||
9. Re-organized 3 code statements in pcretest to avoid "overflow in
|
||||
floating-point constant arithmetic" warnings from a Microsoft compiler. Added a
|
||||
(size_t) cast to one statement in pcretest and one in pcreposix to avoid
|
||||
signed/unsigned warnings.
|
||||
|
||||
10. SunOS4 doesn't have strtoul(). This was used only for unpicking the -o
|
||||
option for pcretest, so I've replaced it by a simple function that does just
|
||||
that job.
|
||||
|
||||
11. pcregrep was ending with code 0 instead of 2 for the commands "pcregrep" or
|
||||
"pcregrep -".
|
||||
|
||||
12. Added "possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's
|
||||
Java package. This provides some syntactic sugar for simple cases of what my
|
||||
documentation calls "once-only subpatterns". A pattern such as x*+ is the
|
||||
same as (?>x*). In other words, if what is inside (?>...) is just a single
|
||||
repeated item, you can use this simplified notation. Note that this only makes sense
|
||||
with greedy quantifiers. Consequently, the use of the possessive quantifier
|
||||
forces greediness, whatever the setting of the PCRE_UNGREEDY option.
|
||||
|
||||
13. A change of greediness default within a pattern was not taking effect at
|
||||
the current level for patterns like /(b+(?U)a+)/. It did apply to parenthesized
|
||||
subpatterns that followed. Patterns like /b+(?U)a+/ worked because the option
|
||||
was abstracted outside.
|
||||
|
||||
14. PCRE now supports the \G assertion. It is true when the current matching
|
||||
position is at the start point of the match. This differs from \A when the
|
||||
starting offset is non-zero. Used with the /g option of pcretest (or similar
|
||||
code), it works in the same way as it does for Perl's /g option.
|
||||
|
||||
15. Some bugs concerning the handling of certain option changes within patterns
|
||||
have been fixed. These applied to options other than (?ims). For example,
|
||||
"a(?x: b c )d" did not match "XabcdY" but did match "Xa b c dY". It should have
|
||||
been the other way round. Some of this was related to change 7 above.
|
||||
|
||||
16. PCRE now gives errors for /[.x.]/ and /[=x=]/ as unsupported POSIX
|
||||
features, as Perl does. Previously, PCRE gave the warnings only for /[[.x.]]/
|
||||
and /[[=x=]]/. PCRE now also gives an error for /[:name:]/ because it supports
|
||||
POSIX classes only within a class (e.g. /[[:alpha:]]/).
|
||||
|
||||
17. Added support for Perl's \C escape. This matches one byte, even in UTF8
|
||||
mode. Unlike ".", it always matches newline, whatever the setting of
|
||||
PCRE_DOTALL. However, PCRE does not permit \C to appear in lookbehind
|
||||
assertions. (Perl allows it, but it doesn't (in general) work because it can't
|
||||
calculate the length of the lookbehind. At least, that's the case for Perl
|
||||
5.8.0.)
|
||||
|
||||
18. Added an error diagnosis for escapes that PCRE does not support: these are
|
||||
\L, \l, \N, \P, \p, \U, \u, and \X.
|
||||
|
||||
19. Although correctly diagnosing a missing ']' in a character class, PCRE was
|
||||
reading past the end of the pattern in cases such as /[abcd/.
|
||||
|
||||
20. PCRE was getting more memory than necessary for patterns with classes that
|
||||
contained both POSIX named classes and other characters, e.g. /[[:space:]abc]/.
|
||||
|
||||
21. Added some code, conditional on #ifdef VPCOMPAT, to make life easier for
|
||||
compiling PCRE for use with Virtual Pascal.
|
||||
|
||||
22. Small fix to the Makefile to make it work properly if the build is done
|
||||
outside the source tree.
|
||||
|
||||
23. Added a new extension: a condition to go with recursion. If a conditional
|
||||
subpattern starts with (?(R) the "true" branch is used if recursion has
|
||||
happened, whereas the "false" branch is used only at the top level.
|
||||
|
||||
24. When there was a very long string of literal characters (over 255 bytes
|
||||
without UTF support, over 250 bytes with UTF support), the computation of how
|
||||
much memory was required could be incorrect, leading to segfaults or other
|
||||
strange effects.
|
||||
|
||||
25. PCRE was incorrectly assuming anchoring (either to start of subject or to
|
||||
start of line for a non-DOTALL pattern) when a pattern started with (.*) and
|
||||
there was a subsequent back reference to those brackets. This meant that, for
|
||||
example, /(.*)\d+\1/ failed to match "abc123bc". Unfortunately, it isn't
|
||||
possible to check for precisely this case. All we can do is abandon the
|
||||
optimization if .* occurs inside capturing brackets when there are any back
|
||||
references whatsoever.
|
||||
|
||||
26. The handling of the optimization for finding the first character of a
|
||||
non-anchored pattern, and for finding a character that is required later in the
|
||||
match was failing in some cases. This didn't break the matching; it just
|
||||
failed to optimize when it could. The way this is done has been re-implemented.
|
||||
|
||||
27. Fixed typo in error message for invalid (?R item (it said "(?p").
|
||||
|
||||
28. Added a new feature that provides some of the functionality that Perl
|
||||
provides with (?{...}). The facility is termed a "callout". The way it is done
|
||||
in PCRE is for the caller to provide an optional function, by setting
|
||||
pcre_callout to its entry point. Like pcre_malloc and pcre_free, this is a
|
||||
global variable. By default it is unset, which disables all calling out. To get
|
||||
the function called, the regex must include (?C) at appropriate points. This
|
||||
is, in fact, equivalent to (?C0), and any number <= 255 may be given with (?C).
|
||||
This provides a means of identifying different callout points. When PCRE
|
||||
reaches such a point in the regex, if pcre_callout has been set, the external
|
||||
function is called. It is provided with data in a structure called
|
||||
pcre_callout_block, which is defined in pcre.h. If the function returns 0,
|
||||
matching continues; if it returns a non-zero value, the match at the current
|
||||
point fails. However, backtracking will occur if possible.
|
||||
|
||||
29. pcretest is upgraded to test the callout functionality. It provides a
|
||||
callout function that displays information. By default, it shows the start of
|
||||
the match and the current position in the text. There are some new data escapes
|
||||
to vary what happens:
|
||||
|
||||
\C+ in addition, show current contents of captured substrings
|
||||
\C- do not supply a callout function
|
||||
\C!n return 1 when callout number n is reached
|
||||
\C!n!m return 1 when callout number n is reached for the mth time
|
||||
|
||||
30. If pcregrep was called with the -l option and just a single file name, it
|
||||
output "<stdin>" if a match was found, instead of the file name.
|
||||
|
||||
31. Improve the efficiency of the POSIX API to PCRE. If the number of capturing
|
||||
slots is less than POSIX_MALLOC_THRESHOLD, use a block on the stack to pass to
|
||||
pcre_exec(). This saves a malloc/free per call. The default value of
|
||||
POSIX_MALLOC_THRESHOLD is 5; it can be changed by --with-posix-malloc-threshold
|
||||
when configuring.
|
||||
|
||||
32. The default maximum size of a compiled pattern is 64K. There have been a
|
||||
few cases of people hitting this limit. The code now uses macros to handle the
|
||||
storing of links as offsets within the compiled pattern. It defaults to 2-byte
|
||||
links, but this can be changed to 3 or 4 bytes by --with-link-size when
|
||||
configuring. Tests 2 and 5 work only with 2-byte links because they output
|
||||
debugging information about compiled patterns.
|
||||
|
||||
33. Internal code re-arrangements:
|
||||
|
||||
(a) Moved the debugging function for printing out a compiled regex into
|
||||
its own source file (printint.c) and used #include to pull it into
|
||||
pcretest.c and, when DEBUG is defined, into pcre.c, instead of having
|
||||
two separate copies.
|
||||
|
||||
(b) Defined the list of op-code names for debugging as a macro in
|
||||
internal.h so that it is next to the definition of the opcodes.
|
||||
|
||||
(c) Defined a table of op-code lengths for simpler skipping along compiled
|
||||
code. This is again a macro in internal.h so that it is next to the
|
||||
definition of the opcodes.
|
||||
|
||||
34. Added support for recursive calls to individual subpatterns, along the
|
||||
lines of Robin Houston's patch (but implemented somewhat differently).
|
||||
|
||||
35. Further mods to the Makefile to help Win32. Also, added code to pcregrep
|
||||
to allow it to read and process whole directories in Win32. This code was
|
||||
contributed by Lionel Fourquaux; it has not been tested by me.
|
||||
|
||||
36. Added support for named subpatterns. The Python syntax (?P<name>...) is
|
||||
used to name a group. Names consist of alphanumerics and underscores, and
|
||||
must be unique. Back references use the syntax (?P=name) and recursive
|
||||
calls use (?P>name) which is a PCRE extension to the Python extension.
|
||||
Groups still have numbers. The function pcre_fullinfo() can be used after
|
||||
compilation to extract a name/number map. There are three relevant calls:
|
||||
|
||||
PCRE_INFO_NAMEENTRYSIZE yields the size of each entry in the map
|
||||
PCRE_INFO_NAMECOUNT yields the number of entries
|
||||
PCRE_INFO_NAMETABLE yields a pointer to the map.
|
||||
|
||||
The map is a vector of fixed-size entries. The size of each entry depends
|
||||
on the length of the longest name used. The first two bytes of each entry
|
||||
are the group number, most significant byte first. There follows the
|
||||
corresponding name, zero terminated. The names are in alphabetical order.
|
||||
|
||||
|
||||
Version 3.9 02-Jan-02
|
||||
---------------------
|
||||
|
||||
1. A bit of extraneous text had somehow crept into the pcregrep documentation.
|
||||
|
@ -41,13 +41,49 @@ Makefile.in to create Makefile, substituting suitable values for the variables
|
||||
at the head of the file.
|
||||
|
||||
Some help in building a Win32 DLL of PCRE in GnuWin32 environments was
|
||||
contributed by Paul.Sokolovsky@technologist.com. These environments are
|
||||
Mingw32 (http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and
|
||||
CygWin (http://sourceware.cygnus.com/cygwin/). Paul comments:
|
||||
contributed by Paul Sokolovsky. These environments are Mingw32
|
||||
(http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and CygWin
|
||||
(http://sourceware.cygnus.com/cygwin/). Paul comments:
|
||||
|
||||
For CygWin, set CFLAGS=-mno-cygwin, and do 'make dll'. You'll get
|
||||
pcre.dll (containing pcreposix also), libpcre.dll.a, and dynamically
|
||||
linked pgrep and pcretest. If you have /bin/sh, run RunTest (three
|
||||
main test go ok, locale not supported).
|
||||
|
||||
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
|
||||
was contributed by Alexander Tokarev. It is called makevp.bat.
|
||||
|
||||
These are some further comments about Win32 builds from Mark Evans:
|
||||
|
||||
The documentation for Win32 builds is a bit shy. Under MSVC6 I
|
||||
followed their instructions to the letter, but there were still
|
||||
some things missing.
|
||||
|
||||
(1) Must #define STATIC for entire project if linking statically.
|
||||
(I see no reason to use DLLs for code this compact.) This of
|
||||
course is a project setting in MSVC under Preprocessor.
|
||||
|
||||
(2) Missing some #ifdefs relating to the function pointers
|
||||
pcre_malloc and pcre_free. See my solution below. (The stubs
|
||||
may not be mandatory but they made me feel better.)
|
||||
|
||||
=========================
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
|
||||
void* malloc_stub(size_t N)
|
||||
{ return malloc(N); }
|
||||
void free_stub(void* p)
|
||||
{ free(p); }
|
||||
void *(*pcre_malloc)(size_t) = &malloc_stub;
|
||||
void (*pcre_free)(void *) = &free_stub;
|
||||
|
||||
#else
|
||||
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
|
||||
#endif
|
||||
=========================
|
||||
|
||||
****
|
||||
|
@ -30,13 +30,14 @@ Windows systems (I myself do not use Windows). Some are complete in themselves;
|
||||
others are pointers to URLs containing relevant files.
|
||||
|
||||
|
||||
Building PCRE on a Unix system
|
||||
------------------------------
|
||||
Building PCRE on a Unix-like system
|
||||
-----------------------------------
|
||||
|
||||
To build PCRE on a Unix system, first run the "configure" command from the PCRE
|
||||
distribution directory, with your current directory set to the directory where
|
||||
you want the files to be created. This command is a standard GNU "autoconf"
|
||||
configuration script, for which generic instructions are supplied in INSTALL.
|
||||
To build PCRE on a Unix-like system, first run the "configure" command from the
|
||||
PCRE distribution directory, with your current directory set to the directory
|
||||
where you want the files to be created. This command is a standard GNU
|
||||
"autoconf" configuration script, for which generic instructions are supplied in
|
||||
INSTALL.
|
||||
|
||||
Most commonly, people build PCRE within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient, but the
|
||||
@ -147,13 +148,11 @@ A file called testtry is used to hold the output from pcretest. To run pcretest
|
||||
on just one of the test files, give its number as an argument to RunTest, for
|
||||
example:
|
||||
|
||||
RunTest 3
|
||||
RunTest 2
|
||||
|
||||
The first and third test files can also be fed directly into the perltest
|
||||
script to check that Perl gives the same results. The third file requires the
|
||||
additional features of release 5.005, which is why it is kept separate from the
|
||||
main test input, which needs only Perl 5.004. In the long run, when 5.005 (or
|
||||
higher) is widespread, these two test files may get amalgamated.
|
||||
The first file can also be fed directly into the perltest script to check that
|
||||
Perl gives the same results. The only difference you should see is in the first
|
||||
few lines, where the Perl version is given instead of the PCRE version.
|
||||
|
||||
The second set of tests checks pcre_fullinfo(), pcre_info(), pcre_study(),
|
||||
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
|
||||
@ -171,12 +170,12 @@ listed for checking. Where the comparison test output contains [\x00-\x7f] the
|
||||
test will contain [\x00-\xff], and similarly in some other cases. This is not a
|
||||
bug in PCRE.
|
||||
|
||||
The fourth set of tests checks pcre_maketables(), the facility for building a
|
||||
The third set of tests checks pcre_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The tests make use of the "fr" (French) locale. Before running
|
||||
the test, the script checks for the presence of this locale by running the
|
||||
"locale" command. If that command fails, or if it doesn't include "fr" in the
|
||||
list of available locales, the fourth test cannot be run, and a comment is
|
||||
list of available locales, the third test cannot be run, and a comment is
|
||||
output to say why. If running this test produces instances of the error
|
||||
|
||||
** Failed to set locale "fr"
|
||||
@ -184,10 +183,14 @@ output to say why. If running this test produces instances of the error
|
||||
in the comparison output, it means that locale is not available on your system,
|
||||
despite being listed by "locale". This does not mean that PCRE is broken.
|
||||
|
||||
The fifth test checks the experimental, incomplete UTF-8 support. It is not run
|
||||
automatically unless PCRE is built with UTF-8 support. This file can be fed
|
||||
directly to the perltest8 script, which requires Perl 5.6 or higher. The sixth
|
||||
file tests internal UTF-8 features of PCRE that are not relevant to Perl.
|
||||
The fourth test checks the experimental, incomplete UTF-8 support. It is not
|
||||
run automatically unless PCRE is built with UTF-8 support. To do this you must
|
||||
set --enable-utf8 when running "configure". This file can also be fed directly
|
||||
to the perltest script, provided you are running Perl 5.8 or higher. (For Perl
|
||||
5.6, a small patch, commented in the script, can be used.)
|
||||
|
||||
The fifth and final file tests error handling with UTF-8 encoding, and internal
|
||||
UTF-8 features of PCRE that are not relevant to Perl.
|
||||
|
||||
|
||||
Character tables
|
||||
@ -285,23 +288,25 @@ The distribution should contain the following files:
|
||||
perltest8 Perl test program for UTF-8 tests
|
||||
pcregrep.c source of a grep utility that uses PCRE
|
||||
pcre-config.in source of script which retains PCRE information
|
||||
testdata/testinput1 test data, compatible with Perl 5.004 and 5.005
|
||||
testdata/testinput1 test data, compatible with Perl
|
||||
testdata/testinput2 test data for error messages and non-Perl things
|
||||
testdata/testinput3 test data, compatible with Perl 5.005
|
||||
testdata/testinput4 test data for locale-specific tests
|
||||
testdata/testinput5 test data for UTF-8 tests compatible with Perl 5.6
|
||||
testdata/testinput6 test data for other UTF-8 tests
|
||||
testdata/testinput3 test data for locale-specific tests
|
||||
testdata/testinput4 test data for UTF-8 tests compatible with Perl
|
||||
testdata/testinput5 test data for other UTF-8 tests
|
||||
testdata/testoutput1 test results corresponding to testinput1
|
||||
testdata/testoutput2 test results corresponding to testinput2
|
||||
testdata/testoutput3 test results corresponding to testinput3
|
||||
testdata/testoutput4 test results corresponding to testinput4
|
||||
testdata/testoutput5 test results corresponding to testinput5
|
||||
testdata/testoutput6 test results corresponding to testinput6
|
||||
|
||||
(C) Auxiliary files for Win32 DLL
|
||||
|
||||
dll.mk
|
||||
pcre.def
|
||||
|
||||
(D) Auxiliary file for VPASCAL
|
||||
|
||||
makevp.bat
|
||||
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
August 2001
|
||||
August 2002
|
||||
|
@ -1,149 +0,0 @@
|
||||
#! /bin/sh
|
||||
|
||||
# This file is generated by configure from RunTest.in. Make any changes
|
||||
# to that file.
|
||||
|
||||
# Run PCRE tests
|
||||
|
||||
cf=diff
|
||||
testdata=./testdata
|
||||
|
||||
# Select which tests to run; if no selection, run all
|
||||
|
||||
do1=no
|
||||
do2=no
|
||||
do3=no
|
||||
do4=no
|
||||
do5=no
|
||||
do6=no
|
||||
|
||||
while [ $# -gt 0 ] ; do
|
||||
case $1 in
|
||||
1) do1=yes;;
|
||||
2) do2=yes;;
|
||||
3) do3=yes;;
|
||||
4) do4=yes;;
|
||||
5) do5=yes;;
|
||||
6) do6=yes;;
|
||||
*) echo "Unknown test number $1"; exit 1;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if [ "-DSUPPORT_UTF8" = "" ] ; then
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 because UFT8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because UFT8 support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a\
|
||||
$do5 = no -a $do6 = no ] ; then
|
||||
do1=yes
|
||||
do2=yes
|
||||
do3=yes
|
||||
do4=yes
|
||||
if [ "-DSUPPORT_UTF8" != "" ] ; then do5=yes; fi
|
||||
if [ "-DSUPPORT_UTF8" != "" ] ; then do6=yes; fi
|
||||
fi
|
||||
|
||||
# Primary test, Perl-compatible
|
||||
|
||||
if [ $do1 = yes ] ; then
|
||||
echo "Testing main functionality (Perl compatible)"
|
||||
./pcretest $testdata/testinput1 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput1
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# PCRE tests that are not Perl-compatible - API & error tests, mostly
|
||||
|
||||
if [ $do2 = yes ] ; then
|
||||
echo "Testing API and error handling (not Perl compatible)"
|
||||
./pcretest -i $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput2
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Additional Perl-compatible tests for Perl 5.005's new features
|
||||
|
||||
if [ $do3 = yes ] ; then
|
||||
echo "Testing Perl 5.005 features (Perl 5.005 compatible)"
|
||||
./pcretest $testdata/testinput3 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput3
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $do1 = yes -a $do2 = yes -a $do3 = yes ] ; then
|
||||
echo " "
|
||||
echo "The three main tests all ran OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
# Locale-specific tests, provided the "fr" locale is available
|
||||
|
||||
if [ $do4 = yes ] ; then
|
||||
locale -a | grep '^fr$' >/dev/null
|
||||
if [ $? -eq 0 ] ; then
|
||||
echo "Testing locale-specific features (using 'fr' locale)"
|
||||
./pcretest $testdata/testinput4 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput4
|
||||
if [ $? != 0 ] ; then
|
||||
echo " "
|
||||
echo "Locale test did not run entirely successfully."
|
||||
echo "This usually means that there is a problem with the locale"
|
||||
echo "settings rather than a bug in PCRE."
|
||||
else
|
||||
echo "Locale test ran OK"
|
||||
fi
|
||||
echo " "
|
||||
else exit 1
|
||||
fi
|
||||
else
|
||||
echo "Cannot test locale-specific features - 'fr' locale not found,"
|
||||
echo "or the \"locale\" command is not available to check for it."
|
||||
echo " "
|
||||
fi
|
||||
fi
|
||||
|
||||
# Additional tests for UTF8 support
|
||||
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Testing experimental, incomplete UTF8 support (Perl compatible)"
|
||||
./pcretest $testdata/testinput5 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput5
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "UTF8 test ran OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Testing API and internals for UTF8 support (not Perl compatible)"
|
||||
./pcretest $testdata/testinput6 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$cf testtry $testdata/testoutput6
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else exit 1
|
||||
fi
|
||||
echo "UTF8 internals test ran OK"
|
||||
echo " "
|
||||
fi
|
||||
|
||||
# End
|
@ -231,23 +231,23 @@ Conditional subpatterns
|
||||
These are like other subpatterns, but they start with the opcode OP_COND. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by two bytes containing the
|
||||
reference number. Otherwise, a conditional subpattern will always start with
|
||||
one of the assertions.
|
||||
reference number. If the condition is "in recursion" (coded as "(?(R)"), the
|
||||
same scheme is used, with a "reference number" of 0xffff. Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Changing options
|
||||
----------------
|
||||
|
||||
If any of the /i, /m, or /s options are changed within a parenthesized group,
|
||||
an OP_OPT opcode is compiled, followed by one byte containing the new settings
|
||||
of these flags. If there are several alternatives in a group, there is an
|
||||
occurrence of OP_OPT at the start of all those following the first options
|
||||
change, to set appropriate options for the start of the alternative.
|
||||
Immediately after the end of the group there is another such item to reset the
|
||||
flags to their previous values. Other changes of flag within the pattern can be
|
||||
handled entirely at compile time, and so do not cause anything to be put into
|
||||
the compiled data.
|
||||
|
||||
If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
|
||||
opcode is compiled, followed by one byte containing the new settings of these
|
||||
flags. If there are several alternatives, there is an occurrence of OP_OPT at
|
||||
the start of all those following the first options change, to set appropriate
|
||||
options for the start of the alternative. Immediately after the end of the
|
||||
group there is another such item to reset the flags to their previous values. A
|
||||
change of flag right at the very start of the pattern can be handled entirely
|
||||
at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
August 2001
|
||||
August 2002
|
||||
|
@ -1100,6 +1100,8 @@ are
|
||||
word "word" characters (same as \\w)
|
||||
xdigit hexadecimal digits
|
||||
|
||||
>>>>>>>>>>>>Only WORD is perl. BLANK is GNU.
|
||||
|
||||
The names "ascii" and "word" are Perl extensions. Another Perl extension is
|
||||
negation, which is indicated by a ^ character after the colon. For example,
|
||||
|
||||
|
@ -1411,6 +1411,9 @@ are
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
>>>>>>>>>>>>Only WORD is perl. BLANK is GNU.
|
||||
</P>
|
||||
<P>
|
||||
The names "ascii" and "word" are Perl extensions. Another Perl extension is
|
||||
negation, which is indicated by a ^ character after the colon. For example,
|
||||
</P>
|
||||
|
@ -1273,6 +1273,8 @@ POSIX CHARACTER CLASSES
|
||||
word "word" characters (same as \w)
|
||||
xdigit hexadecimal digits
|
||||
|
||||
>>>>>>>>>>>>Only WORD is perl. BLANK is GNU.
|
||||
|
||||
The names "ascii" and "word" are Perl extensions. Another
|
||||
Perl extension is negation, which is indicated by a ^ char-
|
||||
acter after the colon. For example,
|
||||
@ -1416,7 +1418,6 @@ SUBPATTERNS
|
||||
are numbered 1 and 2. The maximum number of captured sub-
|
||||
strings is 99, and the maximum number of all subpatterns,
|
||||
both capturing and non-capturing, is 200.
|
||||
|
||||
As a convenient shorthand, if any option settings are
|
||||
required at the start of a non-capturing subpattern, the
|
||||
option letters may appear between the "?" and the ":". Thus
|
||||
@ -1468,8 +1469,9 @@ REPETITION
|
||||
matches exactly 8 digits. An opening curly bracket that
|
||||
appears in a position where a quantifier is not allowed, or
|
||||
one that does not match the syntax of a quantifier, is taken
|
||||
as a literal character. For example, {,6} is not a quantif-
|
||||
ier, but a literal string of four characters.
|
||||
as a literal character. For example, {,6} is not a
|
||||
quantifier, but a literal string of four characters.
|
||||
|
||||
The quantifier {0} is permitted, causing the expression to
|
||||
behave as if the previous item and the quantifier were not
|
||||
present.
|
||||
@ -1519,8 +1521,8 @@ REPETITION
|
||||
|
||||
does the right thing with the C comments. The meaning of the
|
||||
various quantifiers is not otherwise changed, just the pre-
|
||||
ferred number of matches. Do not confuse this use of ques-
|
||||
tion mark with its use as a quantifier in its own right.
|
||||
ferred number of matches. Do not confuse this use of
|
||||
question mark with its use as a quantifier in its own right.
|
||||
Because it has two uses, it can sometimes appear doubled, as
|
||||
in
|
||||
|
||||
@ -1571,17 +1573,10 @@ REPETITION
|
||||
|
||||
|
||||
|
||||
|
||||
BACK REFERENCES
|
||||
Outside a character class, a backslash followed by a digit
|
||||
greater than 0 (and possibly further digits) is a back
|
||||
|
||||
|
||||
|
||||
|
||||
SunOS 5.8 Last change: 30
|
||||
|
||||
|
||||
|
||||
reference to a capturing subpattern earlier (i.e. to its
|
||||
left) in the pattern, provided there have been that many
|
||||
previous capturing left parentheses.
|
||||
@ -1630,8 +1625,8 @@ SunOS 5.8 Last change: 30
|
||||
A back reference that occurs inside the parentheses to which
|
||||
it refers fails when the subpattern is first used, so, for
|
||||
example, (a\1) never matches. However, such references can
|
||||
be useful inside repeated subpatterns. For example, the pat-
|
||||
tern
|
||||
be useful inside repeated subpatterns. For example, the
|
||||
pattern
|
||||
|
||||
(a|b\1)+
|
||||
|
||||
@ -2100,12 +2095,11 @@ UTF-8 SUPPORT
|
||||
UTF-8 codes. It does not diagnose invalid UTF-8 strings. If
|
||||
you pass invalid UTF-8 strings to PCRE, the results are
|
||||
undefined.
|
||||
|
||||
Running with PCRE_UTF8 set causes these changes in the way
|
||||
PCRE works:
|
||||
|
||||
1. In a pattern, the escape sequence \x{...}, where the
|
||||
contents of the braces is a string of hexadecimal digits, is
|
||||
1. In a pattern, the escape sequence \x{...}, where the con-
|
||||
tents of the braces is a string of hexadecimal digits, is
|
||||
interpreted as a UTF-8 character whose code number is the
|
||||
given hexadecimal number, for example: \x{1234}. This
|
||||
inserts from one to six literal bytes into the pattern,
|
||||
@ -2153,7 +2147,6 @@ UTF-8 SUPPORT
|
||||
9. The character types such as \d and \w do not work
|
||||
correctly with UTF-8 characters. They continue to test a
|
||||
single byte.
|
||||
|
||||
10. Anything not explicitly mentioned here continues to work
|
||||
in bytes rather than in characters.
|
||||
|
||||
@ -2310,6 +2303,5 @@ AUTHOR
|
||||
New Museums Site,
|
||||
Cambridge CB2 3QG, England.
|
||||
Phone: +44 1223 334714
|
||||
|
||||
Last updated: 15 August 2001
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
|
@ -2,7 +2,7 @@
|
||||
.SH NAME
|
||||
pcregrep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
.B pcregrep [-Vcfhilnrsvx] pattern [file] ...
|
||||
.B pcregrep [-Vcfhilnrsvx] [pattern] [file1 file2 ...]
|
||||
|
||||
|
||||
.SH DESCRIPTION
|
||||
@ -11,6 +11,9 @@ grep commands do, but it uses the PCRE regular expression library to support
|
||||
patterns that are compatible with the regular expressions of Perl 5. See
|
||||
\fBpcre(3)\fR for a full description of syntax and semantics.
|
||||
|
||||
A pattern must be specified on the command line unless the \fB-f\fR option is
|
||||
used (see below).
|
||||
|
||||
If no files are specified, \fBpcregrep\fR reads the standard input. By default,
|
||||
each line that matches the pattern is copied to the standard output, and if
|
||||
there is more than one file, the file name is printed before each line of
|
||||
@ -32,11 +35,12 @@ Do not print individual lines; instead just print a count of the number of
|
||||
lines that would otherwise have been printed. If several files are given, a
|
||||
count is printed for each of them.
|
||||
.TP
|
||||
\fB-f\fIfilename\fR
|
||||
Read patterns from the file, one per line, and match all patterns against each
|
||||
line. There is a maximum of 100 patterns. Trailing white space is removed, and
|
||||
blank lines are ignored. An empty file contains no patterns and therefore
|
||||
matches nothing.
|
||||
\fB-f\fIfilename\fR Read a number of patterns from the file, one per line, and
|
||||
match all of them against each line of input. A line is output if any of the
|
||||
patterns match it. When \fB-f\fR is used, no pattern is taken from the command
|
||||
line; all arguments are treated as file names. There is a maximum of 100
|
||||
patterns. Trailing white space is removed, and blank lines are ignored. An
|
||||
empty file contains no patterns and therefore matches nothing.
|
||||
.TP
|
||||
\fB-h\fR
|
||||
Suppress printing of filenames when searching multiple files.
|
||||
@ -83,6 +87,6 @@ for syntax errors or inaccessible files (even if matches were found).
|
||||
.SH AUTHOR
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Last updated: 15 August 2001
|
||||
Last updated: 25 July 2002
|
||||
.br
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -22,7 +22,7 @@ pcregrep - a grep with Perl-compatible regular expressions.
|
||||
</P>
|
||||
<LI><A NAME="SEC2" HREF="#TOC1">SYNOPSIS</A>
|
||||
<P>
|
||||
<B>pcregrep [-Vcfhilnrsvx] pattern [file] ...</B>
|
||||
<B>pcregrep [-Vcfhilnrsvx] [pattern] [file1 file2 ...]</B>
|
||||
</P>
|
||||
<LI><A NAME="SEC3" HREF="#TOC1">DESCRIPTION</A>
|
||||
<P>
|
||||
@ -32,6 +32,10 @@ patterns that are compatible with the regular expressions of Perl 5. See
|
||||
<B>pcre(3)</B> for a full description of syntax and semantics.
|
||||
</P>
|
||||
<P>
|
||||
A pattern must be specified on the command line unless the <B>-f</B> option is
|
||||
used (see below).
|
||||
</P>
|
||||
<P>
|
||||
If no files are specified, <B>pcregrep</B> reads the standard input. By default,
|
||||
each line that matches the pattern is copied to the standard output, and if
|
||||
there is more than one file, the file name is printed before each line of
|
||||
@ -55,11 +59,12 @@ lines that would otherwise have been printed. If several files are given, a
|
||||
count is printed for each of them.
|
||||
</P>
|
||||
<P>
|
||||
<B>-f</B><I>filename</I>
|
||||
Read patterns from the file, one per line, and match all patterns against each
|
||||
line. There is a maximum of 100 patterns. Trailing white space is removed, and
|
||||
blank lines are ignored. An empty file contains no patterns and therefore
|
||||
matches nothing.
|
||||
<B>-f</B><I>filename</I> Read a number of patterns from the file, one per line, and
|
||||
match all of them against each line of input. A line is output if any of the
|
||||
patterns match it. When <B>-f</B> is used, no pattern is taken from the command
|
||||
line; all arguments are treated as file names. There is a maximum of 100
|
||||
patterns. Trailing white space is removed, and blank lines are ignored. An
|
||||
empty file contains no patterns and therefore matches nothing.
|
||||
</P>
|
||||
<P>
|
||||
<B>-h</B>
|
||||
@ -115,6 +120,6 @@ for syntax errors or inaccessible files (even if matches were found).
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
</P>
|
||||
<P>
|
||||
Last updated: 15 August 2001
|
||||
Last updated: 25 July 2002
|
||||
<BR>
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -4,7 +4,7 @@ NAME
|
||||
|
||||
|
||||
SYNOPSIS
|
||||
pcregrep [-Vcfhilnrsvx] pattern [file] ...
|
||||
pcregrep [-Vcfhilnrsvx] [pattern] [file1 file2 ...]
|
||||
|
||||
|
||||
|
||||
@ -15,6 +15,9 @@ DESCRIPTION
|
||||
with the regular expressions of Perl 5. See pcre(3) for a
|
||||
full description of syntax and semantics.
|
||||
|
||||
A pattern must be specified on the command line unless the
|
||||
-f option is used (see below).
|
||||
|
||||
If no files are specified, pcregrep reads the standard
|
||||
input. By default, each line that matches the pattern is
|
||||
copied to the standard output, and if there is more than one
|
||||
@ -37,13 +40,19 @@ OPTIONS
|
||||
wise have been printed. If several files are
|
||||
given, a count is printed for each of them.
|
||||
|
||||
-ffilename
|
||||
Read patterns from the file, one per line, and
|
||||
match all patterns against each line. There is a
|
||||
maximum of 100 patterns. Trailing white space is
|
||||
removed, and blank lines are ignored. An empty
|
||||
file contains no patterns and therefore matches
|
||||
nothing.
|
||||
|
||||
|
||||
-ffilename Read a number of patterns from the file, one per line, and
|
||||
match all of them against each line of input. A
|
||||
line is output if any of the patterns match it.
|
||||
When -f is used, no pattern is taken from the com-
|
||||
mand line; all arguments are treated as file
|
||||
names. There is a maximum of 100 patterns. Trail-
|
||||
ing white space is removed, and blank lines are
|
||||
ignored. An empty file contains no patterns and
|
||||
therefore matches nothing.
|
||||
|
||||
-h Suppress printing of filenames when searching mul-
|
||||
tiple files.
|
||||
@ -52,7 +61,6 @@ OPTIONS
|
||||
parisons.
|
||||
|
||||
-l Instead of printing lines from the files, just
|
||||
|
||||
print the names of the files containing lines that
|
||||
would have been printed. Each file name is printed
|
||||
once, on a separate line.
|
||||
@ -97,5 +105,5 @@ DIAGNOSTICS
|
||||
AUTHOR
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Last updated: 15 August 2001
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Last updated: 25 July 2002
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -35,7 +35,7 @@ Behave as if each regex has \fB/P\fR modifier; the POSIX wrapper API is used
|
||||
to call PCRE. None of the other options has any effect when \fB-p\fR is set.
|
||||
.TP 10
|
||||
\fB-t\fR
|
||||
Run each compile, study, and match 20000 times with a timer, and output
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set \fB-t\fR with
|
||||
\fB-m\fR, because you will then get the size output 20000 times and the timing
|
||||
will be distorted.
|
||||
@ -51,10 +51,16 @@ expressions, and "data>" to prompt for data lines.
|
||||
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern. An empty line signals the end of the
|
||||
data lines, at which point a new regular expression is read. The regular
|
||||
expressions are given enclosed in any non-alphameric delimiters other than
|
||||
backslash, for example
|
||||
lines to be matched against the pattern.
|
||||
|
||||
Each line is matched separately and independently. If you want to do
|
||||
multiple-line matches, you have to use the \\n escape sequence in a single line
|
||||
of input to encode the newline characters. The maximum length of data line is
|
||||
30,000 characters.
|
||||
|
||||
An empty line signals the end of the data lines, at which point a new regular
|
||||
expression is read. The regular expressions are given enclosed in any
|
||||
non-alphameric delimiters other than backslash, for example
|
||||
|
||||
/(a|bc)x+yz/
|
||||
|
||||
@ -277,6 +283,6 @@ Cambridge CB2 3QG, England.
|
||||
.br
|
||||
Phone: +44 1223 334714
|
||||
|
||||
Last updated: 15 August 2001
|
||||
Last updated: 25 August 2002
|
||||
.br
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -62,7 +62,7 @@ to call PCRE. None of the other options has any effect when <B>-p</B> is set.
|
||||
</P>
|
||||
<P>
|
||||
<B>-t</B>
|
||||
Run each compile, study, and match 20000 times with a timer, and output
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set <B>-t</B> with
|
||||
<B>-m</B>, because you will then get the size output 20000 times and the timing
|
||||
will be distorted.
|
||||
@ -78,10 +78,18 @@ expressions, and "data>" to prompt for data lines.
|
||||
<P>
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern. An empty line signals the end of the
|
||||
data lines, at which point a new regular expression is read. The regular
|
||||
expressions are given enclosed in any non-alphameric delimiters other than
|
||||
backslash, for example
|
||||
lines to be matched against the pattern.
|
||||
</P>
|
||||
<P>
|
||||
Each line is matched separately and independently. If you want to do
|
||||
multiple-line matches, you have to use the \n escape sequence in a single line
|
||||
of input to encode the newline characters. The maximum length of data line is
|
||||
30,000 characters.
|
||||
</P>
|
||||
<P>
|
||||
An empty line signals the end of the data lines, at which point a new regular
|
||||
expression is read. The regular expressions are given enclosed in any
|
||||
non-alphameric delimiters other than backslash, for example
|
||||
</P>
|
||||
<P>
|
||||
<PRE>
|
||||
@ -364,6 +372,6 @@ Cambridge CB2 3QG, England.
|
||||
Phone: +44 1223 334714
|
||||
</P>
|
||||
<P>
|
||||
Last updated: 15 August 2001
|
||||
Last updated: 25 August 2002
|
||||
<BR>
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -42,11 +42,11 @@ OPTIONS
|
||||
wrapper API is used to call PCRE. None of the
|
||||
other options has any effect when -p is set.
|
||||
|
||||
-t Run each compile, study, and match 20000 times
|
||||
with a timer, and output resulting time per com-
|
||||
pile or match (in milliseconds). Do not set -t
|
||||
with -m, because you will then get the size output
|
||||
20000 times and the timing will be distorted.
|
||||
-t Run each compile, study, and match many times with
|
||||
a timer, and output resulting time per compile or
|
||||
match (in milliseconds). Do not set -t with -m,
|
||||
because you will then get the size output 20000
|
||||
times and the timing will be distorted.
|
||||
|
||||
|
||||
|
||||
@ -70,10 +70,18 @@ SunOS 5.8 Last change: 1
|
||||
The program handles any number of sets of input on a single
|
||||
input file. Each set starts with a regular expression, and
|
||||
continues with any number of data lines to be matched
|
||||
against the pattern. An empty line signals the end of the
|
||||
data lines, at which point a new regular expression is read.
|
||||
The regular expressions are given enclosed in any non-
|
||||
alphameric delimiters other than backslash, for example
|
||||
against the pattern.
|
||||
|
||||
Each line is matched separately and independently. If you
|
||||
want to do multiple-line matches, you have to use the \n
|
||||
escape sequence in a single line of input to encode the new-
|
||||
line characters. The maximum length of data line is 30,000
|
||||
characters.
|
||||
|
||||
An empty line signals the end of the data lines, at which
|
||||
point a new regular expression is read. The regular expres-
|
||||
sions are given enclosed in any non-alphameric delimiters
|
||||
other than backslash, for example
|
||||
|
||||
/(a|bc)x+yz/
|
||||
|
||||
@ -165,6 +173,7 @@ PATTERN MODIFIERS
|
||||
pcre_fullinfo() after compiling an expression, and output-
|
||||
ting the information it gets back. If the pattern is stu-
|
||||
died, the results of that are also output.
|
||||
|
||||
The /D modifier is a PCRE debugging feature, which also
|
||||
assumes /I. It causes the internal form of compiled regular
|
||||
expressions to be output after compilation.
|
||||
@ -208,7 +217,8 @@ DATA LINES
|
||||
\t tab
|
||||
\v vertical tab
|
||||
\nnn octal character (up to 3 octal digits)
|
||||
\xhh hexadecimal character (up to 2 hex digits)
|
||||
|
||||
\xhh hexadecimal character (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal UTF-8 character
|
||||
|
||||
\A pass the PCRE_ANCHORED option to pcre_exec()
|
||||
@ -217,7 +227,6 @@ DATA LINES
|
||||
after a successful match (any decimal number
|
||||
less than 32)
|
||||
\Gdd call pcre_get_substring() for substring dd
|
||||
|
||||
after a successful match (any decimal number
|
||||
less than 32)
|
||||
\L call pcre_get_substringlist() after a
|
||||
@ -261,6 +270,7 @@ OUTPUT FROM PCRETEST
|
||||
|
||||
re> /^abc(\d+)/
|
||||
data> abc123
|
||||
|
||||
0: abc123
|
||||
1: 123
|
||||
data> xyz
|
||||
@ -315,5 +325,5 @@ AUTHOR
|
||||
Cambridge CB2 3QG, England.
|
||||
Phone: +44 1223 334714
|
||||
|
||||
Last updated: 15 August 2001
|
||||
Copyright (c) 1997-2001 University of Cambridge.
|
||||
Last updated: 25 August 2002
|
||||
Copyright (c) 1997-2002 University of Cambridge.
|
||||
|
@ -13,10 +13,15 @@ for perltest as well as for pcretest, and the special upper case modifiers such
|
||||
as /A that pcretest recognizes are not used in these files. The output should
|
||||
be identical, apart from the initial identifying banner.
|
||||
|
||||
For testing UTF-8 features, an alternative form of perltest, called perltest8,
|
||||
is supplied. This requires Perl 5.6 or higher. It recognizes the special
|
||||
modifier /8 that pcretest uses to invoke UTF-8 functionality. The testinput5
|
||||
file can be fed to perltest8.
|
||||
The perltest script can also test UTF-8 features. It works as is for Perl 5.8
|
||||
or higher. It recognizes the special modifier /8 that pcretest uses to invoke
|
||||
UTF-8 functionality. The testinput5 file can be fed to perltest to run UTF-8
|
||||
tests.
|
||||
|
||||
For Perl 5.6, perltest won't work unmodified for the UTF-8 tests. You need to
|
||||
uncomment the "use utf8" lines that it contains. It is best to do this on a
|
||||
copy of the script, because for non-UTF-8 tests, these lines should remain
|
||||
commented out.
|
||||
|
||||
The testinput2 and testinput4 files are not suitable for feeding to perltest,
|
||||
since they do make use of the special upper case modifiers and escapes that
|
||||
@ -26,4 +31,4 @@ them correctly. Similarly, testinput6 tests UTF-8 features that do not relate
|
||||
to Perl.
|
||||
|
||||
Philip Hazel <ph10@cam.ac.uk>
|
||||
August 2000
|
||||
August 2002
|
||||
|
@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2001 University of Cambridge
|
||||
Copyright (c) 1997-2002 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
@ -52,7 +52,18 @@ On Unix systems, "configure" can be used to override this default. */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE '\n'
|
||||
#endif
|
||||
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
@ -64,7 +75,7 @@ case in PCRE. */
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#if HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else
|
||||
#else /* HAVE_BCOPY */
|
||||
void *
|
||||
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
|
||||
{
|
||||
@ -74,8 +85,85 @@ src += n;
|
||||
for (i = 0; i < n; ++i) *(--dest) = *(--src);
|
||||
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities by default.
|
||||
These are used, for example, to link from the start of a subpattern to its
|
||||
alternatives and its end. The use of 2 bytes per offset limits the size of the
|
||||
compiled regex to around 64K, which is big enough for almost everybody.
|
||||
However, I received a request for an even bigger limit. For this reason, and
|
||||
also to make the code easier to maintain, the storing and loading of offsets
|
||||
from the byte string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
||||
the config.h file, but can be overridden by using -D on the command line. This
|
||||
is automated on Unix systems via the "configure" command. */
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 8), \
|
||||
(a[(n)+1] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 8) | (a)[(n)+1])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 16), \
|
||||
(a[(n)+1] = (d) >> 8), \
|
||||
(a[(n)+2] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d) >> 24), \
|
||||
(a[(n)+1] = (d) >> 16), \
|
||||
(a[(n)+2] = (d) >> 8), \
|
||||
(a[(n)+3] = (d) & 255)
|
||||
|
||||
#define GET(a,n) \
|
||||
(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
||||
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be either 2, 3, or 4
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Convenience macro defined in terms of the others */
|
||||
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
|
||||
|
||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
||||
offsets changes. These are used for repeat counts and for other things such as
|
||||
capturing parenthesis numbers in back references. */
|
||||
|
||||
#define PUT2(a,n,d) \
|
||||
a[n] = (d) >> 8; \
|
||||
a[(n)+1] = (d) & 255
|
||||
|
||||
#define GET2(a,n) \
|
||||
(((a)[n] << 8) | (a)[(n)+1])
|
||||
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2
|
||||
|
||||
|
||||
/* Standard C headers plus the external interface definition */
|
||||
|
||||
@ -107,8 +195,7 @@ to four bytes there is plenty of space. */
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_char is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_char is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_INGROUP 0x08000000 /* compiling inside a group */
|
||||
#define PCRE_ICHANGED 0x04000000 /* i option changes within regex */
|
||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
@ -130,6 +217,15 @@ time, run time or study time, respectively. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* Negative values for the firstchar and reqchar variables */
|
||||
|
||||
#define REQ_UNSET (-2)
|
||||
#define REQ_NONE (-1)
|
||||
|
||||
/* Flags added to firstchar or reqchar */
|
||||
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
|
||||
typedef int BOOL;
|
||||
@ -138,143 +234,213 @@ typedef int BOOL;
|
||||
#define TRUE 1
|
||||
|
||||
/* Escape items that are just an encoding of a particular data value. Note that
|
||||
ESC_N is defined as yet another macro, which is set in config.h to either \n
|
||||
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
||||
(the default) or \r (which some people want). */
|
||||
|
||||
#ifndef ESC_E
|
||||
#define ESC_E 27
|
||||
#ifndef ESC_e
|
||||
#define ESC_e 27
|
||||
#endif
|
||||
|
||||
#ifndef ESC_F
|
||||
#define ESC_F '\f'
|
||||
#ifndef ESC_f
|
||||
#define ESC_f '\f'
|
||||
#endif
|
||||
|
||||
#ifndef ESC_N
|
||||
#define ESC_N NEWLINE
|
||||
#ifndef ESC_n
|
||||
#define ESC_n NEWLINE
|
||||
#endif
|
||||
|
||||
#ifndef ESC_R
|
||||
#define ESC_R '\r'
|
||||
#ifndef ESC_r
|
||||
#define ESC_r '\r'
|
||||
#endif
|
||||
|
||||
#ifndef ESC_T
|
||||
#define ESC_T '\t'
|
||||
#ifndef ESC_t
|
||||
#define ESC_t '\t'
|
||||
#endif
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_z. The final one must be ESC_REF as subsequent
|
||||
values are used for \1, \2, \3, etc. There is a test in the code for an escape
|
||||
greater than ESC_b and less than ESC_Z to detect the types that may be
|
||||
repeated. If any new escapes are put in-between that don't consume a character,
|
||||
that code will have to change. */
|
||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||
corresponds to "." rather than an escape sequence. The final one must be
|
||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
||||
detect the types that may be repeated. These are the types that consume a
|
||||
character. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF };
|
||||
|
||||
enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w,
|
||||
ESC_Z, ESC_z, ESC_REF };
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above. */
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
Note that whenever this list is updated, the two macro definitions that follow
|
||||
must also be updated to match. */
|
||||
|
||||
enum {
|
||||
OP_END, /* End of pattern */
|
||||
OP_END, /* 0 End of pattern */
|
||||
|
||||
/* Values corresponding to backslashed metacharacters */
|
||||
|
||||
OP_SOD, /* Start of data: \A */
|
||||
OP_NOT_WORD_BOUNDARY, /* \B */
|
||||
OP_WORD_BOUNDARY, /* \b */
|
||||
OP_NOT_DIGIT, /* \D */
|
||||
OP_DIGIT, /* \d */
|
||||
OP_NOT_WHITESPACE, /* \S */
|
||||
OP_WHITESPACE, /* \s */
|
||||
OP_NOT_WORDCHAR, /* \W */
|
||||
OP_WORDCHAR, /* \w */
|
||||
OP_EODN, /* End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* End of data: \z */
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
||||
OP_WORD_BOUNDARY, /* 4 \b */
|
||||
OP_NOT_DIGIT, /* 5 \D */
|
||||
OP_DIGIT, /* 6 \d */
|
||||
OP_NOT_WHITESPACE, /* 7 \S */
|
||||
OP_WHITESPACE, /* 8 \s */
|
||||
OP_NOT_WORDCHAR, /* 9 \W */
|
||||
OP_WORDCHAR, /* 10 \w */
|
||||
OP_ANY, /* 11 Match any character */
|
||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_EODN, /* 13 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 14 End of data: \z */
|
||||
|
||||
OP_OPT, /* Set runtime options */
|
||||
OP_CIRC, /* Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* End of line - varies with multiline switch */
|
||||
OP_ANY, /* Match any character */
|
||||
OP_CHARS, /* Match string of characters */
|
||||
OP_NOT, /* Match anything but the following char */
|
||||
OP_OPT, /* 15 Set runtime options */
|
||||
OP_CIRC, /* 16 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 17 End of line - varies with multiline switch */
|
||||
OP_CHARS, /* 18 Match string of characters */
|
||||
OP_NOT, /* 19 Match anything but the following char */
|
||||
|
||||
OP_STAR, /* The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* all these opcodes must come in pairs, with */
|
||||
OP_PLUS, /* the minimizing one second. */
|
||||
OP_MINPLUS, /* This first set applies to single characters */
|
||||
OP_QUERY,
|
||||
OP_MINQUERY,
|
||||
OP_UPTO, /* From 0 to n matches */
|
||||
OP_MINUPTO,
|
||||
OP_EXACT, /* Exactly n matches */
|
||||
OP_STAR, /* 20 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 21 all these opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 22 the minimizing one second. */
|
||||
OP_MINPLUS, /* 23 This first set applies to single characters */
|
||||
OP_QUERY, /* 24 */
|
||||
OP_MINQUERY, /* 25 */
|
||||
OP_UPTO, /* 26 From 0 to n matches */
|
||||
OP_MINUPTO, /* 27 */
|
||||
OP_EXACT, /* 28 Exactly n matches */
|
||||
|
||||
OP_NOTSTAR, /* The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* all these opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* the minimizing one second. */
|
||||
OP_NOTMINPLUS, /* This first set applies to "not" single characters */
|
||||
OP_NOTQUERY,
|
||||
OP_NOTMINQUERY,
|
||||
OP_NOTUPTO, /* From 0 to n matches */
|
||||
OP_NOTMINUPTO,
|
||||
OP_NOTEXACT, /* Exactly n matches */
|
||||
OP_NOTSTAR, /* 29 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 30 all these opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 31 the minimizing one second. */
|
||||
OP_NOTMINPLUS, /* 32 This set applies to "not" single characters */
|
||||
OP_NOTQUERY, /* 33 */
|
||||
OP_NOTMINQUERY, /* 34 */
|
||||
OP_NOTUPTO, /* 35 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 36 */
|
||||
OP_NOTEXACT, /* 37 Exactly n matches */
|
||||
|
||||
OP_TYPESTAR, /* The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* all these opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY,
|
||||
OP_TYPEUPTO, /* From 0 to n matches */
|
||||
OP_TYPEMINUPTO,
|
||||
OP_TYPEEXACT, /* Exactly n matches */
|
||||
OP_TYPESTAR, /* 38 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 39 all these opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 40 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 41 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 42 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 43 */
|
||||
OP_TYPEUPTO, /* 44 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 45 */
|
||||
OP_TYPEEXACT, /* 46 Exactly n matches */
|
||||
|
||||
OP_CRSTAR, /* The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* These are for character classes and back refs */
|
||||
OP_CRMINQUERY,
|
||||
OP_CRRANGE, /* These are different to the three seta above. */
|
||||
OP_CRMINRANGE,
|
||||
OP_CRSTAR, /* 47 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 48 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 49 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 50 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 51 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 52 */
|
||||
OP_CRRANGE, /* 53 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 54 */
|
||||
|
||||
OP_CLASS, /* Match a character class */
|
||||
OP_REF, /* Match a back reference */
|
||||
OP_RECURSE, /* Match this pattern recursively */
|
||||
OP_CLASS, /* 55 Match a character class */
|
||||
OP_REF, /* 56 Match a back reference */
|
||||
OP_RECURSE, /* 57 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 58 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* Start of alternation */
|
||||
OP_KET, /* End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* These two must remain together and in this */
|
||||
OP_KETRMIN, /* order. They are for groups the repeat for ever. */
|
||||
OP_ALT, /* 59 Start of alternation */
|
||||
OP_KET, /* 60 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 61 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 62 order. They are for groups that repeat for ever. */
|
||||
|
||||
/* The assertions must come before ONCE and COND */
|
||||
|
||||
OP_ASSERT, /* Positive lookahead */
|
||||
OP_ASSERT_NOT, /* Negative lookahead */
|
||||
OP_ASSERTBACK, /* Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* Negative lookbehind */
|
||||
OP_REVERSE, /* Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 63 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 64 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 65 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 66 Negative lookbehind */
|
||||
OP_REVERSE, /* 67 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
||||
a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* Once matched, don't back up into the subpattern */
|
||||
OP_COND, /* Conditional group */
|
||||
OP_CREF, /* Used to hold an extraction string number (cond ref) */
|
||||
OP_ONCE, /* 68 Once matched, don't back up into the subpattern */
|
||||
OP_COND, /* 69 Conditional group */
|
||||
OP_CREF, /* 70 Used to hold an extraction string number (cond ref) */
|
||||
|
||||
OP_BRAZERO, /* These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* order. */
|
||||
OP_BRAZERO, /* 71 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 72 order. */
|
||||
|
||||
OP_BRANUMBER, /* Used for extracting brackets whose number is greater
|
||||
than can fit into an opcode. */
|
||||
OP_BRANUMBER, /* 73 Used for extracting brackets whose number is greater
|
||||
than can fit into an opcode. */
|
||||
|
||||
OP_BRA /* This and greater values are used for brackets that
|
||||
extract substrings up to a basic limit. After that,
|
||||
use is made of OP_BRANUMBER. */
|
||||
OP_BRA /* 74 This and greater values are used for brackets that
|
||||
extract substrings up to a basic limit. After that,
|
||||
use is made of OP_BRANUMBER. */
|
||||
};
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The
|
||||
macro is referenced only in printint.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \
|
||||
"Opt", "^", "$", "chars", "not", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"class", "Ref", "Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
|
||||
"Brazero", "Braminzero", "Branumber", "Bra"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
regex. The lengths are used when searching for specific things, and also in the
|
||||
debugging printing of a compiled regex. We use a macro so that it can be
|
||||
incorporated both into pcre.c and pcretest.c without being publicly exposed. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Chars - the minimum length */ \
|
||||
2, /* not */ \
|
||||
/* Positive single-char repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* upto, minupto, exact */ \
|
||||
/* Negative single-char repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* NOT upto, minupto, exact */ \
|
||||
/* Positive type repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* Type upto, minupto, exact */ \
|
||||
/* Multi-char class repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
5, 5, /* CRRANGE, CRMINRANGE */ \
|
||||
33, 3, /* CLASS, REF */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
2, /* CALLOUT */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
1+LINK_SIZE, /* KetRmin */ \
|
||||
1+LINK_SIZE, /* Assert */ \
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* Reverse */ \
|
||||
1+LINK_SIZE, /* Once */ \
|
||||
1, /* COND */ \
|
||||
3, /* CREF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
3, /* BRANUMBER */ \
|
||||
1+LINK_SIZE /* BRA */ \
|
||||
|
||||
|
||||
/* The highest extraction number before we have to start using additional
|
||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||
@ -283,6 +449,10 @@ opcodes. */
|
||||
|
||||
#define EXTRACT_BASIC_MAX 150
|
||||
|
||||
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
||||
|
||||
#define CREF_RECURSE 0xffff
|
||||
|
||||
/* The texts of compile-time error messages are defined as macros here so that
|
||||
they can be accessed by the POSIX wrapper and converted into error codes. Yes,
|
||||
I could have used error codes in the first place, but didn't feel like changing
|
||||
@ -300,9 +470,9 @@ just to accommodate the POSIX wrapper. */
|
||||
#define ERR10 "operand of unlimited repeat could match the empty string"
|
||||
#define ERR11 "internal error: unexpected repeat"
|
||||
#define ERR12 "unrecognized character after (?"
|
||||
#define ERR13 "unused error"
|
||||
#define ERR13 "POSIX named classes are supported only within a class"
|
||||
#define ERR14 "missing )"
|
||||
#define ERR15 "back reference to non-existent subpattern"
|
||||
#define ERR15 "reference to non-existent subpattern"
|
||||
#define ERR16 "erroffset passed as NULL"
|
||||
#define ERR17 "unknown option bit(s) set"
|
||||
#define ERR18 "missing ) after comment"
|
||||
@ -316,13 +486,21 @@ just to accommodate the POSIX wrapper. */
|
||||
#define ERR26 "malformed number after (?("
|
||||
#define ERR27 "conditional group contains more than two branches"
|
||||
#define ERR28 "assertion expected after (?("
|
||||
#define ERR29 "(?p must be followed by )"
|
||||
#define ERR29 "(?R or (?digits must be followed by )"
|
||||
#define ERR30 "unknown POSIX class name"
|
||||
#define ERR31 "POSIX collating elements are not supported"
|
||||
#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support"
|
||||
#define ERR33 "characters with values > 255 are not yet supported in classes"
|
||||
#define ERR34 "character value in \\x{...} sequence is too large"
|
||||
#define ERR35 "invalid condition (?(0)"
|
||||
#define ERR36 "\\C not allowed in lookbehind assertion"
|
||||
#define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X"
|
||||
#define ERR38 "number after (?C is > 255"
|
||||
#define ERR39 "closing ) for (?C expected"
|
||||
#define ERR40 "recursive call could loop indefinitely"
|
||||
#define ERR41 "unrecognized character after (?P"
|
||||
#define ERR42 "syntax error after (?P"
|
||||
#define ERR43 "two named groups have the same name"
|
||||
|
||||
/* All character handling must be done as unsigned characters. Otherwise there
|
||||
are problems with top-bit-set characters and functions such as isspace().
|
||||
@ -333,19 +511,20 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/* The real format of the start of the pcre block; the actual code vector
|
||||
runs on as long as necessary after the end. */
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. */
|
||||
|
||||
typedef struct real_pcre {
|
||||
unsigned long int magic_number;
|
||||
size_t size;
|
||||
const unsigned char *tables;
|
||||
size_t size; /* Total that was malloced */
|
||||
const unsigned char *tables; /* Pointer to tables */
|
||||
unsigned long int options;
|
||||
unsigned short int top_bracket;
|
||||
unsigned short int top_backref;
|
||||
uschar first_char;
|
||||
uschar req_char;
|
||||
uschar code[1];
|
||||
unsigned short int first_char;
|
||||
unsigned short int req_char;
|
||||
unsigned short int name_entry_size; /* Size of any name items; 0 => none */
|
||||
unsigned short int name_count; /* Number of name items */
|
||||
} real_pcre;
|
||||
|
||||
/* The real format of the extra block returned by pcre_study(). */
|
||||
@ -364,8 +543,32 @@ typedef struct compile_data {
|
||||
const uschar *fcc; /* Points to case-flipping table */
|
||||
const uschar *cbits; /* Points to character type table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
const uschar *start_code; /* The start of the compiled code */
|
||||
uschar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
} compile_data;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer;
|
||||
uschar *current;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern. */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prev; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
const uschar *save_start; /* Old value of md->start_match */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the matching, so that they are thread-safe. */
|
||||
|
||||
@ -382,12 +585,15 @@ typedef struct match_data {
|
||||
BOOL utf8; /* UTF8 flag */
|
||||
BOOL endonly; /* Dollar not before final \n */
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
const uschar *start_pattern; /* For use when recursing */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of the subject string */
|
||||
const uschar *start_match; /* Start of this match attempt */
|
||||
const uschar *end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
int start_offset; /* The start offset value */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
} match_data;
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
@ -82,7 +82,9 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort
|
||||
on exclusive ones - in some locales things may be different. */
|
||||
on exclusive ones - in some locales things may be different. Note that the
|
||||
table for "space" includes everything "isspace" gives, including VT in the
|
||||
default locale. This makes it work for the POSIX class [:space:]. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
@ -112,12 +114,14 @@ for (i = 0; i < 256; i++)
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table */
|
||||
/* Finally, the character type table. In this, we exclude VT from the white
|
||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||
within regexes. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (isspace(i)) x += ctype_space;
|
||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
|
@ -1,59 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
exec_prefix_set=no
|
||||
|
||||
usage="\
|
||||
Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]"
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
while test $# -gt 0; do
|
||||
case "$1" in
|
||||
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
|
||||
*) optarg= ;;
|
||||
esac
|
||||
|
||||
case $1 in
|
||||
--prefix=*)
|
||||
prefix=$optarg
|
||||
if test $exec_prefix_set = no ; then
|
||||
exec_prefix=$optarg
|
||||
fi
|
||||
;;
|
||||
--prefix)
|
||||
echo $prefix
|
||||
;;
|
||||
--exec-prefix=*)
|
||||
exec_prefix=$optarg
|
||||
exec_prefix_set=yes
|
||||
;;
|
||||
--exec-prefix)
|
||||
echo $exec_prefix
|
||||
;;
|
||||
--version)
|
||||
echo @PCRE_VERSION@
|
||||
;;
|
||||
--cflags | --cflags-posix)
|
||||
if test @includedir@ != /usr/include ; then
|
||||
includes=-I@includedir@
|
||||
fi
|
||||
echo $includes
|
||||
;;
|
||||
--libs-posix)
|
||||
echo -L@libdir@ -lpcreposix -lpcre
|
||||
;;
|
||||
--libs)
|
||||
echo -L@libdir@ -lpcre
|
||||
;;
|
||||
*)
|
||||
echo "${usage}" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,10 @@ pcre_copy_substring
|
||||
pcre_exec
|
||||
pcre_get_substring
|
||||
pcre_get_substring_list
|
||||
pcre_free_substring
|
||||
pcre_free_substring_list
|
||||
pcre_info
|
||||
pcre_fullinfo
|
||||
pcre_maketables
|
||||
pcre_study
|
||||
pcre_version
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* Copyright (c) 1997-2001 University of Cambridge */
|
||||
/* Copyright (c) 1997-2002 University of Cambridge */
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
@ -11,9 +11,10 @@
|
||||
make changes to pcre.in. */
|
||||
|
||||
#include "php_compat.h"
|
||||
|
||||
#define PCRE_MAJOR 3
|
||||
#define PCRE_MINOR 9
|
||||
#define PCRE_DATE 02-Jan-2002
|
||||
#define PCRE_MINOR 92
|
||||
#define PCRE_DATE 11-Sep-2002
|
||||
|
||||
/* Win32 uses DLL by default */
|
||||
|
||||
@ -72,6 +73,9 @@ extern "C" {
|
||||
#define PCRE_INFO_FIRSTCHAR 4
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
|
||||
/* Types */
|
||||
|
||||
@ -81,32 +85,64 @@ struct real_pcre_extra; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
typedef struct real_pcre_extra pcre_extra;
|
||||
|
||||
/* Store get and free functions. These can be set to alternative malloc/free
|
||||
functions if required. Some magic is required for Win32 DLL; it is null on
|
||||
other OS. */
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
const char *subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. There is also an optional
|
||||
callout function that is triggered by the (?C) regex item. Some magic is
|
||||
required for Win32 DLL; it is null on other OS. For Virtual Pascal, these have
|
||||
to be different again. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_DL_IMPORT extern void *(*pcre_malloc)(size_t);
|
||||
PCRE_DL_IMPORT extern void (*pcre_free)(void *);
|
||||
PCRE_DL_IMPORT extern int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
extern void *pcre_malloc(size_t);
|
||||
extern void pcre_free(void *);
|
||||
extern int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DL_IMPORT extern pcre *pcre_compile(const char *, int, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DL_IMPORT extern int pcre_copy_substring(const char *, int *, int, int,
|
||||
char *, int);
|
||||
PCRE_DL_IMPORT extern int pcre_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int);
|
||||
PCRE_DL_IMPORT extern void pcre_free_substring(const char *);
|
||||
PCRE_DL_IMPORT extern void pcre_free_substring_list(const char **);
|
||||
PCRE_DL_IMPORT extern int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DL_IMPORT extern int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DL_IMPORT extern int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DL_IMPORT extern int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DL_IMPORT extern const unsigned char *pcre_maketables(void);
|
||||
PCRE_DL_IMPORT extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DL_IMPORT extern const char *pcre_version(void);
|
||||
|
||||
#undef PCRE_DL_IMPORT
|
||||
|
||||
/* Functions */
|
||||
|
||||
extern pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
extern int pcre_copy_substring(const char *, int *, int, int, char *, int);
|
||||
extern int pcre_exec(const pcre *, const pcre_extra *, const char *,
|
||||
int, int, int, int *, int);
|
||||
extern void pcre_free_substring(const char *);
|
||||
extern void pcre_free_substring_list(const char **);
|
||||
extern int pcre_get_substring(const char *, int *, int, int, const char **);
|
||||
extern int pcre_get_substring_list(const char *, int *, int, const char ***);
|
||||
extern int pcre_info(const pcre *, int *, int *);
|
||||
extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *);
|
||||
extern const unsigned char *pcre_maketables(void);
|
||||
extern pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
extern const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -3,7 +3,8 @@
|
||||
*************************************************/
|
||||
|
||||
/* This is a grep program that uses the PCRE regular expression library to do
|
||||
its pattern matching. On a Unix system it can recurse into directories. */
|
||||
its pattern matching. On a Unix or Win32 system it can recurse into
|
||||
directories. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
@ -18,7 +19,7 @@ its pattern matching. On a Unix system it can recurse into directories. */
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
#define VERSION "2.0 01-Aug-2001"
|
||||
#define VERSION "2.2 10-Sep-2002"
|
||||
#define MAX_PATTERN_COUNT 100
|
||||
|
||||
|
||||
@ -70,8 +71,8 @@ static option_item optionlist[] = {
|
||||
*************************************************/
|
||||
|
||||
/* These functions are defined so that they can be made system specific,
|
||||
although at present the only ones are for Unix, and for "no directory recursion
|
||||
support". */
|
||||
although at present the only ones are for Unix, Win32, and for "no directory
|
||||
recursion support". */
|
||||
|
||||
|
||||
/************* Directory scanning in Unix ***********/
|
||||
@ -118,13 +119,105 @@ closedir(dir);
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
/************* Directory scanning in Win32 ***********/
|
||||
|
||||
/* I (Philip Hazel) have no means of testing this code. It was contributed by
|
||||
Lionel Fourquaux. */
|
||||
|
||||
|
||||
#elif HAVE_WIN32API
|
||||
|
||||
#ifndef STRICT
|
||||
# define STRICT
|
||||
#endif
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
|
||||
typedef struct directory_type
|
||||
{
|
||||
HANDLE handle;
|
||||
BOOL first;
|
||||
WIN32_FIND_DATA data;
|
||||
} directory_type;
|
||||
|
||||
int
|
||||
isdirectory(char *filename)
|
||||
{
|
||||
DWORD attr = GetFileAttributes(filename);
|
||||
if (attr == INVALID_FILE_ATTRIBUTES)
|
||||
return 0;
|
||||
return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
|
||||
}
|
||||
|
||||
directory_type *
|
||||
opendirectory(char *filename)
|
||||
{
|
||||
size_t len;
|
||||
char *pattern;
|
||||
directory_type *dir;
|
||||
DWORD err;
|
||||
len = strlen(filename);
|
||||
pattern = (char *) malloc(len + 3);
|
||||
dir = (directory_type *) malloc(sizeof(*dir));
|
||||
if ((pattern == NULL) || (dir == NULL))
|
||||
{
|
||||
fprintf(stderr, "pcregrep: malloc failed\n");
|
||||
exit(2);
|
||||
}
|
||||
memcpy(pattern, filename, len);
|
||||
memcpy(&(pattern[len]), "\\*", 3);
|
||||
dir->handle = FindFirstFile(pattern, &(dir->data));
|
||||
if (dir->handle != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
free(pattern);
|
||||
dir->first = TRUE;
|
||||
return dir;
|
||||
}
|
||||
err = GetLastError();
|
||||
free(pattern);
|
||||
free(dir);
|
||||
errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *
|
||||
readdirectory(directory_type *dir)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (!dir->first)
|
||||
{
|
||||
if (!FindNextFile(dir->handle, &(dir->data)))
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
dir->first = FALSE;
|
||||
}
|
||||
if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
|
||||
return dir->data.cFileName;
|
||||
}
|
||||
#ifndef _MSC_VER
|
||||
return NULL; /* Keep compiler happy; never executed */
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
closedirectory(directory_type *dir)
|
||||
{
|
||||
FindClose(dir->handle);
|
||||
free(dir);
|
||||
}
|
||||
|
||||
|
||||
/************* Directory scanning when we can't do it ***********/
|
||||
|
||||
/* The type is void, and apart from isdirectory(), the functions do nothing. */
|
||||
|
||||
#else
|
||||
|
||||
typedef void directory_type;
|
||||
|
||||
int isdirectory(char *filename) { return FALSE; }
|
||||
@ -262,8 +355,9 @@ if ((sep = isdirectory(filename)) != 0 && recurse)
|
||||
}
|
||||
|
||||
/* If the file is not a directory, or we are not recursing, scan it. If this is
|
||||
the first and only argument at top level, we don't show the file name.
|
||||
Otherwise, control is via the show_filenames variable. */
|
||||
the first and only argument at top level, we don't show the file name (unless
|
||||
we are only showing the file name). Otherwise, control is via the
|
||||
show_filenames variable. */
|
||||
|
||||
in = fopen(filename, "r");
|
||||
if (in == NULL)
|
||||
@ -272,7 +366,8 @@ if (in == NULL)
|
||||
return 2;
|
||||
}
|
||||
|
||||
rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);
|
||||
rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
|
||||
filename : NULL);
|
||||
fclose(in);
|
||||
return rc;
|
||||
}
|
||||
@ -287,7 +382,7 @@ return rc;
|
||||
static int
|
||||
usage(int rc)
|
||||
{
|
||||
fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");
|
||||
fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");
|
||||
fprintf(stderr, "Type `pcregrep --help' for more information.\n");
|
||||
return rc;
|
||||
}
|
||||
@ -304,8 +399,9 @@ help(void)
|
||||
{
|
||||
option_item *op;
|
||||
|
||||
printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");
|
||||
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
|
||||
printf("Search for PATTERN in each FILE or standard input.\n");
|
||||
printf("PATTERN must be present if -f is not used.\n");
|
||||
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
|
||||
|
||||
printf("Options:\n");
|
||||
@ -390,6 +486,10 @@ for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (argv[i][0] != '-') break;
|
||||
|
||||
/* Missing options */
|
||||
|
||||
if (argv[i][1] == 0) exit(usage(2));
|
||||
|
||||
/* Long name options */
|
||||
|
||||
if (argv[i][1] == '-')
|
||||
@ -492,7 +592,7 @@ if (pattern_filename != NULL)
|
||||
|
||||
else
|
||||
{
|
||||
if (i >= argc) return usage(0);
|
||||
if (i >= argc) return usage(2);
|
||||
pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
|
||||
if (pattern_list[0] == NULL)
|
||||
{
|
||||
|
@ -12,7 +12,7 @@ functions.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2001 University of Cambridge
|
||||
Copyright (c) 1997-2002 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
@ -47,7 +47,8 @@ static const char *estring[] = {
|
||||
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
|
||||
ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
|
||||
/* ERR28 (not a second ERR29) sits here, matching the per-entry comments in eint[]. */
ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
|
||||
ERR31 };
|
||||
ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
|
||||
ERR41, ERR42, ERR43 };
|
||||
|
||||
static int eint[] = {
|
||||
REG_EESCAPE, /* "\\ at end of pattern" */
|
||||
@ -62,9 +63,9 @@ static int eint[] = {
|
||||
REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
|
||||
REG_ASSERT, /* "internal error: unexpected repeat" */
|
||||
REG_BADPAT, /* "unrecognized character after (?" */
|
||||
REG_ASSERT, /* "unused error" */
|
||||
REG_BADPAT, /* "POSIX named classes are supported only within a class" */
|
||||
REG_EPAREN, /* "missing )" */
|
||||
REG_ESUBREG, /* "back reference to non-existent subpattern" */
|
||||
REG_ESUBREG, /* "reference to non-existent subpattern" */
|
||||
REG_INVARG, /* "erroffset passed as NULL" */
|
||||
REG_INVARG, /* "unknown option bit(s) set" */
|
||||
REG_EPAREN, /* "missing ) after comment" */
|
||||
@ -78,13 +79,21 @@ static int eint[] = {
|
||||
REG_BADPAT, /* "malformed number after (?(" */
|
||||
REG_BADPAT, /* "conditional group containe more than two branches" */
|
||||
REG_BADPAT, /* "assertion expected after (?(" */
|
||||
REG_BADPAT, /* "(?p must be followed by )" */
|
||||
REG_BADPAT, /* "(?R or (?digits must be followed by )" */
|
||||
REG_ECTYPE, /* "unknown POSIX class name" */
|
||||
REG_BADPAT, /* "POSIX collating elements are not supported" */
|
||||
REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
|
||||
REG_BADPAT, /* "characters with values > 255 are not yet supported in classes" */
|
||||
REG_BADPAT, /* "character value in \x{...} sequence is too large" */
|
||||
REG_BADPAT /* "invalid condition (?(0)" */
|
||||
REG_BADPAT, /* "invalid condition (?(0)" */
|
||||
REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
|
||||
REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
|
||||
REG_BADPAT, /* "number after (?C is > 255" */
|
||||
REG_BADPAT, /* "closing ) for (?C expected" */
|
||||
REG_BADPAT, /* "recursive call could loop indefinitely" */
|
||||
REG_BADPAT, /* "unrecognized character after (?P" */
|
||||
REG_BADPAT, /* "syntax error after (?P" */
|
||||
REG_BADPAT /* "two named groups have the same name" */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
@ -222,7 +231,9 @@ return 0;
|
||||
/* Unfortunately, PCRE requires 3 ints of working space for each captured
|
||||
substring, so we have to get and release working store instead of just using
|
||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
||||
ints. */
|
||||
ints. However, if the number of possible capturing brackets is small, use a
|
||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
||||
in a macro that can be changed at configure time. */
|
||||
|
||||
int
|
||||
regexec(regex_t *preg, const char *string, size_t nmatch,
|
||||
@ -231,6 +242,8 @@ regexec(regex_t *preg, const char *string, size_t nmatch,
|
||||
int rc;
|
||||
int options = 0;
|
||||
int *ovector = NULL;
|
||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
||||
BOOL allocated_ovector = FALSE;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
||||
@ -239,8 +252,16 @@ preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
|
||||
if (nmatch > 0)
|
||||
{
|
||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
||||
if (ovector == NULL) return REG_ESPACE;
|
||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
||||
{
|
||||
ovector = &(small_ovector[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
||||
if (ovector == NULL) return REG_ESPACE;
|
||||
allocated_ovector = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
|
||||
@ -251,19 +272,19 @@ if (rc == 0) rc = nmatch; /* All captured slots were filled in */
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < rc; i++)
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = ovector[i*2];
|
||||
pmatch[i].rm_eo = ovector[i*2+1];
|
||||
}
|
||||
if (ovector != NULL) free(ovector);
|
||||
if (allocated_ovector) free(ovector);
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ovector != NULL) free(ovector);
|
||||
if (allocated_ovector) free(ovector);
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
|
@ -2,6 +2,10 @@
|
||||
* PCRE testing program *
|
||||
*************************************************/
|
||||
|
||||
/* This program was hacked up as a tester for PCRE. I really should have
|
||||
written it more tidily in the first place. Will I ever learn? It has grown and
|
||||
been extended and consequently is now rather untidy in places. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@ -9,7 +13,8 @@
|
||||
#include <time.h>
|
||||
#include <locale.h>
|
||||
|
||||
/* Use the internal info for displaying the results of pcre_study(). */
|
||||
/* We need the internal info for displaying the results of pcre_study(). Also
|
||||
for getting the opcodes for showing compiled code. */
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -29,11 +34,17 @@ Makefile. */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define LOOPREPEAT 20000
|
||||
#define LOOPREPEAT 50000
|
||||
|
||||
|
||||
static FILE *outfile;
|
||||
static int log_store = 0;
|
||||
static int callout_count;
|
||||
static int callout_extra;
|
||||
static int callout_fail_count;
|
||||
static int callout_fail_id;
|
||||
static int first_callout;
|
||||
static int utf8;
|
||||
static size_t gotten_store;
|
||||
|
||||
|
||||
@ -48,6 +59,49 @@ static int utf8_table3[] = {
|
||||
0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* The code for doing this is held in a separate file that is also included in
|
||||
pcre.c when it is compiled with the debug switch. It defines a function called
|
||||
print_internals(), which uses a table of opcode lengths defined by the macro
|
||||
OP_LENGTHS, whose name must be OP_lengths. */
|
||||
|
||||
static uschar OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
#include "printint.c"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Read number from string *
|
||||
*************************************************/
|
||||
|
||||
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
|
||||
around with conditional compilation, just do the job by hand. It is only used
|
||||
for unpicking the -o argument, so just keep it simple.
|
||||
|
||||
Arguments:
|
||||
str string to be converted
|
||||
endptr where to put the end pointer
|
||||
|
||||
Returns: the converted value, as an int
|
||||
*/
|
||||
|
||||
/* Convert a run of decimal digits, optionally preceded by white space, into
an int. No sign is recognized and there is no overflow check. If no digit is
found the result is 0 and the end pointer is left at the first non-space
character. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int value = 0;
unsigned char *p = str;

/* Step over leading white space, stopping at the terminating zero. */
for (; *p != 0 && isspace(*p); p++) ;

/* Accumulate digits, most significant first. */
for (; isdigit(*p); p++)
  value = value * 10 + (int)(*p - '0');

*endptr = p;
return(value);
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
@ -143,271 +197,152 @@ return i+1;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print character string *
|
||||
*************************************************/
|
||||
|
||||
/* Character string printing function. Must handle UTF-8 strings in utf8
|
||||
mode. Yields number of characters printed. If handed a NULL file, just counts
|
||||
chars without printing. */
|
||||
|
||||
|
||||
/* Debugging function to print the internal form of the regex. This is the same
|
||||
code as contained in pcre.c under the DEBUG macro. */
|
||||
|
||||
static const char *OP_names[] = {
|
||||
"End", "\\A", "\\B", "\\b", "\\D", "\\d",
|
||||
"\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
|
||||
"Opt", "^", "$", "Any", "chars", "not",
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{",
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{",
|
||||
"class", "Ref", "Recurse",
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
|
||||
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
|
||||
"Brazero", "Braminzero", "Branumber", "Bra"
|
||||
};
|
||||
|
||||
|
||||
static void print_internals(pcre *re)
|
||||
{
|
||||
unsigned char *code = ((real_pcre *)re)->code;
|
||||
|
||||
fprintf(outfile, "------------------------------------------------------------------\n");
|
||||
|
||||
for(;;)
|
||||
{
|
||||
int c;
|
||||
int charlength;
|
||||
|
||||
fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));
|
||||
|
||||
if (*code >= OP_BRA)
|
||||
{
|
||||
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
|
||||
fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);
|
||||
else
|
||||
fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
|
||||
code += 2;
|
||||
}
|
||||
|
||||
else switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(outfile, " %s\n", OP_names[*code]);
|
||||
fprintf(outfile, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_OPT:
|
||||
fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);
|
||||
code++;
|
||||
break;
|
||||
|
||||
case OP_CHARS:
|
||||
charlength = *(++code);
|
||||
fprintf(outfile, "%3d ", charlength);
|
||||
while (charlength-- > 0)
|
||||
if (isprint(c = *(++code))) fprintf(outfile, "%c", c);
|
||||
else fprintf(outfile, "\\x%02x", c);
|
||||
break;
|
||||
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_BRANUMBER:
|
||||
case OP_REVERSE:
|
||||
case OP_CREF:
|
||||
fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
if (*code >= OP_TYPESTAR)
|
||||
fprintf(outfile, " %s", OP_names[code[1]]);
|
||||
else if (isprint(c = code[1])) fprintf(outfile, " %c", c);
|
||||
else fprintf(outfile, " \\x%02x", c);
|
||||
fprintf(outfile, "%s", OP_names[*code++]);
|
||||
break;
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
if (isprint(c = code[3])) fprintf(outfile, " %c{", c);
|
||||
else fprintf(outfile, " \\x%02x{", c);
|
||||
if (*code != OP_EXACT) fprintf(outfile, ",");
|
||||
fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
|
||||
if (*code == OP_MINUPTO) fprintf(outfile, "?");
|
||||
code += 3;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
fprintf(outfile, " %s{", OP_names[code[3]]);
|
||||
if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");
|
||||
fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");
|
||||
code += 3;
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c);
|
||||
else fprintf(outfile, " [^\\x%02x]", c);
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c);
|
||||
else fprintf(outfile, " [^\\x%02x]", c);
|
||||
fprintf(outfile, "%s", OP_names[*code++]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c);
|
||||
else fprintf(outfile, " [^\\x%02x]{", c);
|
||||
if (*code != OP_NOTEXACT) fprintf(outfile, ",");
|
||||
fprintf(outfile, "%d}", (code[1] << 8) + code[2]);
|
||||
if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");
|
||||
code += 3;
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
fprintf(outfile, " \\%d", (code[1] << 8) | code[2]);
|
||||
code += 3;
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CLASS:
|
||||
{
|
||||
int i, min, max;
|
||||
code++;
|
||||
fprintf(outfile, " [");
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((code[i/8] & (1 << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((code[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(outfile, "\\");
|
||||
if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
fprintf(outfile, "-");
|
||||
if (j == '-' || j == ']') fprintf(outfile, "\\");
|
||||
if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
fprintf(outfile, "]");
|
||||
code += 32;
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
fprintf(outfile, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
min = (code[1] << 8) + code[2];
|
||||
max = (code[3] << 8) + code[4];
|
||||
if (max == 0) fprintf(outfile, "{%d,}", min);
|
||||
else fprintf(outfile, "{%d,%d}", min, max);
|
||||
if (*code == OP_CRMINRANGE) fprintf(outfile, "?");
|
||||
code += 4;
|
||||
break;
|
||||
|
||||
default:
|
||||
code--;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Anything else is just a one-node item */
|
||||
|
||||
default:
|
||||
fprintf(outfile, " %s", OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code++;
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Character string printing function. A "normal" and a UTF-8 version. */
|
||||
|
||||
static void pchars(unsigned char *p, int length, int utf8)
|
||||
static int pchars(unsigned char *p, int length, FILE *f)
|
||||
{
|
||||
int c;
|
||||
int yield = 0;
|
||||
|
||||
while (length-- > 0)
|
||||
{
|
||||
if (utf8)
|
||||
{
|
||||
int rc = utf82ord(p, &c);
|
||||
if (rc > 0)
|
||||
|
||||
if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
|
||||
{
|
||||
length -= rc - 1;
|
||||
p += rc;
|
||||
if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
|
||||
else fprintf(outfile, "\\x{%02x}", c);
|
||||
if (c < 256 && isprint(c))
|
||||
{
|
||||
if (f != NULL) fprintf(f, "%c", c);
|
||||
yield++;
|
||||
}
|
||||
else
|
||||
{
|
||||
int n;
|
||||
if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
|
||||
yield += n;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not UTF-8, or malformed UTF-8 */
|
||||
|
||||
if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
|
||||
else fprintf(outfile, "\\x%02x", c);
|
||||
if (isprint(c = *(p++)))
|
||||
{
|
||||
if (f != NULL) fprintf(f, "%c", c);
|
||||
yield++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (f != NULL) fprintf(f, "\\x%02x", c);
|
||||
yield += 4;
|
||||
}
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout function *
|
||||
*************************************************/
|
||||
|
||||
/* Called from PCRE as a result of the (?C) item. We print out where we are in
|
||||
the match. Yield OK unless there have been more callouts than the fail count. */
|
||||
|
||||
static int callout(pcre_callout_block *cb)
{
/* Where the subject text is echoed: the real output file on the first callout
of a match, or on every callout when extra detail was requested. Otherwise it
is NULL, and pchars() is called only to obtain printed widths. */
FILE *echo = (first_callout | callout_extra)? outfile : NULL;
int pad, before_width, matched_width;

/* In extra-detail mode, show the callout number and the capture state. */
if (callout_extra)
  {
  int slot;
  fprintf(echo, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  for (slot = 0; slot < cb->capture_top * 2; slot += 2)
    {
    if (cb->offset_vector[slot] < 0)
      {
      fprintf(echo, "%2d: <unset>\n", slot/2);
      continue;
      }
    fprintf(echo, "%2d: ", slot/2);
    (void)pchars((unsigned char *)cb->subject + cb->offset_vector[slot],
      cb->offset_vector[slot+1] - cb->offset_vector[slot], echo);
    fprintf(echo, "\n");
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, pchars() is used just to find
the printed lengths of the substrings. The subject is handled in three pieces:
before the match start, from the start to the current position, and the rest. */
if (echo != NULL) fprintf(echo, "--->");

before_width = pchars((unsigned char *)cb->subject, cb->start_match, echo);
matched_width = pchars((unsigned char *)(cb->subject + cb->start_match),
  cb->current_position - cb->start_match, echo);
(void)pchars((unsigned char *)(cb->subject + cb->current_position),
  cb->subject_length - cb->current_position, echo);

if (echo != NULL) fprintf(echo, "\n");

/* The indicator line is always written to outfile. It starts with the callout
number unless that was already shown by the extra-detail output above. */
if (callout_extra) fprintf(outfile, " ");
else fprintf(outfile, "%3d ", cb->callout_number);

/* A caret under the match start ... */
for (pad = before_width; pad > 0; pad--) fprintf(outfile, " ");
fprintf(outfile, "^");

/* ... and, if anything has been matched so far, one under the current
position as well. */
if (matched_width > 0)
  {
  for (pad = matched_width - 1; pad > 0; pad--) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

fprintf(outfile, "\n");

first_callout = 0;

/* Only callouts whose number equals callout_fail_id are counted; the yield
becomes 1 once that count reaches callout_fail_count, and is 0 otherwise. */
if (cb->callout_number != callout_fail_id) return 0;
callout_count++;
return (callout_count >= callout_fail_count)? 1 : 0;
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Local malloc function *
|
||||
*************************************************/
|
||||
|
||||
/* Alternative malloc function, to test functionality and show the size of the
|
||||
compiled re. */
|
||||
|
||||
static void *new_malloc(size_t size)
|
||||
{
|
||||
gotten_store = size;
|
||||
if (log_store)
|
||||
fprintf(outfile, "Memory allocation (code space): %d\n",
|
||||
(int)((int)size - offsetof(real_pcre, code[0])));
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Call pcre_fullinfo() *
|
||||
*************************************************/
|
||||
|
||||
/* Get one piece of information from the pcre_fullinfo() function */
|
||||
|
||||
@ -420,6 +355,9 @@ if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Main Program *
|
||||
*************************************************/
|
||||
|
||||
/* Read lines from named file or stdin and write to named file or stdout; lines
|
||||
consist of a regular expression, in delimiters and optionally followed by
|
||||
@ -453,7 +391,7 @@ outfile = stdout;
|
||||
|
||||
while (argc > 1 && argv[op][0] == '-')
|
||||
{
|
||||
char *endptr;
|
||||
unsigned char *endptr;
|
||||
|
||||
if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
|
||||
showstore = 1;
|
||||
@ -461,7 +399,7 @@ while (argc > 1 && argv[op][0] == '-')
|
||||
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
|
||||
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
|
||||
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
|
||||
((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
|
||||
((size_offsets = get_value(argv[op+1], &endptr)), *endptr == 0))
|
||||
{
|
||||
op++;
|
||||
argc--;
|
||||
@ -549,12 +487,14 @@ while (!done)
|
||||
int do_g = 0;
|
||||
int do_showinfo = showinfo;
|
||||
int do_showrest = 0;
|
||||
int utf8 = 0;
|
||||
int erroroffset, len, delimiter;
|
||||
|
||||
utf8 = 0;
|
||||
|
||||
if (infile == stdin) printf(" re> ");
|
||||
if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;
|
||||
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
|
||||
fflush(outfile);
|
||||
|
||||
p = buffer;
|
||||
while (isspace(*p)) p++;
|
||||
@ -705,8 +645,8 @@ while (!done)
|
||||
}
|
||||
time_taken = clock() - start_time;
|
||||
fprintf(outfile, "Compile time %.3f milliseconds\n",
|
||||
((double)time_taken * 1000.0) /
|
||||
((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
|
||||
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
|
||||
(double)CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
|
||||
@ -740,14 +680,26 @@ while (!done)
|
||||
info-returning functions. The old one has a limited interface and
|
||||
returns only limited data. Check that it agrees with the newer one. */
|
||||
|
||||
if (log_store)
|
||||
fprintf(outfile, "Memory allocation (code space): %d\n",
|
||||
(int)(gotten_store -
|
||||
sizeof(real_pcre) -
|
||||
((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
|
||||
|
||||
if (do_showinfo)
|
||||
{
|
||||
unsigned long int get_options;
|
||||
int old_first_char, old_options, old_count;
|
||||
int count, backrefmax, first_char, need_char;
|
||||
int nameentrysize, namecount;
|
||||
const uschar *nametable;
|
||||
size_t size;
|
||||
|
||||
if (do_debug) print_internals(re);
|
||||
if (do_debug)
|
||||
{
|
||||
fprintf(outfile, "------------------------------------------------------------------\n");
|
||||
print_internals(re, outfile);
|
||||
}
|
||||
|
||||
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
|
||||
new_info(re, NULL, PCRE_INFO_SIZE, &size);
|
||||
@ -755,6 +707,9 @@ while (!done)
|
||||
new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
|
||||
new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
|
||||
new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
|
||||
new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
|
||||
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
|
||||
new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);
|
||||
|
||||
old_count = pcre_info(re, &old_options, &old_first_char);
|
||||
if (count < 0) fprintf(outfile,
|
||||
@ -781,6 +736,19 @@ while (!done)
|
||||
fprintf(outfile, "Capturing subpattern count = %d\n", count);
|
||||
if (backrefmax > 0)
|
||||
fprintf(outfile, "Max back reference = %d\n", backrefmax);
|
||||
|
||||
if (namecount > 0)
|
||||
{
|
||||
fprintf(outfile, "Named capturing subpatterns:\n");
|
||||
while (namecount-- > 0)
|
||||
{
|
||||
fprintf(outfile, " %s %*s%3d\n", nametable + 2,
|
||||
nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
|
||||
GET2(nametable, 0));
|
||||
nametable += nameentrysize;
|
||||
}
|
||||
}
|
||||
|
||||
if (get_options == 0) fprintf(outfile, "No options\n");
|
||||
else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
|
||||
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
|
||||
@ -806,10 +774,13 @@ while (!done)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isprint(first_char))
|
||||
fprintf(outfile, "First char = \'%c\'\n", first_char);
|
||||
int ch = first_char & 255;
|
||||
char *caseless = ((first_char & REQ_CASELESS) == 0)?
|
||||
"" : " (caseless)";
|
||||
if (isprint(ch))
|
||||
fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
|
||||
else
|
||||
fprintf(outfile, "First char = %d\n", first_char);
|
||||
fprintf(outfile, "First char = %d%s\n", ch, caseless);
|
||||
}
|
||||
|
||||
if (need_char < 0)
|
||||
@ -818,10 +789,13 @@ while (!done)
|
||||
}
|
||||
else
|
||||
{
|
||||
int ch = need_char & 255;
|
||||
char *caseless = ((need_char & REQ_CASELESS) == 0)?
|
||||
"" : " (caseless)";
|
||||
if (isprint(need_char))
|
||||
fprintf(outfile, "Need char = \'%c\'\n", need_char);
|
||||
fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
|
||||
else
|
||||
fprintf(outfile, "Need char = %d\n", need_char);
|
||||
fprintf(outfile, "Need char = %d%s\n", ch, caseless);
|
||||
}
|
||||
}
|
||||
|
||||
@ -840,8 +814,8 @@ while (!done)
|
||||
time_taken = clock() - start_time;
|
||||
if (extra != NULL) free(extra);
|
||||
fprintf(outfile, " Study time %.3f milliseconds\n",
|
||||
((double)time_taken * 1000.0)/
|
||||
((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
|
||||
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
|
||||
(double)CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
extra = pcre_study(re, study_options, &error);
|
||||
@ -906,6 +880,13 @@ while (!done)
|
||||
|
||||
options = 0;
|
||||
|
||||
pcre_callout = callout;
|
||||
first_callout = 1;
|
||||
callout_extra = 0;
|
||||
callout_count = 0;
|
||||
callout_fail_count = 999999;
|
||||
callout_fail_id = -1;
|
||||
|
||||
if (infile == stdin) printf("data> ");
|
||||
if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
|
||||
{
|
||||
@ -927,6 +908,7 @@ while (!done)
|
||||
{
|
||||
int i = 0;
|
||||
int n = 0;
|
||||
|
||||
if (c == '\\') switch ((c = *p++))
|
||||
{
|
||||
case 'a': c = 7; break;
|
||||
@ -991,8 +973,35 @@ while (!done)
|
||||
continue;
|
||||
|
||||
case 'C':
|
||||
while(isdigit(*p)) n = n * 10 + *p++ - '0';
|
||||
copystrings |= 1 << n;
|
||||
if (isdigit(*p)) /* Set copy string */
|
||||
{
|
||||
while(isdigit(*p)) n = n * 10 + *p++ - '0';
|
||||
copystrings |= 1 << n;
|
||||
}
|
||||
else if (*p == '+')
|
||||
{
|
||||
callout_extra = 1;
|
||||
p++;
|
||||
}
|
||||
else if (*p == '-')
|
||||
{
|
||||
pcre_callout = NULL;
|
||||
p++;
|
||||
}
|
||||
else if (*p == '!')
|
||||
{
|
||||
callout_fail_id = 0;
|
||||
p++;
|
||||
while(isdigit(*p))
|
||||
callout_fail_id = callout_fail_id * 10 + *p++ - '0';
|
||||
callout_fail_count = 0;
|
||||
if (*p == '!')
|
||||
{
|
||||
p++;
|
||||
while(isdigit(*p))
|
||||
callout_fail_count = callout_fail_count * 10 + *p++ - '0';
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
||||
case 'G':
|
||||
@ -1023,7 +1032,7 @@ while (!done)
|
||||
}
|
||||
}
|
||||
use_size_offsets = n;
|
||||
if (n == 0) use_offsets = NULL;
|
||||
if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
|
||||
continue;
|
||||
|
||||
case 'Z':
|
||||
@ -1057,18 +1066,19 @@ while (!done)
|
||||
else
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < use_size_offsets; i++)
|
||||
for (i = 0; i < (size_t)use_size_offsets; i++)
|
||||
{
|
||||
if (pmatch[i].rm_so >= 0)
|
||||
{
|
||||
fprintf(outfile, "%2d: ", (int)i);
|
||||
pchars(dbuffer + pmatch[i].rm_so,
|
||||
pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
|
||||
(void)pchars(dbuffer + pmatch[i].rm_so,
|
||||
pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
|
||||
fprintf(outfile, "\n");
|
||||
if (i == 0 && do_showrest)
|
||||
{
|
||||
fprintf(outfile, " 0+ ");
|
||||
pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
|
||||
(void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
|
||||
outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
}
|
||||
@ -1094,8 +1104,8 @@ while (!done)
|
||||
start_offset, options | g_notempty, use_offsets, use_size_offsets);
|
||||
time_taken = clock() - start_time;
|
||||
fprintf(outfile, "Execute time %.3f milliseconds\n",
|
||||
((double)time_taken * 1000.0)/
|
||||
((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
|
||||
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
|
||||
(double)CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
count = pcre_exec(re, extra, (char *)bptr, len,
|
||||
@ -1119,14 +1129,16 @@ while (!done)
|
||||
else
|
||||
{
|
||||
fprintf(outfile, "%2d: ", i/2);
|
||||
pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);
|
||||
(void)pchars(bptr + use_offsets[i],
|
||||
use_offsets[i+1] - use_offsets[i], outfile);
|
||||
fprintf(outfile, "\n");
|
||||
if (i == 0)
|
||||
{
|
||||
if (do_showrest)
|
||||
{
|
||||
fprintf(outfile, " 0+ ");
|
||||
pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);
|
||||
(void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
|
||||
outfile);
|
||||
fprintf(outfile, "\n");
|
||||
}
|
||||
}
|
||||
|
@ -1,169 +0,0 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# Program for testing regular expressions with perl to check that PCRE handles
|
||||
# them the same.
|
||||
|
||||
|
||||
# Function for turning a string into a string of printing chars
|
||||
|
||||
sub pchars {
    # Turn a string into a string of printing characters.
    #
    # Argument: the string to convert.
    # Returns:  a copy in which every character outside the printable ASCII
    #           range (codes 32..126) is replaced by a \xhh escape.
    #
    # Only lexical variables are used, so the sub neither touches package
    # globals nor depends on "no strict".
    my ($text) = @_;
    my $printable = "";

    foreach my $char (split(//, $text)) {
        my $code = ord $char;
        if ($code >= 32 && $code < 127) {
            $printable .= $char;
        }
        else {
            $printable .= sprintf("\\x%02x", $code);
        }
    }

    return $printable;
}
|
||||
|
||||
|
||||
|
||||
# Read lines from named file or stdin and write to named file or stdout; lines
|
||||
# consist of a regular expression, in delimiters and optionally followed by
|
||||
# options, followed by a set of test data, terminated by an empty line.
|
||||
|
||||
# Sort out the input and output files
|
||||
|
||||
# The first argument, if any, names the input file; otherwise read STDIN.
# The handle is referred to by name through $infile for the rest of the
# script. The three-argument form of open() is used so that characters in the
# file name can never be interpreted as an open mode or a pipe.
if (@ARGV > 0)
{
    open(INFILE, '<', $ARGV[0]) || die "Failed to open $ARGV[0]\n";
    $infile = "INFILE";
}
else { $infile = "STDIN"; }

# The second argument, if any, names the output file; otherwise write STDOUT.
if (@ARGV > 1)
{
    open(OUTFILE, '>', $ARGV[1]) || die "Failed to open $ARGV[1]\n";
    $outfile = "OUTFILE";
}
else { $outfile = "STDOUT"; }

# Banner showing the Perl version; print (not printf) because the text is
# data, not a format.
print {$outfile} "Perl $] Regular Expressions\n\n";
|
||||
|
||||
# Main loop
|
||||
|
||||
# Each iteration handles one pattern followed by its data lines. Input text is
# echoed with print rather than printf, so that a "%" in a pattern, data line
# or error message is never treated as a format directive.
NEXT_RE:
for (;;)
{
    # Read the first line of the next pattern. The defined() test stops a
    # final line consisting of just "0" from being taken as end of file.
    print " re> " if $infile eq "STDIN";
    last unless defined($_ = <$infile>);
    print {$outfile} $_ if $infile ne "STDIN";
    next if ($_ eq "");

    $pattern = $_;

    # Keep reading lines until the opening delimiter has been seen again.
    while ($pattern !~ /^\s*(.).*\1/s)
    {
        print " > " if $infile eq "STDIN";
        last unless defined($_ = <$infile>);
        print {$outfile} $_ if $infile ne "STDIN";
        $pattern .= $_;
    }

    chomp($pattern);
    $pattern =~ s/\s+$//;

    # The private /+ modifier means "print $' afterwards". We use it
    # only on the end of patterns to make it easy to chop off here.

    $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);

    # Check that the pattern is valid

    eval "\$_ =~ ${pattern}";
    if ($@)
    {
        print {$outfile} "Error: $@";
        next NEXT_RE;
    }

    # If the /g modifier is present, we want to put a loop round the matching;
    # otherwise just a single "if".

    $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";

    # If the pattern is actually the null string, Perl uses the most recently
    # executed (and successfully compiled) regex instead. This is a nasty trap
    # for the unwary! The PCRE test suite does contain null strings in places -
    # if they are allowed through here all sorts of weird and unexpected
    # effects happen. To avoid this, we replace such patterns with a non-null
    # pattern that has the same effect.

    $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);

    # Read data lines and test them

    for (;;)
    {
        print "data> " if $infile eq "STDIN";
        last NEXT_RE unless defined($_ = <$infile>);
        chomp;
        print {$outfile} "$_\n" if $infile ne "STDIN";

        s/\s+$//;
        s/^\s+//;

        last if ($_ eq "");

        # NOTE(review): the data line is evaluated as a double-quoted string
        # to get escapes processed; only run this on trusted test input.
        $x = eval "\"$_\"";

        # Empty array for holding results, then do the matching. Each
        # successful match pushes 18 values: $&, $1 .. $16 and $'.

        @subs = ();

        eval "${cmd} (\$x =~ ${pattern}) {" .
            join('', map { "push \@subs,\$$_;" } ('&', 1 .. 16, "'")) .
            " }";

        if ($@)
        {
            print {$outfile} "Error: $@\n";
            next NEXT_RE;
        }
        elsif (scalar(@subs) == 0)
        {
            print {$outfile} "No match\n";
        }
        else
        {
            # Report each match in turn, consuming its 18 values.
            while (scalar(@subs) != 0)
            {
                printf $outfile (" 0: %s\n", pchars($subs[0]));
                printf $outfile (" 0+ %s\n", pchars($subs[17])) if $showrest;
                $last_printed = 0;
                for ($i = 1; $i <= 16; $i++)
                {
                    if (defined $subs[$i])
                    {
                        # Show any unset groups skipped before this one.
                        while ($last_printed++ < $i-1)
                        { printf $outfile ("%2d: <unset>\n", $last_printed); }
                        printf $outfile ("%2d: %s\n", $i, pchars($subs[$i]));
                        $last_printed = $i;
                    }
                }
                splice(@subs, 0, 18);
            }
        }
    }
}

print {$outfile} "\n";
|
||||
|
||||
# End
|
@ -1,208 +0,0 @@
|
||||
#! /usr/bin/perl
|
||||
|
||||
# Program for testing regular expressions with perl to check that PCRE handles
|
||||
# them the same. This is the version that supports /8 for UTF-8 testing. It
|
||||
# requires at least Perl 5.6.
|
||||
|
||||
|
||||
# Function for turning a string into a string of printing chars. There are
|
||||
# currently problems with UTF-8 strings; this fudges round them.
|
||||
|
||||
sub pchars {
    # Turn a string into a string of printing characters.
    #
    # Argument: the string to convert.
    # Returns:  a copy in which anything outside the printable ASCII range
    #           (codes 32..126) is replaced by an escape: \x{hh} per code
    #           point when the script-level $utf8 flag is set, \xhh per
    #           character otherwise.
    #
    # Only lexical temporaries are used, so no package globals other than the
    # $utf8 flag are touched.
    my ($text) = @_;
    my $printable = "";

    if ($utf8)
    {
        use utf8;
        foreach my $code (unpack('U*', $text))
        {
            if ($code >= 32 && $code < 127) { $printable .= chr($code); }
            else { $printable .= sprintf("\\x{%02x}", $code); }
        }
    }
    else
    {
        foreach my $char (split(//, $text))
        {
            my $code = ord $char;
            if ($code >= 32 && $code < 127) { $printable .= $char; }
            else { $printable .= sprintf("\\x%02x", $code); }
        }
    }

    return $printable;
}
|
||||
|
||||
|
||||
|
||||
# Read lines from named file or stdin and write to named file or stdout; lines
|
||||
# consist of a regular expression, in delimiters and optionally followed by
|
||||
# options, followed by a set of test data, terminated by an empty line.
|
||||
|
||||
# Sort out the input and output files
|
||||
|
||||
# The first argument, if any, names the input file; otherwise read STDIN.
# The handle is referred to by name through $infile for the rest of the
# script. The three-argument form of open() is used so that characters in the
# file name can never be interpreted as an open mode or a pipe.
if (@ARGV > 0)
{
    open(INFILE, '<', $ARGV[0]) || die "Failed to open $ARGV[0]\n";
    $infile = "INFILE";
}
else { $infile = "STDIN"; }

# The second argument, if any, names the output file; otherwise write STDOUT.
if (@ARGV > 1)
{
    open(OUTFILE, '>', $ARGV[1]) || die "Failed to open $ARGV[1]\n";
    $outfile = "OUTFILE";
}
else { $outfile = "STDOUT"; }

# Banner showing the Perl version; print (not printf) because the text is
# data, not a format.
print {$outfile} "Perl $] Regular Expressions\n\n";
|
||||
|
||||
# Main loop
|
||||
|
||||
# Each iteration handles one pattern followed by its data lines. Input text is
# echoed with print rather than printf, so that a "%" in a pattern, data line
# or error message is never treated as a format directive.
NEXT_RE:
for (;;)
{
    # Read the first line of the next pattern. The defined() test stops a
    # final line consisting of just "0" from being taken as end of file.
    print " re> " if $infile eq "STDIN";
    last unless defined($_ = <$infile>);
    print {$outfile} $_ if $infile ne "STDIN";
    next if ($_ eq "");

    $pattern = $_;

    # Keep reading lines until the opening delimiter has been seen again.
    while ($pattern !~ /^\s*(.).*\1/s)
    {
        print " > " if $infile eq "STDIN";
        last unless defined($_ = <$infile>);
        print {$outfile} $_ if $infile ne "STDIN";
        $pattern .= $_;
    }

    chomp($pattern);
    $pattern =~ s/\s+$//;

    # The private /+ modifier means "print $' afterwards".

    $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);

    # The private /8 modifier means "operate in UTF-8". Currently, Perl
    # has bugs that we try to work around using this flag.

    $utf8 = ($pattern =~ s/8(?=[a-z]*$)//);

    # Check that the pattern is valid

    if ($utf8)
    {
        use utf8;
        eval "\$_ =~ ${pattern}";
    }
    else
    {
        eval "\$_ =~ ${pattern}";
    }

    if ($@)
    {
        print {$outfile} "Error: $@";
        next NEXT_RE;
    }

    # If the /g modifier is present, we want to put a loop round the matching;
    # otherwise just a single "if".

    $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";

    # If the pattern is actually the null string, Perl uses the most recently
    # executed (and successfully compiled) regex instead. This is a nasty trap
    # for the unwary! The PCRE test suite does contain null strings in places -
    # if they are allowed through here all sorts of weird and unexpected
    # effects happen. To avoid this, we replace such patterns with a non-null
    # pattern that has the same effect.

    $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);

    # Read data lines and test them

    for (;;)
    {
        print "data> " if $infile eq "STDIN";
        last NEXT_RE unless defined($_ = <$infile>);
        chomp;
        print {$outfile} "$_\n" if $infile ne "STDIN";

        s/\s+$//;
        s/^\s+//;

        last if ($_ eq "");

        # NOTE(review): the data line is evaluated as a double-quoted string
        # to get escapes processed; only run this on trusted test input.
        $x = eval "\"$_\"";

        # Empty array for holding results, then do the matching. Each
        # successful match pushes 18 values: $&, $1 .. $16 and $'.

        @subs = ();

        $pushes = join('', map { "push \@subs,\$$_;" } ('&', 1 .. 16, "'")) .
            " }";

        if ($utf8)
        {
            use utf8;
            eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
        }
        else
        {
            eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
        }

        if ($@)
        {
            print {$outfile} "Error: $@\n";
            next NEXT_RE;
        }
        elsif (scalar(@subs) == 0)
        {
            print {$outfile} "No match\n";
        }
        else
        {
            # Report each match in turn, consuming its 18 values.
            while (scalar(@subs) != 0)
            {
                printf $outfile (" 0: %s\n", pchars($subs[0]));
                printf $outfile (" 0+ %s\n", pchars($subs[17])) if $showrest;
                $last_printed = 0;
                for ($i = 1; $i <= 16; $i++)
                {
                    if (defined $subs[$i])
                    {
                        # Show any unset groups skipped before this one.
                        while ($last_printed++ < $i-1)
                        { printf $outfile ("%2d: <unset>\n", $last_printed); }
                        printf $outfile ("%2d: %s\n", $i, pchars($subs[$i]));
                        $last_printed = $i;
                    }
                }
                splice(@subs, 0, 18);
            }
        }
    }
}

print {$outfile} "\n";
|
||||
|
||||
# End
|
@ -9,7 +9,7 @@ the file Tech.Notes for some information on the internals.
|
||||
|
||||
Written by: Philip Hazel <ph10@cam.ac.uk>
|
||||
|
||||
Copyright (c) 1997-2001 University of Cambridge
|
||||
Copyright (c) 1997-2002 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Permission is granted to anyone to use this software for any purpose on any
|
||||
@ -99,7 +99,7 @@ volatile int dummy;
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + 3;
|
||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next)
|
||||
@ -119,6 +119,12 @@ do
|
||||
default:
|
||||
return FALSE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Skip over extended extraction bracket number */
|
||||
|
||||
case OP_BRANUMBER:
|
||||
@ -130,8 +136,8 @@ do
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
|
||||
tcode += 3;
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
@ -148,8 +154,8 @@ do
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, cd))
|
||||
return FALSE;
|
||||
dummy = 1;
|
||||
do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
|
||||
tcode += 3;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
@ -314,7 +320,7 @@ do
|
||||
} /* End of switch */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += (code[1] << 8) + code[2]; /* Advance to next branch */
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return TRUE;
|
||||
@ -346,6 +352,8 @@ pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
uschar start_bits[32];
|
||||
real_pcre_extra *extra;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
uschar *code = (uschar *)re + sizeof(real_pcre) +
|
||||
(re->name_count * re->name_entry_size);
|
||||
compile_data compile_block;
|
||||
|
||||
*errorptr = NULL;
|
||||
@ -362,9 +370,9 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* For an anchored pattern, or an unchored pattern that has a first char, or a
|
||||
multiline pattern that matches only at "line starts", no further processing at
|
||||
present. */
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
@ -379,7 +387,7 @@ compile_block.ctypes = re->tables + ctypes_offset;
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
&compile_block)) return NULL;
|
||||
|
||||
/* Get an "extra" block and put the information therein. */
|
||||
|
1859
ext/pcre/pcrelib/testdata/testinput1
vendored
1859
ext/pcre/pcrelib/testdata/testinput1
vendored
File diff suppressed because it is too large
Load Diff
441
ext/pcre/pcrelib/testdata/testinput2
vendored
441
ext/pcre/pcrelib/testdata/testinput2
vendored
@ -173,7 +173,7 @@
|
||||
/<.*>/U
|
||||
abc<def>ghi<klm>nop
|
||||
|
||||
/<.*>(?U)/
|
||||
/(?U)<.*>/
|
||||
abc<def>ghi<klm>nop
|
||||
|
||||
/<.*?>/U
|
||||
@ -658,6 +658,8 @@
|
||||
|
||||
/^[[:ascii:]]/D
|
||||
|
||||
/^[[:blank:]]/D
|
||||
|
||||
/^[[:cntrl:]]/D
|
||||
|
||||
/^[[:digit:]]/D
|
||||
@ -682,6 +684,8 @@
|
||||
|
||||
/^[12[:^digit:]]/D
|
||||
|
||||
/^[[:^blank:]]/D
|
||||
|
||||
/[01[:alpha:]%]/D
|
||||
|
||||
/[[.ch.]]/
|
||||
@ -720,4 +724,439 @@
|
||||
mainmain
|
||||
mainOmain
|
||||
|
||||
/These are all cases where Perl does it differently (nested captures)/
|
||||
|
||||
/^(a(b)?)+$/
|
||||
aba
|
||||
|
||||
/^(aa(bb)?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(aa|aa(bb))+$/
|
||||
aabbaa
|
||||
|
||||
/^(aa(bb)??)+$/
|
||||
aabbaa
|
||||
|
||||
/^(?:aa(bb)?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(aa(b(b))?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(?:aa(b(b))?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(?:aa(b(?:b))?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(?:aa(bb(?:b))?)+$/
|
||||
aabbbaa
|
||||
|
||||
/^(?:aa(b(?:bb))?)+$/
|
||||
aabbbaa
|
||||
|
||||
/^(?:aa(?:b(b))?)+$/
|
||||
aabbaa
|
||||
|
||||
/^(?:aa(?:b(bb))?)+$/
|
||||
aabbbaa
|
||||
|
||||
/^(aa(b(bb))?)+$/
|
||||
aabbbaa
|
||||
|
||||
/^(aa(bb(bb))?)+$/
|
||||
aabbbbaa
|
||||
|
||||
/--------------------------------------------------------------------/
|
||||
|
||||
/#/xMD
|
||||
|
||||
/a#/xMD
|
||||
|
||||
/[\s]/D
|
||||
|
||||
/[\S]/D
|
||||
|
||||
/a(?i)b/D
|
||||
ab
|
||||
aB
|
||||
*** Failers
|
||||
AB
|
||||
|
||||
/(a(?i)b)/D
|
||||
ab
|
||||
aB
|
||||
*** Failers
|
||||
AB
|
||||
|
||||
/ (?i)abc/xD
|
||||
|
||||
/#this is a comment
|
||||
(?i)abc/xD
|
||||
|
||||
/123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/D
|
||||
|
||||
/\Q123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890/D
|
||||
|
||||
/\Q\E/D
|
||||
\
|
||||
|
||||
/\Q\Ex/D
|
||||
|
||||
/ \Q\E/D
|
||||
|
||||
/a\Q\E/D
|
||||
abc
|
||||
bca
|
||||
bac
|
||||
|
||||
/a\Q\Eb/D
|
||||
abc
|
||||
|
||||
/\Q\Eabc/D
|
||||
|
||||
/x*+\w/D
|
||||
****Failers
|
||||
xxxxx
|
||||
|
||||
/x?+/D
|
||||
|
||||
/x++/D
|
||||
|
||||
/x{1,3}+/D
|
||||
|
||||
/(x)*+/D
|
||||
|
||||
/^(\w++|\s++)*$/
|
||||
now is the time for all good men to come to the aid of the party
|
||||
*** Failers
|
||||
this is not a line with only words and spaces!
|
||||
|
||||
/(\d++)(\w)/
|
||||
12345a
|
||||
*** Failers
|
||||
12345+
|
||||
|
||||
/a++b/
|
||||
aaab
|
||||
|
||||
/(a++b)/
|
||||
aaab
|
||||
|
||||
/(a++)b/
|
||||
aaab
|
||||
|
||||
/([^()]++|\([^()]*\))+/
|
||||
((abc(ade)ufh()()x
|
||||
|
||||
/\(([^()]++|\([^()]+\))+\)/
|
||||
(abc)
|
||||
(abc(def)xyz)
|
||||
*** Failers
|
||||
((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
|
||||
/(abc){1,3}+/D
|
||||
|
||||
/a+?+/
|
||||
|
||||
/a{2,3}?+b/
|
||||
|
||||
/(?U)a+?+/
|
||||
|
||||
/a{2,3}?+b/U
|
||||
|
||||
/x(?U)a++b/D
|
||||
xaaaab
|
||||
|
||||
/(?U)xa++b/D
|
||||
xaaaab
|
||||
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/D
|
||||
|
||||
/^x(?U)a+b/D
|
||||
|
||||
/^x(?U)(a+)b/D
|
||||
|
||||
/[.x.]/
|
||||
|
||||
/[=x=]/
|
||||
|
||||
/[:x:]/
|
||||
|
||||
/\l/
|
||||
|
||||
/\L/
|
||||
|
||||
/\N{name}/
|
||||
|
||||
/\pP/
|
||||
|
||||
/\PP/
|
||||
|
||||
/\p{prop}/
|
||||
|
||||
/\P{prop}/
|
||||
|
||||
/\u/
|
||||
|
||||
/\U/
|
||||
|
||||
/\X/
|
||||
|
||||
/[/
|
||||
|
||||
/[a-/
|
||||
|
||||
/[[:space:]/
|
||||
|
||||
/[\s]/DM
|
||||
|
||||
/[[:space:]]/DM
|
||||
|
||||
/[[:space:]abcde]/DM
|
||||
|
||||
/< (?: (?(R) \d++ | [^<>]*+) | (?R)) * >/x
|
||||
<>
|
||||
<abcd>
|
||||
<abc <123> hij>
|
||||
<abc <def> hij>
|
||||
<abc<>def>
|
||||
<abc<>
|
||||
*** Failers
|
||||
<abc
|
||||
|
||||
|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|DM
|
||||
|
||||
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|DM
|
||||
|
||||
/(.*)\d+\1/I
|
||||
|
||||
/(.*)\d+/I
|
||||
|
||||
/(.*)\d+\1/Is
|
||||
|
||||
/(.*)\d+/Is
|
||||
|
||||
/(.*(xyz))\d+\2/I
|
||||
|
||||
/((.*))\d+\1/I
|
||||
abc123bc
|
||||
|
||||
/a[b]/I
|
||||
|
||||
/(?=a).*/I
|
||||
|
||||
/(?=abc).xyz/iI
|
||||
|
||||
/(?=abc)(?i).xyz/I
|
||||
|
||||
/(?=a)(?=b)/I
|
||||
|
||||
/(?=.)a/I
|
||||
|
||||
/((?=abcda)a)/I
|
||||
|
||||
/((?=abcda)ab)/I
|
||||
|
||||
/()a/I
|
||||
|
||||
/(?(1)ab|ac)/I
|
||||
|
||||
/(?(1)abz|acz)/I
|
||||
|
||||
/(?(1)abz)/I
|
||||
|
||||
/(?(1)abz)123/I
|
||||
|
||||
/(a)+/I
|
||||
|
||||
/(a){2,3}/I
|
||||
|
||||
/(a)*/I
|
||||
|
||||
/[a]/I
|
||||
|
||||
/[ab]/I
|
||||
|
||||
/[ab]/IS
|
||||
|
||||
/[^a]/I
|
||||
|
||||
/\d456/I
|
||||
|
||||
/\d456/IS
|
||||
|
||||
/a^b/I
|
||||
|
||||
/^a/mI
|
||||
abcde
|
||||
xy\nabc
|
||||
*** Failers
|
||||
xyabc
|
||||
|
||||
/c|abc/I
|
||||
|
||||
/(?i)[ab]/IS
|
||||
|
||||
/[ab](?i)cd/IS
|
||||
|
||||
/abc(?C)def/
|
||||
abcdef
|
||||
1234abcdef
|
||||
*** Failers
|
||||
abcxyz
|
||||
abcxyzf
|
||||
|
||||
/abc(?C)de(?C1)f/
|
||||
123abcdef
|
||||
|
||||
/(?C1)\dabc(?C2)def/
|
||||
1234abcdef
|
||||
*** Failers
|
||||
abcdef
|
||||
|
||||
/(?C255)ab/
|
||||
|
||||
/(?C256)ab/
|
||||
|
||||
/(?Cab)xx/
|
||||
|
||||
/(?C12vr)x/
|
||||
|
||||
/abc(?C)def/
|
||||
*** Failers
|
||||
\x83\x0\x61bcdef
|
||||
|
||||
/(abc)(?C)de(?C1)f/
|
||||
123abcdef
|
||||
123abcdef\C+
|
||||
123abcdef\C-
|
||||
*** Failers
|
||||
123abcdef\C!1
|
||||
|
||||
/(?C0)(abc(?C1))*/
|
||||
abcabcabc
|
||||
abcabc\C!1!3
|
||||
*** Failers
|
||||
abcabcabc\C!1!3
|
||||
|
||||
/(\d{3}(?C))*/
|
||||
123\C+
|
||||
123456\C+
|
||||
123456789\C+
|
||||
|
||||
/((xyz)(?C)p|(?C1)xyzabc)/
|
||||
xyzabc\C+
|
||||
|
||||
/(X)((xyz)(?C)p|(?C1)xyzabc)/
|
||||
Xxyzabc\C+
|
||||
|
||||
/(?=(abc))(?C)abcdef/
|
||||
abcdef\C+
|
||||
|
||||
/(?!(abc)(?C1)d)(?C2)abcxyz/
|
||||
abcxyz\C+
|
||||
|
||||
/(?<=(abc)(?C))xyz/
|
||||
abcxyz\C+
|
||||
|
||||
/(?C)abc/
|
||||
|
||||
/(?C)^abc/
|
||||
|
||||
/(?C)a|b/S
|
||||
|
||||
/(?R)/
|
||||
|
||||
/(a|(?R))/
|
||||
|
||||
/(ab|(bc|(de|(?R))))/
|
||||
|
||||
/x(ab|(bc|(de|(?R))))/
|
||||
xab
|
||||
xbc
|
||||
xde
|
||||
xxab
|
||||
xxxab
|
||||
*** Failers
|
||||
xyab
|
||||
|
||||
/(ab|(bc|(de|(?1))))/
|
||||
|
||||
/x(ab|(bc|(de|(?1)x)x)x)/
|
||||
|
||||
/^([^()]|\((?1)*\))*$/
|
||||
abc
|
||||
a(b)c
|
||||
a(b(c))d
|
||||
*** Failers)
|
||||
a(b(c)d
|
||||
|
||||
/^>abc>([^()]|\((?1)*\))*<xyz<$/
|
||||
>abc>123<xyz<
|
||||
>abc>1(2)3<xyz<
|
||||
>abc>(1(2)3)<xyz<
|
||||
|
||||
/(a(?1)b)/D
|
||||
|
||||
/(a(?1)+b)/D
|
||||
|
||||
/^\W*(?:((.)\W*(?1)\W*\2|)|((.)\W*(?3)\W*\4|\W*.\W*))\W*$/i
|
||||
1221
|
||||
Satan, oscillate my metallic sonatas!
|
||||
A man, a plan, a canal: Panama!
|
||||
Able was I ere I saw Elba.
|
||||
*** Failers
|
||||
The quick brown fox
|
||||
|
||||
/^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/
|
||||
12
|
||||
(((2+2)*-3)-7)
|
||||
-12
|
||||
*** Failers
|
||||
((2+2)*-3)-7)
|
||||
|
||||
/^(x(y|(?1){2})z)/
|
||||
xyz
|
||||
xxyzxyzz
|
||||
*** Failers
|
||||
xxyzz
|
||||
xxyzxyzxyzz
|
||||
|
||||
/((< (?: (?(R) \d++ | [^<>]*+) | (?2)) * >))/x
|
||||
<>
|
||||
<abcd>
|
||||
<abc <123> hij>
|
||||
<abc <def> hij>
|
||||
<abc<>def>
|
||||
<abc<>
|
||||
*** Failers
|
||||
<abc
|
||||
|
||||
/(?1)/
|
||||
|
||||
/((?2)(abc)/
|
||||
|
||||
/^(abc)def(?1)/
|
||||
abcdefabc
|
||||
|
||||
/^(a|b|c)=(?1)+/
|
||||
a=a
|
||||
a=b
|
||||
a=bc
|
||||
|
||||
/^(a|b|c)=((?1))+/
|
||||
a=a
|
||||
a=b
|
||||
a=bc
|
||||
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/D
|
||||
abde
|
||||
acde
|
||||
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/D
|
||||
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/D
|
||||
|
||||
/ End of testinput2 /
|
||||
|
1767
ext/pcre/pcrelib/testdata/testinput3
vendored
1767
ext/pcre/pcrelib/testdata/testinput3
vendored
File diff suppressed because it is too large
Load Diff
176
ext/pcre/pcrelib/testdata/testinput4
vendored
176
ext/pcre/pcrelib/testdata/testinput4
vendored
@ -1,65 +1,155 @@
|
||||
/^[\w]+/
|
||||
/-- Do not use the \x{} construct except with patterns that have the --/
|
||||
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
|
||||
/-- that option is set. However, the latest Perls recognize them always. --/
|
||||
|
||||
/a.b/8
|
||||
acb
|
||||
a\x7fb
|
||||
a\x{100}b
|
||||
*** Failers
|
||||
École
|
||||
a\nb
|
||||
|
||||
/^[\w]+/Lfr
|
||||
École
|
||||
|
||||
/^[\w]+/
|
||||
/a(.{3})b/8
|
||||
a\x{4000}xyb
|
||||
a\x{4000}\x7fyb
|
||||
a\x{4000}\x{100}yb
|
||||
*** Failers
|
||||
École
|
||||
a\x{4000}b
|
||||
ac\ncb
|
||||
|
||||
/^[\W]+/
|
||||
École
|
||||
/a(.*?)(.)/
|
||||
a\xc0\x88b
|
||||
|
||||
/^[\W]+/Lfr
|
||||
/a(.*?)(.)/8
|
||||
a\x{100}b
|
||||
|
||||
/a(.*)(.)/
|
||||
a\xc0\x88b
|
||||
|
||||
/a(.*)(.)/8
|
||||
a\x{100}b
|
||||
|
||||
/a(.)(.)/
|
||||
a\xc0\x92bcd
|
||||
|
||||
/a(.)(.)/8
|
||||
a\x{240}bcd
|
||||
|
||||
/a(.?)(.)/
|
||||
a\xc0\x92bcd
|
||||
|
||||
/a(.?)(.)/8
|
||||
a\x{240}bcd
|
||||
|
||||
/a(.??)(.)/
|
||||
a\xc0\x92bcd
|
||||
|
||||
/a(.??)(.)/8
|
||||
a\x{240}bcd
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
*** Failers
|
||||
École
|
||||
a\x{1234}b
|
||||
ac\ncb
|
||||
|
||||
/[\b]/
|
||||
\b
|
||||
/a(.{3,})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
*** Failers
|
||||
a
|
||||
a\x{1234}b
|
||||
|
||||
/[\b]/Lfr
|
||||
\b
|
||||
/a(.{3,}?)b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
*** Failers
|
||||
a
|
||||
a\x{1234}b
|
||||
|
||||
/^\w+/
|
||||
/a(.{3,5})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
axbxxbcdefghijb
|
||||
axxxxxbcdefghijb
|
||||
*** Failers
|
||||
École
|
||||
a\x{1234}b
|
||||
axxxxxxbcdefghijb
|
||||
|
||||
/^\w+/Lfr
|
||||
École
|
||||
|
||||
/(.+)\b(.+)/
|
||||
École
|
||||
|
||||
/(.+)\b(.+)/Lfr
|
||||
/a(.{3,5}?)b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
axbxxbcdefghijb
|
||||
axxxxxbcdefghijb
|
||||
*** Failers
|
||||
École
|
||||
a\x{1234}b
|
||||
axxxxxxbcdefghijb
|
||||
|
||||
/École/i
|
||||
École
|
||||
/^[a\x{c0}]/8
|
||||
*** Failers
|
||||
école
|
||||
\x{100}
|
||||
|
||||
/École/iLfr
|
||||
École
|
||||
école
|
||||
/(?<=aXb)cd/8
|
||||
aXbcd
|
||||
|
||||
/\w/IS
|
||||
/(?<=a\x{100}b)cd/8
|
||||
a\x{100}bcd
|
||||
|
||||
/\w/ISLfr
|
||||
|
||||
/^[\xc8-\xc9]/iLfr
|
||||
École
|
||||
école
|
||||
|
||||
/^[\xc8-\xc9]/Lfr
|
||||
École
|
||||
/(?<=a\x{100000}b)cd/8
|
||||
a\x{100000}bcd
|
||||
|
||||
/(?:\x{100}){3}b/8
|
||||
\x{100}\x{100}\x{100}b
|
||||
*** Failers
|
||||
école
|
||||
\x{100}\x{100}b
|
||||
|
||||
/\x{ab}/8
|
||||
\x{ab}
|
||||
\xc2\xab
|
||||
*** Failers
|
||||
\x00{ab}
|
||||
|
||||
/(?<=(.))X/8
|
||||
WXYZ
|
||||
\x{256}XYZ
|
||||
*** Failers
|
||||
XYZ
|
||||
|
||||
/X(\C{3})/8
|
||||
X\x{1234}
|
||||
|
||||
/X(\C{4})/8
|
||||
X\x{1234}YZ
|
||||
|
||||
/X\C*/8
|
||||
XYZabcdce
|
||||
|
||||
/X\C*?/8
|
||||
XYZabcde
|
||||
|
||||
/X\C{3,5}/8
|
||||
Xabcdefg
|
||||
X\x{1234}
|
||||
X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
X\x{1234}\x{512}YZ
|
||||
|
||||
/X\C{3,5}?/8
|
||||
Xabcdefg
|
||||
X\x{1234}
|
||||
X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
|
||||
/ End of testinput4 /
|
||||
|
175
ext/pcre/pcrelib/testdata/testinput5
vendored
175
ext/pcre/pcrelib/testdata/testinput5
vendored
@ -1,118 +1,91 @@
|
||||
/-- Because of problems with Perl 5.6 in handling UTF-8 vs non UTF-8 --/
|
||||
/-- strings automatically, do not use the \x{} construct except with --/
|
||||
/-- patterns that have the /8 option set, and don't use them without! --/
|
||||
/\x{100}/8DM
|
||||
|
||||
/a.b/8
|
||||
acb
|
||||
a\x7fb
|
||||
a\x{100}b
|
||||
*** Failers
|
||||
a\nb
|
||||
/\x{1000}/8DM
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{4000}xyb
|
||||
a\x{4000}\x7fyb
|
||||
a\x{4000}\x{100}yb
|
||||
*** Failers
|
||||
a\x{4000}b
|
||||
ac\ncb
|
||||
/\x{10000}/8DM
|
||||
|
||||
/a(.*?)(.)/
|
||||
a\xc0\x88b
|
||||
/\x{100000}/8DM
|
||||
|
||||
/a(.*?)(.)/8
|
||||
a\x{100}b
|
||||
/\x{1000000}/8DM
|
||||
|
||||
/a(.*)(.)/
|
||||
a\xc0\x88b
|
||||
/\x{4000000}/8DM
|
||||
|
||||
/a(.*)(.)/8
|
||||
a\x{100}b
|
||||
/\x{7fffFFFF}/8DM
|
||||
|
||||
/a(.)(.)/
|
||||
a\xc0\x92bcd
|
||||
/[\x{ff}]/8DM
|
||||
|
||||
/a(.)(.)/8
|
||||
a\x{240}bcd
|
||||
/[\x{100}]/8DM
|
||||
|
||||
/a(.?)(.)/
|
||||
a\xc0\x92bcd
|
||||
/\x{ffffffff}/8
|
||||
|
||||
/a(.?)(.)/8
|
||||
a\x{240}bcd
|
||||
/\x{100000000}/8
|
||||
|
||||
/a(.??)(.)/
|
||||
a\xc0\x92bcd
|
||||
/^\x{100}a\x{1234}/8
|
||||
\x{100}a\x{1234}bcd
|
||||
|
||||
/a(.??)(.)/8
|
||||
a\x{240}bcd
|
||||
/\x80/8D
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
*** Failers
|
||||
a\x{1234}b
|
||||
ac\ncb
|
||||
/\xff/8D
|
||||
|
||||
/a(.{3,})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
*** Failers
|
||||
a\x{1234}b
|
||||
|
||||
/a(.{3,}?)b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
*** Failers
|
||||
a\x{1234}b
|
||||
|
||||
/a(.{3,5})b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
axbxxbcdefghijb
|
||||
axxxxxbcdefghijb
|
||||
*** Failers
|
||||
a\x{1234}b
|
||||
axxxxxxbcdefghijb
|
||||
|
||||
/a(.{3,5}?)b/8
|
||||
a\x{1234}xyb
|
||||
a\x{1234}\x{4321}yb
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
axxxxbcdefghijb
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
axbxxbcdefghijb
|
||||
axxxxxbcdefghijb
|
||||
*** Failers
|
||||
a\x{1234}b
|
||||
axxxxxxbcdefghijb
|
||||
|
||||
/^[a\x{c0}]/8
|
||||
*** Failers
|
||||
\x{100}
|
||||
|
||||
/(?<=aXb)cd/8
|
||||
aXbcd
|
||||
|
||||
/(?<=a\x{100}b)cd/8
|
||||
a\x{100}bcd
|
||||
|
||||
/(?<=a\x{100000}b)cd/8
|
||||
a\x{100000}bcd
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8
|
||||
\x{0041}\x{2262}\x{0391}\x{002e}
|
||||
|
||||
/(?:\x{100}){3}b/8
|
||||
\x{100}\x{100}\x{100}b
|
||||
*** Failers
|
||||
\x{100}\x{100}b
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8
|
||||
\x{D55c}\x{ad6d}\x{C5B4}
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
|
||||
/\x{80}/D8
|
||||
|
||||
/\x{084}/D8
|
||||
|
||||
/\x{104}/D8
|
||||
|
||||
/\x{861}/D8
|
||||
|
||||
/\x{212ab}/D8
|
||||
|
||||
/.{3,5}X/D8
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}X
|
||||
|
||||
|
||||
/.{3,5}?/D8
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
||||
|
||||
/-- These tests are here rather than in testinput4 because Perl 5.6 has --/
|
||||
/-- some problems with UTF-8 support, in the area of \x{..} where the --/
|
||||
/-- value is < 255. It grumbles about invalid UTF-8 strings. --/
|
||||
|
||||
/^[a\x{c0}]b/8
|
||||
\x{c0}b
|
||||
|
||||
/^([a\x{c0}]*?)aa/8
|
||||
a\x{c0}aaaa/
|
||||
|
||||
/^([a\x{c0}]*?)aa/8
|
||||
a\x{c0}aaaa/
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
|
||||
/^([a\x{c0}]*)aa/8
|
||||
a\x{c0}aaaa/
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
|
||||
/^([a\x{c0}]*)a\x{c0}/8
|
||||
a\x{c0}aaaa/
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
|
||||
/-- --/
|
||||
|
||||
/(?<=\C)X/8
|
||||
Should produce an error diagnostic
|
||||
|
||||
/-- This one is here not because it's different to Perl, but because the --/
|
||||
/-- way the captured single-byte is displayed. (In Perl it becomes a --/
|
||||
/-- character, and you can't tell the difference.) --/
|
||||
|
||||
/X(\C)(.*)/8
|
||||
X\x{1234}
|
||||
X\nabc
|
||||
|
||||
/ End of testinput5 /
|
||||
|
3207
ext/pcre/pcrelib/testdata/testoutput1
vendored
3207
ext/pcre/pcrelib/testdata/testoutput1
vendored
File diff suppressed because it is too large
Load Diff
1746
ext/pcre/pcrelib/testdata/testoutput2
vendored
1746
ext/pcre/pcrelib/testdata/testoutput2
vendored
File diff suppressed because it is too large
Load Diff
3017
ext/pcre/pcrelib/testdata/testoutput3
vendored
3017
ext/pcre/pcrelib/testdata/testoutput3
vendored
File diff suppressed because it is too large
Load Diff
346
ext/pcre/pcrelib/testdata/testoutput4
vendored
346
ext/pcre/pcrelib/testdata/testoutput4
vendored
@ -1,116 +1,304 @@
|
||||
PCRE version 3.9 02-Jan-2002
|
||||
PCRE version 3.92 11-Sep-2002
|
||||
|
||||
/^[\w]+/
|
||||
/-- Do not use the \x{} construct except with patterns that have the --/
|
||||
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
|
||||
No match
|
||||
/-- that option is set. However, the latest Perls recognize them always. --/
|
||||
No match
|
||||
|
||||
/a.b/8
|
||||
acb
|
||||
0: acb
|
||||
a\x7fb
|
||||
0: a\x{7f}b
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
*** Failers
|
||||
No match
|
||||
École
|
||||
a\nb
|
||||
No match
|
||||
|
||||
/^[\w]+/Lfr
|
||||
École
|
||||
0: École
|
||||
|
||||
/^[\w]+/
|
||||
/a(.{3})b/8
|
||||
a\x{4000}xyb
|
||||
0: a\x{4000}xyb
|
||||
1: \x{4000}xy
|
||||
a\x{4000}\x7fyb
|
||||
0: a\x{4000}\x{7f}yb
|
||||
1: \x{4000}\x{7f}y
|
||||
a\x{4000}\x{100}yb
|
||||
0: a\x{4000}\x{100}yb
|
||||
1: \x{4000}\x{100}y
|
||||
*** Failers
|
||||
No match
|
||||
École
|
||||
a\x{4000}b
|
||||
No match
|
||||
ac\ncb
|
||||
No match
|
||||
|
||||
/^[\W]+/
|
||||
École
|
||||
0: \xc9
|
||||
/a(.*?)(.)/
|
||||
a\xc0\x88b
|
||||
0: a\xc0
|
||||
1:
|
||||
2: \xc0
|
||||
|
||||
/^[\W]+/Lfr
|
||||
*** Failers
|
||||
0: ***
|
||||
École
|
||||
No match
|
||||
/a(.*?)(.)/8
|
||||
a\x{100}b
|
||||
0: a\x{100}
|
||||
1:
|
||||
2: \x{100}
|
||||
|
||||
/[\b]/
|
||||
\b
|
||||
0: \x08
|
||||
/a(.*)(.)/
|
||||
a\xc0\x88b
|
||||
0: a\xc0\x88b
|
||||
1: \xc0\x88
|
||||
2: b
|
||||
|
||||
/a(.*)(.)/8
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
1: \x{100}
|
||||
2: b
|
||||
|
||||
/a(.)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0\x92
|
||||
1: \xc0
|
||||
2: \x92
|
||||
|
||||
/a(.)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}b
|
||||
1: \x{240}
|
||||
2: b
|
||||
|
||||
/a(.?)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0\x92
|
||||
1: \xc0
|
||||
2: \x92
|
||||
|
||||
/a(.?)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}b
|
||||
1: \x{240}
|
||||
2: b
|
||||
|
||||
/a(.??)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0
|
||||
1:
|
||||
2: \xc0
|
||||
|
||||
/a(.??)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}
|
||||
1:
|
||||
2: \x{240}
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
*** Failers
|
||||
No match
|
||||
a
|
||||
a\x{1234}b
|
||||
No match
|
||||
ac\ncb
|
||||
No match
|
||||
|
||||
/[\b]/Lfr
|
||||
\b
|
||||
0: \x08
|
||||
/a(.{3,})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxbcdefghijb
|
||||
1: xxxxbcdefghij
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
*** Failers
|
||||
No match
|
||||
a
|
||||
a\x{1234}b
|
||||
No match
|
||||
|
||||
/^\w+/
|
||||
/a(.{3,}?)b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
*** Failers
|
||||
No match
|
||||
École
|
||||
a\x{1234}b
|
||||
No match
|
||||
|
||||
/^\w+/Lfr
|
||||
École
|
||||
0: École
|
||||
|
||||
/(.+)\b(.+)/
|
||||
École
|
||||
0: \xc9cole
|
||||
1: \xc9
|
||||
2: cole
|
||||
|
||||
/(.+)\b(.+)/Lfr
|
||||
*** Failers
|
||||
0: *** Failers
|
||||
1: ***
|
||||
2: Failers
|
||||
École
|
||||
No match
|
||||
|
||||
/École/i
|
||||
École
|
||||
0: \xc9cole
|
||||
/a(.{3,5})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
axbxxbcdefghijb
|
||||
0: axbxxb
|
||||
1: xbxx
|
||||
axxxxxbcdefghijb
|
||||
0: axxxxxb
|
||||
1: xxxxx
|
||||
*** Failers
|
||||
No match
|
||||
école
|
||||
a\x{1234}b
|
||||
No match
|
||||
axxxxxxbcdefghijb
|
||||
No match
|
||||
|
||||
/École/iLfr
|
||||
École
|
||||
0: École
|
||||
école
|
||||
0: école
|
||||
/a(.{3,5}?)b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
axbxxbcdefghijb
|
||||
0: axbxxb
|
||||
1: xbxx
|
||||
axxxxxbcdefghijb
|
||||
0: axxxxxb
|
||||
1: xxxxx
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
axxxxxxbcdefghijb
|
||||
No match
|
||||
|
||||
/\w/IS
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
Starting character set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
|
||||
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
/^[a\x{c0}]/8
|
||||
*** Failers
|
||||
No match
|
||||
\x{100}
|
||||
No match
|
||||
|
||||
/\w/ISLfr
|
||||
Capturing subpattern count = 0
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
Starting character set: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
|
||||
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Ù Ú Û Ü Ý Þ ß à á â ã ä å
|
||||
æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø ù ú û ü ý þ ÿ
|
||||
/(?<=aXb)cd/8
|
||||
aXbcd
|
||||
0: cd
|
||||
|
||||
/^[\xc8-\xc9]/iLfr
|
||||
École
|
||||
0: É
|
||||
école
|
||||
0: é
|
||||
/(?<=a\x{100}b)cd/8
|
||||
a\x{100}bcd
|
||||
0: cd
|
||||
|
||||
/^[\xc8-\xc9]/Lfr
|
||||
École
|
||||
0: É
|
||||
/(?<=a\x{100000}b)cd/8
|
||||
a\x{100000}bcd
|
||||
0: cd
|
||||
|
||||
/(?:\x{100}){3}b/8
|
||||
\x{100}\x{100}\x{100}b
|
||||
0: \x{100}\x{100}\x{100}b
|
||||
*** Failers
|
||||
No match
|
||||
école
|
||||
\x{100}\x{100}b
|
||||
No match
|
||||
|
||||
/\x{ab}/8
|
||||
\x{ab}
|
||||
0: \x{ab}
|
||||
\xc2\xab
|
||||
0: \x{ab}
|
||||
*** Failers
|
||||
No match
|
||||
\x00{ab}
|
||||
No match
|
||||
|
||||
/(?<=(.))X/8
|
||||
WXYZ
|
||||
0: X
|
||||
1: W
|
||||
\x{256}XYZ
|
||||
0: X
|
||||
1: \x{256}
|
||||
*** Failers
|
||||
No match
|
||||
XYZ
|
||||
No match
|
||||
|
||||
/X(\C{3})/8
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
1: \x{1234}
|
||||
|
||||
/X(\C{4})/8
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}Y
|
||||
1: \x{1234}Y
|
||||
|
||||
/X\C*/8
|
||||
XYZabcdce
|
||||
0: XYZabcdce
|
||||
|
||||
/X\C*?/8
|
||||
XYZabcde
|
||||
0: X
|
||||
|
||||
/X\C{3,5}/8
|
||||
Xabcdefg
|
||||
0: Xabcde
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}YZ
|
||||
X\x{1234}\x{512}
|
||||
0: X\x{1234}\x{512}
|
||||
X\x{1234}\x{512}YZ
|
||||
0: X\x{1234}\x{512}
|
||||
|
||||
/X\C{3,5}?/8
|
||||
Xabcdefg
|
||||
0: Xabc
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
X\x{1234}YZ
|
||||
0: X\x{1234}
|
||||
X\x{1234}\x{512}
|
||||
0: X\x{1234}
|
||||
|
||||
/ End of testinput4 /
|
||||
|
||||
|
539
ext/pcre/pcrelib/testdata/testoutput5
vendored
539
ext/pcre/pcrelib/testdata/testoutput5
vendored
@ -1,242 +1,339 @@
|
||||
PCRE version 3.9 02-Jan-2002
|
||||
PCRE version 3.92 11-Sep-2002
|
||||
|
||||
/-- Because of problems with Perl 5.6 in handling UTF-8 vs non UTF-8 --/
|
||||
/-- strings automatically, do not use the \x{} construct except with --/
|
||||
No match
|
||||
/-- patterns that have the /8 option set, and don't use them without! --/
|
||||
No match
|
||||
/\x{100}/8DM
|
||||
Memory allocation (code space): 11
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc4\x80
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 196
|
||||
Need char = 128
|
||||
|
||||
/a.b/8
|
||||
acb
|
||||
0: acb
|
||||
a\x7fb
|
||||
0: a\x{7f}b
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
*** Failers
|
||||
No match
|
||||
a\nb
|
||||
No match
|
||||
/\x{1000}/8DM
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
3 3 \xe1\x80\x80
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 225
|
||||
Need char = 128
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{4000}xyb
|
||||
0: a\x{4000}xyb
|
||||
1: \x{4000}xy
|
||||
a\x{4000}\x7fyb
|
||||
0: a\x{4000}\x{7f}yb
|
||||
1: \x{4000}\x{7f}y
|
||||
a\x{4000}\x{100}yb
|
||||
0: a\x{4000}\x{100}yb
|
||||
1: \x{4000}\x{100}y
|
||||
*** Failers
|
||||
No match
|
||||
a\x{4000}b
|
||||
No match
|
||||
ac\ncb
|
||||
No match
|
||||
/\x{10000}/8DM
|
||||
Memory allocation (code space): 13
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra 0
|
||||
3 4 \xf0\x90\x80\x80
|
||||
9 9 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 240
|
||||
Need char = 128
|
||||
|
||||
/a(.*?)(.)/
|
||||
a\xc0\x88b
|
||||
0: a\xc0
|
||||
1:
|
||||
2: \xc0
|
||||
/\x{100000}/8DM
|
||||
Memory allocation (code space): 13
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra 0
|
||||
3 4 \xf4\x80\x80\x80
|
||||
9 9 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 244
|
||||
Need char = 128
|
||||
|
||||
/a(.*?)(.)/8
|
||||
a\x{100}b
|
||||
0: a\x{100}
|
||||
1:
|
||||
2: \x{100}
|
||||
/\x{1000000}/8DM
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra 0
|
||||
3 5 \xf9\x80\x80\x80\x80
|
||||
10 10 Ket
|
||||
13 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 249
|
||||
Need char = 128
|
||||
|
||||
/a(.*)(.)/
|
||||
a\xc0\x88b
|
||||
0: a\xc0\x88b
|
||||
1: \xc0\x88
|
||||
2: b
|
||||
/\x{4000000}/8DM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 6 \xfc\x84\x80\x80\x80\x80
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 252
|
||||
Need char = 128
|
||||
|
||||
/a(.*)(.)/8
|
||||
a\x{100}b
|
||||
0: a\x{100}b
|
||||
1: \x{100}
|
||||
2: b
|
||||
/\x{7fffFFFF}/8DM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 6 \xfd\xbf\xbf\xbf\xbf\xbf
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 253
|
||||
Need char = 191
|
||||
|
||||
/a(.)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0\x92
|
||||
1: \xc0
|
||||
2: \x92
|
||||
/[\x{ff}]/8DM
|
||||
Memory allocation (code space): 40
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
3 1 \xff
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 255
|
||||
No need char
|
||||
|
||||
/a(.)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}b
|
||||
1: \x{240}
|
||||
2: b
|
||||
/[\x{100}]/8DM
|
||||
Failed: characters with values > 255 are not yet supported in classes at offset 7
|
||||
|
||||
/a(.?)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0\x92
|
||||
1: \xc0
|
||||
2: \x92
|
||||
/\x{ffffffff}/8
|
||||
Failed: character value in \x{...} sequence is too large at offset 11
|
||||
|
||||
/a(.?)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}b
|
||||
1: \x{240}
|
||||
2: b
|
||||
/\x{100000000}/8
|
||||
Failed: character value in \x{...} sequence is too large at offset 12
|
||||
|
||||
/a(.??)(.)/
|
||||
a\xc0\x92bcd
|
||||
0: a\xc0
|
||||
1:
|
||||
2: \xc0
|
||||
/^\x{100}a\x{1234}/8
|
||||
\x{100}a\x{1234}bcd
|
||||
0: \x{100}a\x{1234}
|
||||
|
||||
/a(.??)(.)/8
|
||||
a\x{240}bcd
|
||||
0: a\x{240}
|
||||
1:
|
||||
2: \x{240}
|
||||
/\x80/8D
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc2\x80
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 194
|
||||
Need char = 128
|
||||
|
||||
/a(.{3})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
ac\ncb
|
||||
No match
|
||||
/\xff/8D
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc3\xbf
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 195
|
||||
Need char = 191
|
||||
|
||||
/a(.{3,})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxbcdefghijb
|
||||
1: xxxxbcdefghij
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
|
||||
/a(.{3,}?)b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
|
||||
/a(.{3,5})b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
axbxxbcdefghijb
|
||||
0: axbxxb
|
||||
1: xbxx
|
||||
axxxxxbcdefghijb
|
||||
0: axxxxxb
|
||||
1: xxxxx
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
axxxxxxbcdefghijb
|
||||
No match
|
||||
|
||||
/a(.{3,5}?)b/8
|
||||
a\x{1234}xyb
|
||||
0: a\x{1234}xyb
|
||||
1: \x{1234}xy
|
||||
a\x{1234}\x{4321}yb
|
||||
0: a\x{1234}\x{4321}yb
|
||||
1: \x{1234}\x{4321}y
|
||||
a\x{1234}\x{4321}\x{3412}b
|
||||
0: a\x{1234}\x{4321}\x{3412}b
|
||||
1: \x{1234}\x{4321}\x{3412}
|
||||
axxxxbcdefghijb
|
||||
0: axxxxb
|
||||
1: xxxx
|
||||
a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
0: a\x{1234}\x{4321}\x{3412}\x{3421}b
|
||||
1: \x{1234}\x{4321}\x{3412}\x{3421}
|
||||
axbxxbcdefghijb
|
||||
0: axbxxb
|
||||
1: xbxx
|
||||
axxxxxbcdefghijb
|
||||
0: axxxxxb
|
||||
1: xxxxx
|
||||
*** Failers
|
||||
No match
|
||||
a\x{1234}b
|
||||
No match
|
||||
axxxxxxbcdefghijb
|
||||
No match
|
||||
|
||||
/^[a\x{c0}]/8
|
||||
*** Failers
|
||||
No match
|
||||
\x{100}
|
||||
No match
|
||||
|
||||
/(?<=aXb)cd/8
|
||||
aXbcd
|
||||
0: cd
|
||||
|
||||
/(?<=a\x{100}b)cd/8
|
||||
a\x{100}bcd
|
||||
0: cd
|
||||
|
||||
/(?<=a\x{100000}b)cd/8
|
||||
a\x{100000}bcd
|
||||
0: cd
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8
|
||||
------------------------------------------------------------------
|
||||
0 12 Bra 0
|
||||
3 7 A\xe2\x89\xa2\xce\x91.
|
||||
12 12 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 'A'
|
||||
Need char = '.'
|
||||
\x{0041}\x{2262}\x{0391}\x{002e}
|
||||
0: A\x{2262}\x{391}.
|
||||
|
||||
/(?:\x{100}){3}b/8
|
||||
\x{100}\x{100}\x{100}b
|
||||
0: \x{100}\x{100}\x{100}b
|
||||
*** Failers
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 9 \xed\x95\x9c\xea\xb5\xad\xec\x96\xb4
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 237
|
||||
Need char = 180
|
||||
\x{D55c}\x{ad6d}\x{C5B4}
|
||||
0: \x{d55c}\x{ad6d}\x{c5b4}
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 9 \xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 230
|
||||
Need char = 158
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
0: \x{65e5}\x{672c}\x{8a9e}
|
||||
|
||||
/\x{80}/D8
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc2\x80
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 194
|
||||
Need char = 128
|
||||
|
||||
/\x{084}/D8
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc2\x84
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 194
|
||||
Need char = 132
|
||||
|
||||
/\x{104}/D8
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
3 2 \xc4\x84
|
||||
7 7 Ket
|
||||
10 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 196
|
||||
Need char = 132
|
||||
|
||||
/\x{861}/D8
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
3 3 \xe0\xa1\xa1
|
||||
8 8 Ket
|
||||
11 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 224
|
||||
Need char = 161
|
||||
|
||||
/\x{212ab}/D8
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra 0
|
||||
3 4 \xf0\xa1\x8a\xab
|
||||
9 9 Ket
|
||||
12 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
First char = 240
|
||||
Need char = 171
|
||||
|
||||
/.{3,5}X/D8
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
3 Any{3}
|
||||
7 Any{0,2}
|
||||
11 1 X
|
||||
14 14 Ket
|
||||
17 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
No first char
|
||||
Need char = 'X'
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}X
|
||||
0: \x{212ab}\x{212ab}\x{212ab}\x{861}X
|
||||
|
||||
|
||||
/.{3,5}?/D8
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
3 Any{3}
|
||||
7 Any{0,2}?
|
||||
11 11 Ket
|
||||
14 End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Options: utf8
|
||||
No first char
|
||||
No need char
|
||||
\x{212ab}\x{212ab}\x{212ab}\x{861}
|
||||
0: \x{212ab}\x{212ab}\x{212ab}
|
||||
|
||||
/-- These tests are here rather than in testinput4 because Perl 5.6 has --/
|
||||
/-- some problems with UTF-8 support, in the area of \x{..} where the --/
|
||||
No match
|
||||
\x{100}\x{100}b
|
||||
/-- value is < 255. It grumbles about invalid UTF-8 strings. --/
|
||||
No match
|
||||
|
||||
/^[a\x{c0}]b/8
|
||||
\x{c0}b
|
||||
0: \x{c0}b
|
||||
|
||||
/^([a\x{c0}]*?)aa/8
|
||||
a\x{c0}aaaa/
|
||||
0: a\x{c0}aa
|
||||
1: a\x{c0}
|
||||
|
||||
/^([a\x{c0}]*?)aa/8
|
||||
a\x{c0}aaaa/
|
||||
0: a\x{c0}aa
|
||||
1: a\x{c0}
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
0: a\x{c0}a\x{c0}aa
|
||||
1: a\x{c0}a\x{c0}
|
||||
|
||||
/^([a\x{c0}]*)aa/8
|
||||
a\x{c0}aaaa/
|
||||
0: a\x{c0}aaaa
|
||||
1: a\x{c0}aa
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
0: a\x{c0}a\x{c0}aaa
|
||||
1: a\x{c0}a\x{c0}a
|
||||
|
||||
/^([a\x{c0}]*)a\x{c0}/8
|
||||
a\x{c0}aaaa/
|
||||
0: a\x{c0}
|
||||
1:
|
||||
a\x{c0}a\x{c0}aaa/
|
||||
0: a\x{c0}a\x{c0}
|
||||
1: a\x{c0}
|
||||
|
||||
/-- --/
|
||||
|
||||
/(?<=\C)X/8
|
||||
Failed: \C not allowed in lookbehind assertion at offset 6
|
||||
|
||||
/-- This one is here not because it's different to Perl, but because the --/
|
||||
/-- way the captured single-byte is displayed. (In Perl it becomes a --/
|
||||
No match
|
||||
/-- character, and you can't tell the difference.) --/
|
||||
No match
|
||||
|
||||
/X(\C)(.*)/8
|
||||
X\x{1234}
|
||||
0: X\x{1234}
|
||||
1: \xe1
|
||||
2: \x88\xb4
|
||||
X\nabc
|
||||
0: X\x{0a}abc
|
||||
1: \x{0a}
|
||||
2: abc
|
||||
|
||||
/ End of testinput5 /
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user