Upgrade PCRE lib to 6.7

2025-01-22 03:34:19 +08:00 · 2006-08-30 20:00:23 +00:00 · 2006-08-30 20:00:23 +00:00 · 45debc52ef
commit 45debc52ef
parent 307b3bcbb4
39 changed files with 3707 additions and 1281 deletions
--- a/ext/pcre/config.w32
+++ b/ext/pcre/config.w32
@ -5,7 +5,7 @@ ARG_WITH("pcre-regex", "Perl Compatible Regular Expressions", "yes");

 if (PHP_PCRE_REGEX == "yes") {
 	EXTENSION("pcre", "php_pcre.c",	PHP_PCRE_REGEX_SHARED,
-		"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DNO_RECURSE -Iext/pcre/pcrelib");
+		"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DNO_RECURSE -Iext/pcre/pcrelib");
 	ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
 	ADD_DEF_FILE("ext\\pcre\\php_pcre.def");

--- a/ext/pcre/config0.m4
+++ b/ext/pcre/config0.m4
@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,

 if test "$PHP_PCRE_REGEX" != "no"; then
  if test "$PHP_PCRE_REGEX" = "yes"; then
-    PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -I@ext_srcdir@/pcrelib)
+    PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -I@ext_srcdir@/pcrelib)
    PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
    PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
    AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
@ -51,7 +51,7 @@ if test "$PHP_PCRE_REGEX" != "no"; then
    
    AC_DEFINE(HAVE_PCRE, 1, [ ])
    PHP_ADD_INCLUDE($PCRE_INCDIR)
-    PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000)
+    PHP_NEW_EXTENSION(pcre, php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000)
  fi
  PHP_SUBST(PCRE_SHARED_LIBADD)
 fi
--- a/ext/pcre/pcrelib/AUTHORS
+++ b/ext/pcre/pcrelib/AUTHORS
@ -8,7 +8,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.

-Copyright (c) 1997-2005 University of Cambridge
+Copyright (c) 1997-2006 University of Cambridge
 All rights reserved


@ -17,7 +17,7 @@ THE C++ WRAPPER LIBRARY

 Written by:       Google Inc.

-Copyright (c) 2005 Google Inc
+Copyright (c) 2006 Google Inc
 All rights reserved

 ####
--- a/ext/pcre/pcrelib/COPYING
+++ b/ext/pcre/pcrelib/COPYING
@ -22,7 +22,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England. Phone: +44 1223 334714.

-Copyright (c) 1997-2005 University of Cambridge
+Copyright (c) 1997-2006 University of Cambridge
 All rights reserved.


@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS

 Contributed by:   Google Inc.

-Copyright (c) 2005, Google Inc.
+Copyright (c) 2006, Google Inc.
 All rights reserved.


--- a/ext/pcre/pcrelib/ChangeLog
+++ b/ext/pcre/pcrelib/ChangeLog
@ -1,6 +1,175 @@
 ChangeLog for PCRE
 ------------------

+Version 6.7 04-Jul-06
+---------------------
+
+ 1. In order to handle tests when input lines are enormously long, pcretest has
+    been re-factored so that it automatically extends its buffers when
+    necessary. The code is crude, but this _is_ just a test program. The
+    default size has been increased from 32K to 50K.
+
+ 2. The code in pcre_study() was using the value of the re argument before
+    testing it for NULL. (Of course, in any sensible call of the function, it
+    won't be NULL.)
+
+ 3. The memmove() emulation function in pcre_internal.h, which is used on
+    systems that lack both memmove() and bcopy() - that is, hardly ever -
+    was missing a "static" storage class specifier.
+
+ 4. When UTF-8 mode was not set, PCRE looped when compiling certain patterns
+    containing an extended class (one that cannot be represented by a bitmap
+    because it contains high-valued characters or Unicode property items, e.g.
+    [\pZ]). Almost always one would set UTF-8 mode when processing such a
+    pattern, but PCRE should not loop if you do not (it no longer does).
+    [Detail: two cases were found: (a) a repeated subpattern containing an
+    extended class; (b) a recursive reference to a subpattern that followed a
+    previous extended class. It wasn't skipping over the extended class
+    correctly when UTF-8 mode was not set.]
+
+ 5. A negated single-character class was not being recognized as fixed-length
+    in lookbehind assertions such as (?<=[^f]), leading to an incorrect
+    compile error "lookbehind assertion is not fixed length".
+
+ 6. The RunPerlTest auxiliary script was showing an unexpected difference
+    between PCRE and Perl for UTF-8 tests. It turns out that it is hard to
+    write a Perl script that can interpret lines of an input file either as
+    byte characters or as UTF-8, which is what "perltest" was being required to
+    do for the non-UTF-8 and UTF-8 tests, respectively. Essentially what you
+    can't do is switch easily at run time between having the "use utf8;" pragma
+    or not. In the end, I fudged it by using the RunPerlTest script to insert
+    "use utf8;" explicitly for the UTF-8 tests.
+
+ 7. In multiline (/m) mode, PCRE was matching ^ after a terminating newline at
+    the end of the subject string, contrary to the documentation and to what
+    Perl does. This was true of both matching functions. Now it matches only at
+    the start of the subject and immediately after *internal* newlines.
+
+ 8. A call of pcre_fullinfo() from pcretest to get the option bits was passing
+    a pointer to an int instead of a pointer to an unsigned long int. This
+    caused problems on 64-bit systems.
+
+ 9. Applied a patch from the folks at Google to pcrecpp.cc, to fix "another
+    instance of the 'standard' template library not being so standard".
+
+10. There was no check on the number of named subpatterns nor the maximum
+    length of a subpattern name. The product of these values is used to compute
+    the size of the memory block for a compiled pattern. By supplying a very
+    long subpattern name and a large number of named subpatterns, the size
+    computation could be caused to overflow. This is now prevented by limiting
+    the length of names to 32 characters, and the number of named subpatterns
+    to 10,000.
+
+11. Subpatterns that are repeated with specific counts have to be replicated in
+    the compiled pattern. The size of memory for this was computed from the
+    length of the subpattern and the repeat count. The latter is limited to
+    65535, but there was no limit on the former, meaning that integer overflow
+    could in principle occur. The compiled length of a repeated subpattern is
+    now limited to 30,000 bytes in order to prevent this.
+
+12. Added the optional facility to have named substrings with the same name.
+
+13. Added the ability to use a named substring as a condition, using the
+    Python syntax: (?(name)yes|no). This overloads (?(R)... and names that
+    are numbers (not recommended). Forward references are permitted.
+
+14. Added forward references in named backreferences (if you see what I mean).
+
+15. In UTF-8 mode, with the PCRE_DOTALL option set, a quantified dot in the
+    pattern could run off the end of the subject. For example, the pattern
+    "(?s)(.{1,5})"8 did this with the subject "ab".
+
+16. If PCRE_DOTALL or PCRE_MULTILINE were set, pcre_dfa_exec() behaved as if
+    PCRE_CASELESS was set when matching characters that were quantified with ?
+    or *.
+
+17. A character class other than a single negated character that had a minimum
+    but no maximum quantifier - for example [ab]{6,} - was not handled
+    correctly by pcre_dfa_exec(). It would match only one character.
+
+18. A valid (though odd) pattern that looked like a POSIX character
+    class but used an invalid character after [ (for example [[,abc,]]) caused
+    pcre_compile() to give the error "Failed: internal error: code overflow" or
+    in some cases to crash with a glibc free() error. This could even happen if
+    the pattern terminated after [[ but there just happened to be a sequence of
+    letters, a binary zero, and a closing ] in the memory that followed.
+
+19. Perl's treatment of octal escapes in the range \400 to \777 has changed
+    over the years. Originally (before any Unicode support), just the bottom 8
+    bits were taken. Thus, for example, \500 really meant \100. Nowadays the
+    output from "man perlunicode" includes this:
+
+      The regular expression compiler produces polymorphic opcodes.  That
+      is, the pattern adapts to the data and automatically switches to
+      the Unicode character scheme when presented with Unicode data--or
+      instead uses a traditional byte scheme when presented with byte
+      data.
+
+    Sadly, a wide octal escape does not cause a switch, and in a string with
+    no other multibyte characters, these octal escapes are treated as before.
+    Thus, in Perl, the pattern  /\500/ actually matches \100 but the pattern
+    /\500|\x{1ff}/ matches \500 or \777 because the whole thing is treated as a
+    Unicode string.
+
+    I have not perpetrated such confusion in PCRE. Up till now, it took just
+    the bottom 8 bits, as in old Perl. I have now made octal escapes with
+    values greater than \377 illegal in non-UTF-8 mode. In UTF-8 mode they
+    translate to the appropriate multibyte character.
+
+20. Applied some refactoring to reduce the number of warnings from Microsoft
+    and Borland compilers. This has included removing the fudge introduced
+    seven years ago for the OS/2 compiler (see 2.02/2 below) because it caused
+    a warning about an unused variable.
+
+21. PCRE has not included VT (character 0x0b) in the set of whitespace
+    characters since release 4.0, because Perl (from release 5.004) does not.
+    [Or at least, is documented not to: some releases seem to be in conflict
+    with the documentation.] However, when a pattern was studied with
+    pcre_study() and all its branches started with \s, PCRE still included VT
+    as a possible starting character. Of course, this did no harm; it just
+    caused an unnecessary match attempt.
+
+22. Removed a now-redundant internal flag bit that recorded the fact that case
+    dependency changed within the pattern. This was once needed for "required
+    byte" processing, but is no longer used. This recovers a now-scarce options
+    bit. Also moved the least significant internal flag bit to the most-
+    significant bit of the word, which was not previously used (hangover from
+    the days when it was an int rather than a uint) to free up another bit for
+    the future.
+
+23. Added support for CRLF line endings as well as CR and LF. As well as the
+    default being selectable at build time, it can now be changed at runtime
+    via the PCRE_NEWLINE_xxx flags. There are now options for pcregrep to
+    specify that it is scanning data with non-default line endings.
+
+24. Changed the definition of CXXLINK to make it agree with the definition of
+    LINK in the Makefile, by replacing LDFLAGS with CXXFLAGS.
+
+25. Applied Ian Taylor's patches to avoid using another stack frame for tail
+    recursions. This makes a big difference to stack usage for some patterns.
+
+26. If a subpattern containing a named recursion or subroutine reference such
+    as (?P>B) was quantified, for example (xxx(?P>B)){3}, the calculation of
+    the space required for the compiled pattern went wrong and gave too small a
+    value. Depending on the environment, this could lead to "Failed: internal
+    error: code overflow at offset 49" or "glibc detected double free or
+    corruption" errors.
+
+27. Applied patches from Google (a) to support the new newline modes and (b) to
+    advance over multibyte UTF-8 characters in GlobalReplace.
+
+28. Change free() to pcre_free() in pcredemo.c. Apparently this makes a
+    difference for some implementation of PCRE in some Windows version.
+
+29. Added some extra testing facilities to pcretest:
+
+    \q<number>   in a data line sets the "match limit" value
+    \Q<number>   in a data line sets the "match recursion limit" value
+    -S <number>  sets the stack size, where <number> is in megabytes
+
+    The -S option isn't available for Windows.
+
+
 Version 6.6 06-Feb-06
 ---------------------

--- a/ext/pcre/pcrelib/LICENCE
+++ b/ext/pcre/pcrelib/LICENCE
@ -31,7 +31,7 @@ THE C++ WRAPPER FUNCTIONS

 Contributed by:   Google Inc.

-Copyright (c) 2005, Google Inc.
+Copyright (c) 2006, Google Inc.
 All rights reserved.


--- a/ext/pcre/pcrelib/NEWS
+++ b/ext/pcre/pcrelib/NEWS
@ -1,6 +1,17 @@
 News about PCRE releases
 ------------------------

+Release 6.7 04-Jul-06
+---------------------
+
+The main additions to this release are the ability to use the same name for
+multiple sets of parentheses, and support for CRLF line endings in both the
+library and pcregrep (and in pcretest for testing).
+
+Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
+significantly reduced for certain subject strings.
+
+
 Release 6.5 01-Feb-06
 ---------------------

--- a/ext/pcre/pcrelib/README
+++ b/ext/pcre/pcrelib/README
@ -34,7 +34,7 @@ Documentation for PCRE
 ----------------------

 If you install PCRE in the normal way, you will end up with an installed set of
-man pages whose names all start with "pcre". The one that is called "pcre"
+man pages whose names all start with "pcre". The one that is just called "pcre"
 lists all the others. In addition to these man pages, the PCRE documentation is
 supplied in two other forms; however, as there is no standard place to install
 them, they are left in the doc directory of the unpacked source distribution.
@ -114,15 +114,17 @@ library. You can read more about them in the pcrebuild man page.
 . If, in addition to support for UTF-8 character strings, you want to include
  support for the \P, \p, and \X sequences that recognize Unicode character
  properties, you must add --enable-unicode-properties to the "configure"
-  command. This adds about 90K to the size of the library (in the form of a
+  command. This adds about 30K to the size of the library (in the form of a
  property table); only the basic two-letter properties such as Lu are
  supported.

-. You can build PCRE to recognize either CR or LF as the newline character,
-  instead of whatever your compiler uses for "\n", by adding --newline-is-cr or
-  --newline-is-lf to the "configure" command, respectively. Only do this if you
-  really understand what you are doing. On traditional Unix-like systems, the
-  newline character is LF.
+. You can build PCRE to recognize either CR or LF or the sequence CRLF as
+  indicating the end of a line. Whatever you specify at build time is the
+  default; the caller of PCRE can change the selection at run time. The default
+  newline indicator is a single LF character (the Unix standard). You can
+  specify the default newline indicator by adding --newline-is-cr or
+  --newline-is-lf or --newline-is-crlf to the "configure" command,
+  respectively.

 . When called via the POSIX interface, PCRE uses malloc() to get additional
  storage for processing capturing parentheses if there are more than 10 of
@ -142,6 +144,16 @@ library. You can read more about them in the pcrebuild man page.
  pcre_exec() can supply their own value. There is discussion on the pcreapi
  man page.

+. There is a separate counter that limits the depth of recursive function calls
+  during a matching process. This also has a default of ten million, which is
+  essentially "unlimited". You can change the default by setting, for example,
+
+  --with-match-limit-recursion=500000
+
+  Recursive function calls use up the runtime stack; running out of stack can
+  cause programs to crash in strange ways. There is a discussion about stack
+  sizes in the pcrestack man page.
+
 . The default maximum compiled pattern size is around 64K. You can increase
  this by adding --with-link-size=3 to the "configure" command. You can
  increase it even more by setting --with-link-size=4, but this is unlikely
@ -165,7 +177,6 @@ library. You can read more about them in the pcrebuild man page.

 The "configure" script builds eight files for the basic C library:

-. pcre.h is the header file for C programs that call PCRE
 . Makefile is the makefile that builds the library
 . config.h contains build-time configuration options for the library
 . pcre-config is a script that shows the settings of "configure" options
@ -432,25 +443,24 @@ The distribution should contain the following files:
  pcre_info.c           )
  pcre_maketables.c     )
  pcre_ord2utf8.c       )
-  pcre_printint.c       )
+  pcre_refcount.c       )
  pcre_study.c          )
  pcre_tables.c         )
  pcre_try_flipped.c    )
-  pcre_ucp_findchar.c   )
+  pcre_ucp_searchfuncs.c)
  pcre_valid_utf8.c     )
  pcre_version.c        )
  pcre_xclass.c         )
-
-  ucp_findchar.c        )
-  ucp.h                 ) source for the code that is used for
-  ucpinternal.h         )   Unicode property handling
  ucptable.c            )
-  ucptypetable.c        )

-  pcre.in               "source" for the header for the external API; pcre.h
-                          is built from this by "configure"
+  pcre_printint.src     ) debugging function that is #included in pcretest, and
+                        )   can also be #included in pcre_compile()
+
+  pcre.h                the public PCRE header file
  pcreposix.h           header for the external POSIX wrapper API
  pcre_internal.h       header for internal use
+  ucp.h                 ) headers concerned with
+  ucpinternal.h         )   Unicode property handling
  config.in             template for config.h, which is built by configure

  pcrecpp.h             the header file for the C++ wrapper
@ -477,8 +487,9 @@ The distribution should contain the following files:
  RunGrepTest.in        template for a Unix shell script for pcregrep tests
  config.guess          ) files used by libtool,
  config.sub            )   used only when building a shared library
+  config.h.in           "source" for the config.h header file
  configure             a configuring shell script (built by autoconf)
-  configure.in          the autoconf input used to build configure
+  configure.ac          the autoconf input used to build configure
  doc/Tech.Notes        notes on the encoding
  doc/*.3               man page sources for the PCRE functions
  doc/*.1               man page sources for pcregrep and pcretest
@ -506,7 +517,6 @@ The distribution should contain the following files:

  libpcre.def
  libpcreposix.def
-  pcre.def

 (D) Auxiliary file for VPASCAL

@ -515,4 +525,4 @@ The distribution should contain the following files:
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-January 2006
+June 2006
--- a/ext/pcre/pcrelib/doc/Tech.Notes
+++ b/ext/pcre/pcrelib/doc/Tech.Notes
@ -1,6 +1,9 @@
 Technical Notes about PCRE
 --------------------------

+These are very rough technical notes that record potentially useful information 
+about PCRE internals.
+
 Historical note 1
 -----------------

@ -21,13 +24,14 @@ the pattern, as is expected in Unix and Perl-style regular expressions.
 Historical note 2
 -----------------

-By contrast, the code originally written by Henry Spencer and subsequently
-heavily modified for Perl actually compiles the expression twice: once in a
-dummy mode in order to find out how much store will be needed, and then for
-real. The execution function operates by backtracking and maximizing (or,
-optionally, minimizing in Perl) the amount of the subject that matches
-individual wild portions of the pattern. This is an "NFA algorithm" in Friedl's
-terminology.
+By contrast, the code originally written by Henry Spencer (which was
+subsequently heavily modified for Perl) compiles the expression twice: once in
+a dummy mode in order to find out how much store will be needed, and then for
+real. (The Perl version probably doesn't do this any more; I'm talking about
+the original library.) The execution function operates by backtracking and
+maximizing (or, optionally, minimizing in Perl) the amount of the subject that
+matches individual wild portions of the pattern. This is an "NFA algorithm" in
+Friedl's terminology.

 OK, here's the real stuff
 -------------------------
@ -43,7 +47,7 @@ then a second pass to do the real compile - which may use a bit less than the
 predicted amount of store. The idea is that this is going to turn out faster
 because the first pass is degenerate and the second pass can just store stuff
 straight into the vector, which it knows is big enough. It does make the
-compiling functions bigger, of course, but they have got quite big anyway to
+compiling functions bigger, of course, but they have become quite big anyway to
 handle all the Perl stuff.

 Traditional matching function
@ -63,7 +67,7 @@ pcre_dfa_exec(). This implements a DFA matching algorithm that searches
 simultaneously for all possible matches that start at one point in the subject 
 string. (Going back to my roots: see Historical Note 1 above.) This function 
 intreprets the same compiled pattern data as pcre_exec(); however, not all the 
-facilities are available, and those that are don't always work in quite the 
+facilities are available, and those that are do not always work in quite the 
 same way. See the user documentation for details.

 Format of compiled patterns
@ -157,10 +161,12 @@ Match by Unicode property

 OP_PROP and OP_NOTPROP are used for positive and negative matches of a 
 character by testing its Unicode property (the \p and \P escape sequences).
-Each is followed by a single byte that encodes the desired property value.
+Each is followed by two bytes that encode the desired property as a type and a 
+value.

-Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by two 
-bytes: OP_PROP or OP_NOTPROP and then the desired property value.
+Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by 
+three bytes: OP_PROP or OP_NOTPROP and then the desired property type and 
+value.


 Matching literal characters
@ -339,4 +345,4 @@ at compile time, and so does not cause anything to be put into the compiled
 data.

 Philip Hazel
-January 2006
+June 2006
--- a/ext/pcre/pcrelib/doc/pcre.txt
+++ b/ext/pcre/pcrelib/doc/pcre.txt
--- a/ext/pcre/pcrelib/pcre.h
+++ b/ext/pcre/pcrelib/pcre.h
@ -55,9 +55,9 @@ cannot run ./configure. As it now stands, this file need not be edited in that
 circumstance. */

 #define PCRE_MAJOR          6
-#define PCRE_MINOR          6
+#define PCRE_MINOR          7
 #define PCRE_PRERELEASE
-#define PCRE_DATE           06-Feb-2006
+#define PCRE_DATE           04-Jul-2006

 /* Win32 uses DLL by default; it needs special stuff for exported functions
 when building PCRE. */
@ -116,6 +116,10 @@ extern "C" {
 #define PCRE_DFA_SHORTEST       0x00010000
 #define PCRE_DFA_RESTART        0x00020000
 #define PCRE_FIRSTLINE          0x00040000
+#define PCRE_DUPNAMES           0x00080000
+#define PCRE_NEWLINE_CR         0x00100000
+#define PCRE_NEWLINE_LF         0x00200000
+#define PCRE_NEWLINE_CRLF       0x00300000

 /* Exec-time and get/set-time error codes */

@ -269,6 +273,8 @@ PCRE_DATA_SCOPE int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
 PCRE_DATA_SCOPE int  pcre_get_named_substring(const pcre *, const char *,
                  int *, int, const char *, const char **);
 PCRE_DATA_SCOPE int  pcre_get_stringnumber(const pcre *, const char *);
+PCRE_DATA_SCOPE int  pcre_get_stringtable_entries(const pcre *, const char *,
+                  char **, char **);
 PCRE_DATA_SCOPE int  pcre_get_substring(const char *, int *, int, int,
                  const char **);
 PCRE_DATA_SCOPE int  pcre_get_substring_list(const char *, int *, int,
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@ -42,6 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 supporting internal functions that are not used by other modules. */


+#define NLBLOCK cd            /* The block containing newline information */
 #include "pcre_internal.h"


@ -190,7 +191,7 @@ static const char *error_texts[] = {
  "unrecognized character after (?<",
  /* 25 */
  "lookbehind assertion is not fixed length",
-  "malformed number after (?(",
+  "malformed number or name after (?(",
  "conditional group contains more than two branches",
  "assertion expected after (?(",
  "(?R or (?digits must be followed by )",
@ -210,12 +211,17 @@ static const char *error_texts[] = {
  "recursive call could loop indefinitely",
  "unrecognized character after (?P",
  "syntax error after (?P",
-  "two named groups have the same name",
+  "two named subpatterns have the same name",
  "invalid UTF-8 string",
  /* 45 */
  "support for \\P, \\p, and \\X has not been compiled",
  "malformed \\P or \\p sequence",
-  "unknown property name after \\P or \\p"
+  "unknown property name after \\P or \\p",
+  "subpattern name is too long (maximum 32 characters)",
+  "too many named subpatterns (maximum 10,000)",
+  /* 50 */
+  "repeated subpattern is too long",
+  "octal value is greater than \\377 (not in UTF-8 mode)"
 };


@ -460,13 +466,16 @@ else
      }

    /* \0 always starts an octal number, but we may drop through to here with a
-    larger first octal digit. */
+    larger first octal digit. The original code used just to take the least
+    significant 8 bits of octal numbers (I think this is what early Perls used
+    to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
+    than 3 octal digits. */

    case '0':
    c -= '0';
    while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
        c = c * 8 + *(++ptr) - '0';
-    c &= 255;     /* Take least significant 8 bits */
+    if (!utf8 && c > 255) *errorcodeptr = ERR51;
    break;

    /* \x is complicated. \x{ddd} is a character number which can be greater
@ -762,6 +771,48 @@ return p;



+/*************************************************
+*     Find forward referenced named subpattern   *
+*************************************************/
+
+/* Scan along a pattern, counting capturing subpatterns, and return the number
+of the first named subpattern whose name matches the one given. This is used
+for forward references to named subpatterns. The first pass should have
+checked that each (?P< name ends with '>'; the scan stops at a zero anyway.
+
+Arguments:
+  ptr          current position in the pattern
+  count        current count of capturing parens
+  name         name to seek
+  namelen      name length
+
+Returns:       the number of the named subpattern, or -1 if not found
+*/
+
+static int
+find_named_parens(const uschar *ptr, int count, const uschar *name, int namelen)
+{
+const uschar *thisname;
+for (; *ptr != 0; ptr++)
+  {
+  if (*ptr == '\\' && ptr[1] != 0) { ptr++; continue; }  /* Skip escaped char */
+  if (*ptr != '(') continue;
+  if (ptr[1] != '?') { count++; continue; }         /* Plain capturing group */
+  if (ptr[2] == '(') { ptr += 2; continue; }        /* Conditional: (?( */
+  if (ptr[2] != 'P' || ptr[3] != '<') continue;     /* Other (? items */
+  count++;
+  ptr += 4;
+  thisname = ptr;
+  while (*ptr != 0 && *ptr != '>') ptr++;
+  if (*ptr == 0) return -1;          /* Unterminated name: never overrun */
+  if (namelen == ptr - thisname && memcmp(name, thisname, namelen) == 0)
+    return count;
+  }
+return -1;
+}
+
+
+
 /*************************************************
 *      Find first significant op code            *
 *************************************************/
@ -917,6 +968,7 @@ for (;;)

    case OP_CHAR:
    case OP_CHARNC:
+    case OP_NOT:
    branchlength++;
    cc += 2;
 #ifdef SUPPORT_UTF8
@ -1031,14 +1083,19 @@ Returns:      pointer to the opcode for the bracket, or NULL if not found
 static const uschar *
 find_bracket(const uschar *code, BOOL utf8, int number)
 {
-#ifndef SUPPORT_UTF8
-utf8 = utf8;               /* Stop pedantic compilers complaining */
-#endif
-
 for (;;)
  {
  register int c = *code;
  if (c == OP_END) return NULL;
+
+  /* XCLASS is used for classes that cannot be represented just by a bit
+  map. This includes negated single high-valued characters. The length in
+  the table is zero; the actual length is stored in the compiled code. */
+
+  if (c == OP_XCLASS) code += GET(code, 1);
+
+  /* Handle bracketed group */
+
  else if (c > OP_BRA)
    {
    int n = c - OP_BRA;
@ -1046,17 +1103,16 @@ for (;;)
    if (n == number) return (uschar *)code;
    code += _pcre_OP_lengths[OP_BRA];
    }
+
+  /* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
+  that are followed by a character may be followed by a multi-byte character.
+  The length in the table is a minimum, so we have to scan along to skip the
+  extra bytes. All opcodes are less than 128, so we can use relatively
+  efficient code. */
+
  else
    {
    code += _pcre_OP_lengths[c];
-
-#ifdef SUPPORT_UTF8
-
-    /* In UTF-8 mode, opcodes that are followed by a character may be followed
-    by a multi-byte character. The length in the table is a minimum, so we have
-    to scan along to skip the extra bytes. All opcodes are less than 128, so we
-    can use relatively efficient code. */
-
    if (utf8) switch(c)
      {
      case OP_CHAR:
@ -1072,16 +1128,7 @@ for (;;)
      case OP_MINQUERY:
      while ((*code & 0xc0) == 0x80) code++;
      break;
-
-      /* XCLASS is used for classes that cannot be represented just by a bit
-      map. This includes negated single high-valued characters. The length in
-      the table is zero; the actual length is stored in the compiled code. */
-
-      case OP_XCLASS:
-      code += GET(code, 1) + 1;
-      break;
      }
-#endif
    }
  }
 }
@ -1105,30 +1152,34 @@ Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
 static const uschar *
 find_recurse(const uschar *code, BOOL utf8)
 {
-#ifndef SUPPORT_UTF8
-utf8 = utf8;               /* Stop pedantic compilers complaining */
-#endif
-
 for (;;)
  {
  register int c = *code;
  if (c == OP_END) return NULL;
-  else if (c == OP_RECURSE) return code;
+  if (c == OP_RECURSE) return code;
+
+  /* XCLASS is used for classes that cannot be represented just by a bit
+  map. This includes negated single high-valued characters. The length in
+  the table is zero; the actual length is stored in the compiled code. */
+
+  if (c == OP_XCLASS) code += GET(code, 1);
+
+  /* All bracketed groups have the same length. */
+
  else if (c > OP_BRA)
    {
    code += _pcre_OP_lengths[OP_BRA];
    }
+
+  /* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
+  that are followed by a character may be followed by a multi-byte character.
+  The length in the table is a minimum, so we have to scan along to skip the
+  extra bytes. All opcodes are less than 128, so we can use relatively
+  efficient code. */
+
  else
    {
    code += _pcre_OP_lengths[c];
-
-#ifdef SUPPORT_UTF8
-
-    /* In UTF-8 mode, opcodes that are followed by a character may be followed
-    by a multi-byte character. The length in the table is a minimum, so we have
-    to scan along to skip the extra bytes. All opcodes are less than 128, so we
-    can use relatively efficient code. */
-
    if (utf8) switch(c)
      {
      case OP_CHAR:
@ -1144,16 +1195,7 @@ for (;;)
      case OP_MINQUERY:
      while ((*code & 0xc0) == 0x80) code++;
      break;
-
-      /* XCLASS is used for classes that cannot be represented just by a bit
-      map. This includes negated single high-valued characters. The length in
-      the table is zero; the actual length is stored in the compiled code. */
-
-      case OP_XCLASS:
-      code += GET(code, 1) + 1;
-      break;
      }
-#endif
    }
  }
 }
@ -1569,7 +1611,6 @@ int greedy_default, greedy_non_default;
 int firstbyte, reqbyte;
 int zeroreqbyte, zerofirstbyte;
 int req_caseopt, reqvary, tempreqvary;
-int condcount = 0;
 int options = *optionsptr;
 int after_manual_callout = 0;
 register int c;
@ -1683,10 +1724,14 @@ for (;; ptr++)
    if ((cd->ctypes[c] & ctype_space) != 0) continue;
    if (c == '#')
      {
-      /* The space before the ; is to avoid a warning on a silly compiler
-      on the Macintosh. */
-      while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
-      if (c != 0) continue;   /* Else fall through to handle end of string */
+      while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) break;
+      if (*ptr != 0)
+        {
+        ptr += cd->nllen - 1;
+        continue;
+        }
+      /* Else fall through to handle end of string */
+      c = 0;
      }
    }

@ -2851,37 +2896,91 @@ for (;; ptr++)
        case '(':
        bravalue = OP_COND;       /* Conditional group */

-        /* Condition to test for recursion */
+        /* A condition can be a number, referring to a numbered group, a name,
+        referring to a named group, 'R', referring to recursion, or an
+        assertion. There are two unfortunate ambiguities, caused by history.
+        (a) 'R' can be the recursive thing or the name 'R', and (b) a number
+        could be a name that consists of digits. In both cases, we look for a
+        name first; if not found, we try the other cases. If the first
+        character after (?( is a word character, we know the rest up to ) will
+        also be word characters because the syntax was checked in the first
+        pass. */

-        if (ptr[1] == 'R')
+        if ((cd->ctypes[ptr[1]] & ctype_word) != 0)
          {
-          code[1+LINK_SIZE] = OP_CREF;
-          PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
+          int i, namelen;
+          int condref = 0;
+          const uschar *name;
+          uschar *slot = cd->name_table;
+
+          /* This is needed for all successful cases. */
+
          skipbytes = 3;
-          ptr += 3;
-          }

-        /* Condition to test for a numbered subpattern match. We know that
-        if a digit follows ( then there will just be digits until ) because
-        the syntax was checked in the first pass. */
+          /* Read the name, but also get it as a number if it's all digits */

-        else if ((digitab[ptr[1]] && ctype_digit) != 0)
-          {
-          int condref;                 /* Don't amalgamate; some compilers */
-          condref = *(++ptr) - '0';    /* grumble at autoincrement in declaration */
-          while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
-          if (condref == 0)
+          name = ++ptr;
+          while (*ptr != ')')
            {
-            *errorcodeptr = ERR35;
+            if (condref >= 0)
+              condref = ((digitab[*ptr] & ctype_digit) != 0)?
+                condref * 10 + *ptr - '0' : -1;
+            ptr++;
+            }
+          namelen = ptr - name;
+          ptr++;
+
+          for (i = 0; i < cd->names_found; i++)
+            {
+            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
+            slot += cd->name_entry_size;
+            }
+
+          /* Found a previous named subpattern */
+
+          if (i < cd->names_found)
+            {
+            condref = GET2(slot, 0);
+            code[1+LINK_SIZE] = OP_CREF;
+            PUT2(code, 2+LINK_SIZE, condref);
+            }
+
+          /* Search the pattern for a forward reference */
+
+          else if ((i = find_named_parens(ptr, *brackets, name, namelen)) > 0)
+            {
+            code[1+LINK_SIZE] = OP_CREF;
+            PUT2(code, 2+LINK_SIZE, i);
+            }
+
+          /* Check for 'R' for recursion */
+
+          else if (namelen == 1 && *name == 'R')
+            {
+            code[1+LINK_SIZE] = OP_CREF;
+            PUT2(code, 2+LINK_SIZE, CREF_RECURSE);
+            }
+
+          /* Check for a subpattern number */
+
+          else if (condref > 0)
+            {
+            code[1+LINK_SIZE] = OP_CREF;
+            PUT2(code, 2+LINK_SIZE, condref);
+            }
+
+          /* Either an unidentified subpattern, or a reference to (?(0) */
+
+          else
+            {
+            *errorcodeptr = (condref == 0)? ERR35: ERR15;
            goto FAILED;
            }
-          ptr++;
-          code[1+LINK_SIZE] = OP_CREF;
-          PUT2(code, 2+LINK_SIZE, condref);
-          skipbytes = 3;
          }
+
        /* For conditions that are assertions, we just fall through, having
        set bravalue above. */
+
        break;

        case '=':                 /* Positive lookahead */
@ -2953,10 +3052,13 @@ for (;; ptr++)
              {
              if (slot[2+namelen] == 0)
                {
-                *errorcodeptr = ERR43;
-                goto FAILED;
+                if ((options & PCRE_DUPNAMES) == 0)
+                  {
+                  *errorcodeptr = ERR43;
+                  goto FAILED;
+                  }
                }
-              crc = -1;             /* Current name is substring */
+              else crc = -1;      /* Current name is substring */
              }
            if (crc < 0)
              {
@ -2989,14 +3091,18 @@ for (;; ptr++)
            if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
            slot += cd->name_entry_size;
            }
-          if (i >= cd->names_found)
+
+          if (i < cd->names_found)         /* Back reference */
+            {
+            recno = GET2(slot, 0);
+            }
+          else if ((recno =                /* Forward back reference */
+                    find_named_parens(ptr, *brackets, name, namelen)) <= 0)
            {
            *errorcodeptr = ERR15;
            goto FAILED;
            }

-          recno = GET2(slot, 0);
-
          if (type == '>') goto HANDLE_RECURSION;  /* A few lines below */

          /* Back reference */
@ -3036,9 +3142,8 @@ for (;; ptr++)
          regex in case it doesn't exist. */

          *code = OP_END;
-          called = (recno == 0)?
-            cd->start_code : find_bracket(cd->start_code, utf8, recno);
-
+          called = (recno == 0)? cd->start_code :
+            find_bracket(cd->start_code, utf8, recno);
          if (called == NULL)
            {
            *errorcodeptr = ERR15;
@ -3085,6 +3190,7 @@ for (;; ptr++)
            case '-': optset = &unset; break;

            case 'i': *optset |= PCRE_CASELESS; break;
+            case 'J': *optset |= PCRE_DUPNAMES; break;
            case 'm': *optset |= PCRE_MULTILINE; break;
            case 's': *optset |= PCRE_DOTALL; break;
            case 'x': *optset |= PCRE_EXTENDED; break;
@ -3201,7 +3307,7 @@ for (;; ptr++)
    else if (bravalue == OP_COND)
      {
      uschar *tc = code;
-      condcount = 0;
+      int condcount = 0;

      do {
         condcount++;
@ -3906,13 +4012,14 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
 }


+
 PCRE_DATA_SCOPE pcre *
 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
  const char **errorptr, int *erroroffset, const unsigned char *tables)
 {
 real_pcre *re;
 int length = 1 + LINK_SIZE;      /* For initial BRA plus length */
-int c, firstbyte, reqbyte;
+int c, firstbyte, reqbyte, newline;
 int bracount = 0;
 int branch_extra = 0;
 int branch_newextra;
@ -3933,6 +4040,7 @@ uschar *code;
 const uschar *codestart;
 const uschar *ptr;
 compile_data compile_block;
+compile_data *cd = &compile_block;
 int brastack[BRASTACK_SIZE];
 uschar bralenstack[BRASTACK_SIZE];

@ -3986,18 +4094,42 @@ if ((options & ~PUBLIC_OPTIONS) != 0)
 /* Set up pointers to the individual character tables */

 if (tables == NULL) tables = _pcre_default_tables;
-compile_block.lcc = tables + lcc_offset;
-compile_block.fcc = tables + fcc_offset;
-compile_block.cbits = tables + cbits_offset;
-compile_block.ctypes = tables + ctypes_offset;
+cd->lcc = tables + lcc_offset;
+cd->fcc = tables + fcc_offset;
+cd->cbits = tables + cbits_offset;
+cd->ctypes = tables + ctypes_offset;
+
+/* Handle different types of newline. The two bits give four cases. The current
+code allows for one- or two-byte sequences. */
+
+switch (options & PCRE_NEWLINE_CRLF)
+  {
+  default:              newline = NEWLINE; break;   /* Compile-time default */
+  case PCRE_NEWLINE_CR: newline = '\r'; break;
+  case PCRE_NEWLINE_LF: newline = '\n'; break;
+  case PCRE_NEWLINE_CR+
+       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+  }
+
+if (newline > 255)
+  {
+  cd->nllen = 2;
+  cd->nl[0] = (newline >> 8) & 255;
+  cd->nl[1] = newline & 255;
+  }
+else
+  {
+  cd->nllen = 1;
+  cd->nl[0] = newline;
+  }

 /* Maximum back reference and backref bitmap. This is updated for numeric
 references during the first pass, but for named references during the actual
 compile pass. The bitmap records up to 31 back references to help in deciding
 whether (.*) can be treated as anchored or not. */

-compile_block.top_backref = 0;
-compile_block.backref_map = 0;
+cd->top_backref = 0;
+cd->backref_map = 0;

 /* Reflect pattern for debugging output */

@ -4031,14 +4163,16 @@ while ((c = *(++ptr)) != 0)

  if ((options & PCRE_EXTENDED) != 0)
    {
-    if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
+    if ((cd->ctypes[c] & ctype_space) != 0) continue;
    if (c == '#')
      {
-      /* The space before the ; is to avoid a warning on a silly compiler
-      on the Macintosh. */
-      while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
-      if (c == 0) break;
-      continue;
+      while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) break;
+      if (*ptr != 0)
+        {
+        ptr += cd->nllen - 1;
+        continue;
+        }
+      break;    /* End loop at end of pattern */
      }
    }

@ -4128,9 +4262,9 @@ while ((c = *(++ptr)) != 0)
    if (c <= -ESC_REF)
      {
      int refnum = -c - ESC_REF;
-      compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;
-      if (refnum > compile_block.top_backref)
-        compile_block.top_backref = refnum;
+      cd->backref_map |= (refnum < 32)? (1 << refnum) : 1;
+      if (refnum > cd->top_backref)
+        cd->top_backref = refnum;
      length += 2;   /* For single back reference */
      if (ptr[1] == '{' && is_counted_repeat(ptr+2))
        {
@ -4284,7 +4418,9 @@ while ((c = *(++ptr)) != 0)
      /* Check the syntax for POSIX stuff. The bits we actually handle are
      checked during the real compile phase. */

-      else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))
+      else if (*ptr == '[' &&
+                (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
+                check_posix_syntax(ptr, &ptr, cd))
        {
        ptr++;
        class_optcount = 10;    /* Make sure > 1 */
@ -4517,6 +4653,61 @@ while ((c = *(++ptr)) != 0)
        ptr += 2;
        break;

+        /* Named subpatterns are an extension copied from Python */
+
+        case 'P':
+        ptr += 3;
+
+        /* Handle the definition of a named subpattern */
+
+        if (*ptr == '<')
+          {
+          const uschar *p;    /* Don't amalgamate; some compilers */
+          p = ++ptr;          /* grumble at autoincrement in declaration */
+          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
+          if (*ptr != '>')
+            {
+            errorcode = ERR42;
+            goto PCRE_ERROR_RETURN;
+            }
+          name_count++;
+          if (name_count > MAX_NAME_COUNT)
+            {
+            errorcode = ERR49;
+            goto PCRE_ERROR_RETURN;
+            }
+          if (ptr - p > max_name_size)
+            {
+            max_name_size = (ptr - p);
+            if (max_name_size > MAX_NAME_SIZE)
+              {
+              errorcode = ERR48;
+              goto PCRE_ERROR_RETURN;
+              }
+            }
+          capturing = TRUE;   /* Named parentheses are always capturing */
+          break;              /* Go handle capturing parentheses */
+          }
+
+        /* Handle back references and recursive calls to named subpatterns */
+
+        if (*ptr == '=' || *ptr == '>')
+          {
+          length += 3 + 3*LINK_SIZE;  /* Allow for the automatic "once" */
+          while ((cd->ctypes[*(++ptr)] & ctype_word) != 0);
+          if (*ptr != ')')
+            {
+            errorcode = ERR42;
+            goto PCRE_ERROR_RETURN;
+            }
+          goto RECURSE_CHECK_QUANTIFIED;
+          }
+
+        /* Unknown character after (?P */
+
+        errorcode = ERR41;
+        goto PCRE_ERROR_RETURN;
+
        /* (?R) specifies a recursive call to the regex, which is an extension
        to provide the facility which can be obtained by (?p{perl-code}) in
        Perl 5.6. In Perl 5.8 this has become (??{perl-code}).
@ -4542,8 +4733,10 @@ while ((c = *(++ptr)) != 0)

        /* If this item is quantified, it will get wrapped inside brackets so
        as to use the code for quantified brackets. We jump down and use the
-        code that handles this for real brackets. */
+        code that handles this for real brackets. Come here from code for
+        named recursions/subroutines. */

+        RECURSE_CHECK_QUANTIFIED:
        if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')
          {
          length += 2 + 2 * LINK_SIZE;       /* to make bracketed */
@ -4567,48 +4760,6 @@ while ((c = *(++ptr)) != 0)
        length += 2 + 2*LINK_SIZE;
        continue;

-        /* Named subpatterns are an extension copied from Python */
-
-        case 'P':
-        ptr += 3;
-
-        /* Handle the definition of a named subpattern */
-
-        if (*ptr == '<')
-          {
-          const uschar *p;    /* Don't amalgamate; some compilers */
-          p = ++ptr;          /* grumble at autoincrement in declaration */
-          while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;
-          if (*ptr != '>')
-            {
-            errorcode = ERR42;
-            goto PCRE_ERROR_RETURN;
-            }
-          name_count++;
-          if (ptr - p > max_name_size) max_name_size = (ptr - p);
-          capturing = TRUE;   /* Named parentheses are always capturing */
-          break;
-          }
-
-        /* Handle back references and recursive calls to named subpatterns */
-
-        if (*ptr == '=' || *ptr == '>')
-          {
-          length += 2 + 2*LINK_SIZE;  /* Allow for the automatic "once" */
-          while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
-          if (*ptr != ')')
-            {
-            errorcode = ERR42;
-            goto PCRE_ERROR_RETURN;
-            }
-          break;
-          }
-
-        /* Unknown character after (?P */
-
-        errorcode = ERR41;
-        goto PCRE_ERROR_RETURN;
-
        /* Lookbehinds are in Perl from version 5.005 */

        case '<':
@ -4624,19 +4775,17 @@ while ((c = *(++ptr)) != 0)

        /* Conditionals are in Perl from version 5.005. The bracket must either
        be followed by a number (for bracket reference) or by an assertion
-        group, or (a PCRE extension) by 'R' for a recursion test. */
+        group. PCRE extends this by allowing a name to reference a named group;
+        unfortunately, previously 'R' was implemented for a recursion test.
+        When this is compiled, we look for the named group 'R' first. At this
+        point we just do a basic syntax check. */

        case '(':
-        if (ptr[3] == 'R' && ptr[4] == ')')
+        if ((cd->ctypes[ptr[3]] & ctype_word) != 0)
          {
          ptr += 4;
          length += 3;
-          }
-        else if ((digitab[ptr[3]] & ctype_digit) != 0)
-          {
-          ptr += 4;
-          length += 3;
-          while ((digitab[*ptr] & ctype_digit) != 0) ptr++;
+          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
          if (*ptr != ')')
            {
            errorcode = ERR26;
@ -4675,6 +4824,11 @@ while ((c = *(++ptr)) != 0)
            *optset |= PCRE_CASELESS;
            continue;

+            case 'J':
+            *optset |= PCRE_DUPNAMES;
+            options |= PCRE_JCHANGED;   /* Record that it changed */
+            continue;
+
            case 'm':
            *optset |= PCRE_MULTILINE;
            continue;
@ -4740,16 +4894,13 @@ while ((c = *(++ptr)) != 0)
            will lead to an over-estimate on the length, but this shouldn't
            matter very much. We also have to allow for resetting options at
            the start of any alternations, which we do by setting
-            branch_newextra to 2. Finally, we record whether the case-dependent
-            flag ever changes within the regex. This is used by the "required
-            character" code. */
+            branch_newextra to 2. */

            case ':':
            if (((set|unset) & PCRE_IMS) != 0)
              {
              length += 4;
              branch_newextra = 2;
-              if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
              }
            goto END_OPTIONS;

@ -4829,6 +4980,12 @@ while ((c = *(++ptr)) != 0)
      {
      duplength = length - brastack[--brastackptr];
      branch_extra = bralenstack[brastackptr];
+      /* This is a paranoid check to stop integer overflow later on */
+      if (duplength > MAX_DUPLENGTH)
+        {
+        errorcode = ERR50;
+        goto PCRE_ERROR_RETURN;
+        }
      }
    else duplength = 0;

@ -4933,7 +5090,8 @@ if (length > MAX_PATTERN_SIZE)
  }

 /* Compute the size of data block needed and get it, either from malloc or
-externally provided function. */
+externally provided function. Integer overflow should no longer be possible
+because nowadays we limit the maximum value of name_count and max_name_size. */

 size = length + sizeof(real_pcre) + name_count * (max_name_size + 3);
 re = (real_pcre *)(pcre_malloc)(size);
@ -4963,14 +5121,14 @@ re->nullpad = NULL;
 /* The starting points of the name/number translation table and of the code are
 passed around in the compile data block. */

-compile_block.names_found = 0;
-compile_block.name_entry_size = max_name_size + 3;
-compile_block.name_table = (uschar *)re + re->name_table_offset;
-codestart = compile_block.name_table + re->name_entry_size * re->name_count;
-compile_block.start_code = codestart;
-compile_block.start_pattern = (const uschar *)pattern;
-compile_block.req_varyopt = 0;
-compile_block.nopartial = FALSE;
+cd->names_found = 0;
+cd->name_entry_size = max_name_size + 3;
+cd->name_table = (uschar *)re + re->name_table_offset;
+codestart = cd->name_table + re->name_entry_size * re->name_count;
+cd->start_code = codestart;
+cd->start_pattern = (const uschar *)pattern;
+cd->req_varyopt = 0;
+cd->nopartial = FALSE;

 /* Set up a starting, non-extracting bracket, then compile the expression. On
 error, errorcode will be set non-zero, so we don't need to look at the result
@ -4981,11 +5139,11 @@ code = (uschar *)codestart;
 *code = OP_BRA;
 bracount = 0;
 (void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,
-  &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block);
+  &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd);
 re->top_bracket = bracount;
-re->top_backref = compile_block.top_backref;
+re->top_backref = cd->top_backref;

-if (compile_block.nopartial) re->options |= PCRE_NOPARTIAL;
+if (cd->nopartial) re->options |= PCRE_NOPARTIAL;

 /* If not reached end of pattern on success, there's an excess bracket. */

@ -5031,7 +5189,7 @@ start with ^. and also when all branches start with .* for non-DOTALL matches.
 if ((options & PCRE_ANCHORED) == 0)
  {
  int temp_options = options;
-  if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map))
+  if (is_anchored(codestart, &temp_options, 0, cd->backref_map))
    re->options |= PCRE_ANCHORED;
  else
    {
@ -5041,10 +5199,10 @@ if ((options & PCRE_ANCHORED) == 0)
      {
      int ch = firstbyte & 255;
      re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
-         compile_block.fcc[ch] == ch)? ch : firstbyte;
+         cd->fcc[ch] == ch)? ch : firstbyte;
      re->options |= PCRE_FIRSTSET;
      }
-    else if (is_startline(codestart, 0, compile_block.backref_map))
+    else if (is_startline(codestart, 0, cd->backref_map))
      re->options |= PCRE_STARTLINE;
    }
  }
@ -5058,7 +5216,7 @@ if (reqbyte >= 0 &&
  {
  int ch = reqbyte & 255;
  re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
-    compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
+    cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
  re->options |= PCRE_REQCHSET;
  }

@ -5072,11 +5230,10 @@ printf("Length = %d top_bracket = %d top_backref = %d\n",

 if (re->options != 0)
  {
-  printf("%s%s%s%s%s%s%s%s%s%s\n",
+  printf("%s%s%s%s%s%s%s%s%s\n",
    ((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",
    ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
    ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
-    ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
    ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
    ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
    ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
 possible. There are also some static supporting functions. */

-
+#define NLBLOCK md           /* The block containing newline information */
 #include "pcre_internal.h"


@ -275,7 +275,7 @@ typedef struct heapframe {
  long int Xims;
  eptrblock *Xeptrb;
  int Xflags;
-  int Xrdepth;
+  unsigned int Xrdepth;

  /* Function local variables */

@ -374,16 +374,16 @@ Returns:       MATCH_MATCH if matched            )  these values are >= 0
 static int
 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
  int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
-  int flags, int rdepth)
+  int flags, unsigned int rdepth)
 {
 /* These variables do not need to be preserved over recursion in this function,
 so they can be ordinary variables in all cases. Mark them with "register"
 because they are used a lot in loops. */

-register int  rrc;    /* Returns from recursive calls */
-register int  i;      /* Used for loops not involving calls to RMATCH() */
-register int  c;      /* Character values not kept over RMATCH() calls */
-register BOOL utf8;   /* Local copy of UTF-8 flag for speed */
+register int  rrc;         /* Returns from recursive calls */
+register int  i;           /* Used for loops not involving calls to RMATCH() */
+register unsigned int  c;  /* Character values not kept over RMATCH() calls */
+register BOOL utf8;        /* Local copy of UTF-8 flag for speed */

 /* When recursion is not being used, all "local" variables that have to be
 preserved over calls to RMATCH() are part of a "frame" which is obtained from
@ -527,6 +527,13 @@ prop_fail_result = 0;
 prop_test_variable = NULL;
 #endif

+/* This label is used for tail recursion, which is used in a few cases even
+when NO_RECURSE is not defined, in order to reduce the amount of stack that is
+used. Thanks to Ian Taylor for noticing this possibility and sending the
+original patch. */
+
+TAIL_RECURSE:
+
 /* OK, now we can get on with the real code of the function. Recursive calls
 are specified by the macro RMATCH and RRETURN is used to return. When
 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
@ -542,7 +549,12 @@ if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);

 original_ims = ims;    /* Save for resetting on ')' */
+
+#ifdef SUPPORT_UTF8
 utf8 = md->utf8;       /* Local copy of the flag */
+#else
+utf8 = FALSE;
+#endif

 /* At the start of a bracketed group, add the current subject pointer to the
 stack of such pointers, to be re-instated at the end of the group when we hit
@ -642,21 +654,38 @@ for (;;)
    {
    case OP_BRA:     /* Non-capturing bracket: optimized */
    DPRINTF(("start bracket 0\n"));
-    do
+
+    /* Loop for all the alternatives */
+
+    for (;;)
      {
+      /* When we get to the final alternative within the brackets, we would
+      return the result of a recursive call to match() whatever happened. We
+      can reduce stack usage by turning this into a tail recursion. */
+
+      if (ecode[GET(ecode, 1)] != OP_ALT)
+       {
+       ecode += 1 + LINK_SIZE;
+       flags = match_isgroup;
+       DPRINTF(("bracket 0 tail recursion\n"));
+       goto TAIL_RECURSE;
+       }
+
+      /* For non-final alternatives, continue the loop for a NOMATCH result;
+      otherwise return. */
+
      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
        match_isgroup);
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
      ecode += GET(ecode, 1);
      }
-    while (*ecode == OP_ALT);
-    DPRINTF(("bracket 0 failed\n"));
-    RRETURN(MATCH_NOMATCH);
+    /* Control never reaches here. */

    /* Conditional group: compilation checked that there are no more than
    two branches. If the condition is false, skipping the first branch takes us
    past the end if there is only one branch, but that's OK because that is
-    exactly what going to the ket would do. */
+    exactly what going to the ket would do. As there is only one branch to be
+    obeyed, we can use tail recursion to avoid using another stack frame. */

    case OP_COND:
    if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */
@ -665,10 +694,9 @@ for (;;)
      condition = (offset == CREF_RECURSE * 2)?
        (md->recursive != NULL) :
        (offset < offset_top && md->offset_vector[offset] >= 0);
-      RMATCH(rrc, eptr, ecode + (condition?
-        (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),
-        offset_top, md, ims, eptrb, match_isgroup);
-      RRETURN(rrc);
+      ecode += condition? (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1));
+      flags = match_isgroup;
+      goto TAIL_RECURSE;
      }

    /* The condition is an assertion. Call match() to evaluate it - setting
@ -688,9 +716,13 @@ for (;;)
        RRETURN(rrc);         /* Need braces because of following else */
        }
      else ecode += GET(ecode, 1);
-      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,
-        match_isgroup);
-      RRETURN(rrc);
+
+      /* We are now at the branch that is to be obeyed. As there is only one,
+      we can use tail recursion to avoid using another stack frame. */
+
+      ecode += 1 + LINK_SIZE;
+      flags = match_isgroup;
+      goto TAIL_RECURSE;
      }
    /* Control never reaches here */

@ -945,71 +977,72 @@ for (;;)
    the end of a normal bracket, leaving the subject pointer. */

    case OP_ONCE:
+    prev = ecode;
+    saved_eptr = eptr;
+
+    do
      {
-      prev = ecode;
-      saved_eptr = eptr;
-
-      do
-        {
-        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
-          eptrb, match_isgroup);
-        if (rrc == MATCH_MATCH) break;
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        ecode += GET(ecode,1);
-        }
-      while (*ecode == OP_ALT);
-
-      /* If hit the end of the group (which could be repeated), fail */
-
-      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
-
-      /* Continue as from after the assertion, updating the offsets high water
-      mark, since extracts may have been taken. */
-
-      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
-
-      offset_top = md->end_offset_top;
-      eptr = md->end_match_ptr;
-
-      /* For a non-repeating ket, just continue at this level. This also
-      happens for a repeating ket if no characters were matched in the group.
-      This is the forcible breaking of infinite loops as implemented in Perl
-      5.005. If there is an options reset, it will get obeyed in the normal
-      course of events. */
-
-      if (*ecode == OP_KET || eptr == saved_eptr)
-        {
-        ecode += 1+LINK_SIZE;
-        break;
-        }
-
-      /* The repeating kets try the rest of the pattern or restart from the
-      preceding bracket, in the appropriate order. We need to reset any options
-      that changed within the bracket before re-running it, so check the next
-      opcode. */
-
-      if (ecode[1+LINK_SIZE] == OP_OPT)
-        {
-        ims = (ims & ~PCRE_IMS) | ecode[4];
-        DPRINTF(("ims set to %02lx at group repeat\n", ims));
-        }
-
-      if (*ecode == OP_KETRMIN)
-        {
-        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        }
-      else  /* OP_KETRMAX */
-        {
-        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        }
+      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
+        eptrb, match_isgroup);
+      if (rrc == MATCH_MATCH) break;
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      ecode += GET(ecode,1);
      }
-    RRETURN(MATCH_NOMATCH);
+    while (*ecode == OP_ALT);
+
+    /* If hit the end of the group (which could be repeated), fail */
+
+    if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
+
+    /* Continue as from after the assertion, updating the offsets high water
+    mark, since extracts may have been taken. */
+
+    do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+
+    offset_top = md->end_offset_top;
+    eptr = md->end_match_ptr;
+
+    /* For a non-repeating ket, just continue at this level. This also
+    happens for a repeating ket if no characters were matched in the group.
+    This is the forcible breaking of infinite loops as implemented in Perl
+    5.005. If there is an options reset, it will get obeyed in the normal
+    course of events. */
+
+    if (*ecode == OP_KET || eptr == saved_eptr)
+      {
+      ecode += 1+LINK_SIZE;
+      break;
+      }
+
+    /* The repeating kets try the rest of the pattern or restart from the
+    preceding bracket, in the appropriate order. The second "call" of match()
+    uses tail recursion, to avoid using another stack frame. We need to reset
+    any options that changed within the bracket before re-running it, so
+    check the next opcode. */
+
+    if (ecode[1+LINK_SIZE] == OP_OPT)
+      {
+      ims = (ims & ~PCRE_IMS) | ecode[4];
+      DPRINTF(("ims set to %02lx at group repeat\n", ims));
+      }
+
+    if (*ecode == OP_KETRMIN)
+      {
+      RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      ecode = prev;
+      flags = match_isgroup;
+      goto TAIL_RECURSE;
+      }
+    else  /* OP_KETRMAX */
+      {
+      RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      ecode += 1 + LINK_SIZE;
+      flags = 0;
+      goto TAIL_RECURSE;
+      }
+    /* Control never gets here */

    /* An alternation is the end of a branch; scan along to find the end of the
    bracketed group and go to there. */
@ -1053,114 +1086,114 @@ for (;;)
    case OP_KET:
    case OP_KETRMIN:
    case OP_KETRMAX:
+    prev = ecode - GET(ecode, 1);
+    saved_eptr = eptrb->epb_saved_eptr;
+
+    /* Back up the stack of bracket start pointers. */
+
+    eptrb = eptrb->epb_prev;
+
+    if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
+        *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
+        *prev == OP_ONCE)
      {
-      prev = ecode - GET(ecode, 1);
-      saved_eptr = eptrb->epb_saved_eptr;
+      md->end_match_ptr = eptr;      /* For ONCE */
+      md->end_offset_top = offset_top;
+      RRETURN(MATCH_MATCH);
+      }

-      /* Back up the stack of bracket start pointers. */
+    /* In all other cases except a conditional group we have to check the
+    group number back at the start and if necessary complete handling an
+    extraction by setting the offsets and bumping the high water mark. */

-      eptrb = eptrb->epb_prev;
+    if (*prev != OP_COND)
+      {
+      number = *prev - OP_BRA;

-      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
-          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
-          *prev == OP_ONCE)
-        {
-        md->end_match_ptr = eptr;      /* For ONCE */
-        md->end_offset_top = offset_top;
-        RRETURN(MATCH_MATCH);
-        }
+      /* For extended extraction brackets (large number), we have to fish out
+      the number from a dummy opcode at the start. */

-      /* In all other cases except a conditional group we have to check the
-      group number back at the start and if necessary complete handling an
-      extraction by setting the offsets and bumping the high water mark. */
-
-      if (*prev != OP_COND)
-        {
-        number = *prev - OP_BRA;
-
-        /* For extended extraction brackets (large number), we have to fish out
-        the number from a dummy opcode at the start. */
-
-        if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
-        offset = number << 1;
+      if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);
+      offset = number << 1;

 #ifdef DEBUG
-        printf("end bracket %d", number);
-        printf("\n");
+      printf("end bracket %d", number);
+      printf("\n");
 #endif

-        /* Test for a numbered group. This includes groups called as a result
-        of recursion. Note that whole-pattern recursion is coded as a recurse
-        into group 0, so it won't be picked up here. Instead, we catch it when
-        the OP_END is reached. */
+      /* Test for a numbered group. This includes groups called as a result
+      of recursion. Note that whole-pattern recursion is coded as a recurse
+      into group 0, so it won't be picked up here. Instead, we catch it when
+      the OP_END is reached. */

-        if (number > 0)
+      if (number > 0)
+        {
+        md->capture_last = number;
+        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
          {
-          md->capture_last = number;
-          if (offset >= md->offset_max) md->offset_overflow = TRUE; else
-            {
-            md->offset_vector[offset] =
-              md->offset_vector[md->offset_end - number];
-            md->offset_vector[offset+1] = eptr - md->start_subject;
-            if (offset_top <= offset) offset_top = offset + 2;
-            }
-
-          /* Handle a recursively called group. Restore the offsets
-          appropriately and continue from after the call. */
-
-          if (md->recursive != NULL && md->recursive->group_num == number)
-            {
-            recursion_info *rec = md->recursive;
-            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
-            md->recursive = rec->prevrec;
-            md->start_match = rec->save_start;
-            memcpy(md->offset_vector, rec->offset_save,
-              rec->saved_max * sizeof(int));
-            ecode = rec->after_call;
-            ims = original_ims;
-            break;
-            }
+          md->offset_vector[offset] =
+            md->offset_vector[md->offset_end - number];
+          md->offset_vector[offset+1] = eptr - md->start_subject;
+          if (offset_top <= offset) offset_top = offset + 2;
          }
-        }

-      /* Reset the value of the ims flags, in case they got changed during
-      the group. */
+        /* Handle a recursively called group. Restore the offsets
+        appropriately and continue from after the call. */

-      ims = original_ims;
-      DPRINTF(("ims reset to %02lx\n", ims));
-
-      /* For a non-repeating ket, just continue at this level. This also
-      happens for a repeating ket if no characters were matched in the group.
-      This is the forcible breaking of infinite loops as implemented in Perl
-      5.005. If there is an options reset, it will get obeyed in the normal
-      course of events. */
-
-      if (*ecode == OP_KET || eptr == saved_eptr)
-        {
-        ecode += 1 + LINK_SIZE;
-        break;
-        }
-
-      /* The repeating kets try the rest of the pattern or restart from the
-      preceding bracket, in the appropriate order. */
-
-      if (*ecode == OP_KETRMIN)
-        {
-        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        }
-      else  /* OP_KETRMAX */
-        {
-        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
-        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+        if (md->recursive != NULL && md->recursive->group_num == number)
+          {
+          recursion_info *rec = md->recursive;
+          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
+          md->recursive = rec->prevrec;
+          md->start_match = rec->save_start;
+          memcpy(md->offset_vector, rec->offset_save,
+            rec->saved_max * sizeof(int));
+          ecode = rec->after_call;
+          ims = original_ims;
+          break;
+          }
        }
      }

-    RRETURN(MATCH_NOMATCH);
+    /* Reset the value of the ims flags, in case they got changed during
+    the group. */
+
+    ims = original_ims;
+    DPRINTF(("ims reset to %02lx\n", ims));
+
+    /* For a non-repeating ket, just continue at this level. This also
+    happens for a repeating ket if no characters were matched in the group.
+    This is the forcible breaking of infinite loops as implemented in Perl
+    5.005. If there is an options reset, it will get obeyed in the normal
+    course of events. */
+
+    if (*ecode == OP_KET || eptr == saved_eptr)
+      {
+      ecode += 1 + LINK_SIZE;
+      break;
+      }
+
+    /* The repeating kets try the rest of the pattern or restart from the
+    preceding bracket, in the appropriate order. In the second case, we can use
+    tail recursion to avoid using another stack frame. */
+
+    if (*ecode == OP_KETRMIN)
+      {
+      RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      ecode = prev;
+      flags = match_isgroup;
+      goto TAIL_RECURSE;
+      }
+    else  /* OP_KETRMAX */
+      {
+      RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+      ecode += 1 + LINK_SIZE;
+      flags = 0;
+      goto TAIL_RECURSE;
+      }
+    /* Control never gets here */

    /* Start of subject unless notbol, or after internal newline if multiline */

@ -1168,7 +1201,10 @@ for (;;)
    if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
    if ((ims & PCRE_MULTILINE) != 0)
      {
-      if (eptr != md->start_subject && eptr[-1] != NEWLINE)
+      if (eptr != md->start_subject &&
+          (eptr == md->end_subject ||
+           eptr < md->start_subject + md->nllen ||
+           !IS_NEWLINE(eptr - md->nllen)))
        RRETURN(MATCH_NOMATCH);
      ecode++;
      break;
@ -1196,7 +1232,7 @@ for (;;)
    if ((ims & PCRE_MULTILINE) != 0)
      {
      if (eptr < md->end_subject)
-        { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }
+        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
      else
        { if (md->noteol) RRETURN(MATCH_NOMATCH); }
      ecode++;
@ -1207,14 +1243,14 @@ for (;;)
      if (md->noteol) RRETURN(MATCH_NOMATCH);
      if (!md->endonly)
        {
-        if (eptr < md->end_subject - 1 ||
-           (eptr == md->end_subject - 1 && *eptr != NEWLINE))
+        if (eptr != md->end_subject &&
+            (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
          RRETURN(MATCH_NOMATCH);
        ecode++;
        break;
        }
      }
-    /* ... else fall through */
+    /* ... else fall through for endonly */

    /* End of subject assertion (\z) */

@ -1226,8 +1262,9 @@ for (;;)
    /* End of subject or ending \n assertion (\Z) */

    case OP_EODN:
-    if (eptr < md->end_subject - 1 ||
-       (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);
+    if (eptr != md->end_subject &&
+        (eptr != md->end_subject - md->nllen || !IS_NEWLINE(eptr)))
+      RRETURN(MATCH_NOMATCH);
    ecode++;
    break;

@ -1280,13 +1317,14 @@ for (;;)
    /* Match a single character type; inline for speed */

    case OP_ANY:
-    if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
-      RRETURN(MATCH_NOMATCH);
+    if ((ims & PCRE_DOTALL) == 0)
+      {
+      if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
+        RRETURN(MATCH_NOMATCH);
+      }
    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
-#ifdef SUPPORT_UTF8
    if (utf8)
      while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
-#endif
    ecode++;
    break;

@ -2573,8 +2611,11 @@ for (;;)
        for (i = 1; i <= min; i++)
          {
          if (eptr >= md->end_subject ||
-             (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
+               ((ims & PCRE_DOTALL) == 0 &&
+                 eptr <= md->end_subject - md->nllen &&
+                 IS_NEWLINE(eptr)))
            RRETURN(MATCH_NOMATCH);
+          eptr++;
          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
          }
        break;
@ -2659,7 +2700,11 @@ for (;;)
        if ((ims & PCRE_DOTALL) == 0)
          {
          for (i = 1; i <= min; i++)
-            if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);
+            {
+            if (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr))
+              RRETURN(MATCH_NOMATCH);
+            eptr++;
+            }
          }
        else eptr += min;
        break;
@ -2829,13 +2874,15 @@ for (;;)
          {
          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (fi >= max || eptr >= md->end_subject ||
+               (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
+                eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
+            RRETURN(MATCH_NOMATCH);

          GETCHARINC(c, eptr);
          switch(ctype)
            {
-            case OP_ANY:
-            if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
+            case OP_ANY:        /* This is the DOTALL case */
            break;

            case OP_ANYBYTE:
@ -2884,12 +2931,15 @@ for (;;)
          {
          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (fi >= max || eptr >= md->end_subject ||
+               ((ims & PCRE_DOTALL) == 0 &&
+                 eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
+            RRETURN(MATCH_NOMATCH);
+
          c = *eptr++;
          switch(ctype)
            {
-            case OP_ANY:
-            if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);
+            case OP_ANY:   /* This is the DOTALL case */
            break;

            case OP_ANYBYTE:
@ -3075,9 +3125,9 @@ for (;;)
          {
          case OP_ANY:

-          /* Special code is required for UTF8, but when the maximum is unlimited
-          we don't need it, so we repeat the non-UTF8 code. This is probably
-          worth it, because .* is quite a common idiom. */
+          /* Special code is required for UTF8, but when the maximum is
+          unlimited we don't need it, so we repeat the non-UTF8 code. This is
+          probably worth it, because .* is quite a common idiom. */

          if (max < INT_MAX)
            {
@ -3085,7 +3135,9 @@ for (;;)
              {
              for (i = min; i < max; i++)
                {
-                if (eptr >= md->end_subject || *eptr == NEWLINE) break;
+                if (eptr >= md->end_subject ||
+                    (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
+                  break;
                eptr++;
                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                }
@ -3094,6 +3146,7 @@ for (;;)
              {
              for (i = min; i < max; i++)
                {
+                if (eptr >= md->end_subject) break;
                eptr++;
                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                }
@ -3108,7 +3161,9 @@ for (;;)
              {
              for (i = min; i < max; i++)
                {
-                if (eptr >= md->end_subject || *eptr == NEWLINE) break;
+                if (eptr >= md->end_subject ||
+                    (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
+                  break;
                eptr++;
                }
              break;
@ -3222,7 +3277,9 @@ for (;;)
            {
            for (i = min; i < max; i++)
              {
-              if (eptr >= md->end_subject || *eptr == NEWLINE) break;
+              if (eptr >= md->end_subject ||
+                  (eptr <= md->end_subject - md->nllen && IS_NEWLINE(eptr)))
+                break;
              eptr++;
              }
            break;
@ -3419,7 +3476,8 @@ int rc, resetcount, ocount;
 int first_byte = -1;
 int req_byte = -1;
 int req_byte2 = -1;
-unsigned long int ims = 0;
+int newline;
+unsigned long int ims;
 BOOL using_temporary_offsets = FALSE;
 BOOL anchored;
 BOOL startline;
@ -3427,6 +3485,7 @@ BOOL firstline;
 BOOL first_byte_caseless = FALSE;
 BOOL req_byte_caseless = FALSE;
 match_data match_block;
+match_data *md = &match_block;
 const uschar *tables;
 const uschar *start_bits = NULL;
 USPTR start_match = (USPTR)subject + start_offset;
@ -3451,9 +3510,9 @@ if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 the default values. */

 study = NULL;
-match_block.match_limit = MATCH_LIMIT;
-match_block.match_limit_recursion = MATCH_LIMIT_RECURSION;
-match_block.callout_data = NULL;
+md->match_limit = MATCH_LIMIT;
+md->match_limit_recursion = MATCH_LIMIT_RECURSION;
+md->callout_data = NULL;

 /* The table pointer is always in native byte order. */

@ -3465,11 +3524,11 @@ if (extra_data != NULL)
  if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
    study = (const pcre_study_data *)extra_data->study_data;
  if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
-    match_block.match_limit = extra_data->match_limit;
+    md->match_limit = extra_data->match_limit;
  if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
-    match_block.match_limit_recursion = extra_data->match_limit_recursion;
+    md->match_limit_recursion = extra_data->match_limit_recursion;
  if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
-    match_block.callout_data = extra_data->callout_data;
+    md->callout_data = extra_data->callout_data;
  if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
  }

@ -3499,39 +3558,64 @@ firstline = (re->options & PCRE_FIRSTLINE) != 0;

 /* The code starts after the real_pcre block and the capture name table. */

-match_block.start_code = (const uschar *)external_re + re->name_table_offset +
+md->start_code = (const uschar *)external_re + re->name_table_offset +
  re->name_count * re->name_entry_size;

-match_block.start_subject = (USPTR)subject;
-match_block.start_offset = start_offset;
-match_block.end_subject = match_block.start_subject + length;
-end_subject = match_block.end_subject;
+md->start_subject = (USPTR)subject;
+md->start_offset = start_offset;
+md->end_subject = md->start_subject + length;
+end_subject = md->end_subject;

-match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
-match_block.utf8 = (re->options & PCRE_UTF8) != 0;
+md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+md->utf8 = (re->options & PCRE_UTF8) != 0;

-match_block.notbol = (options & PCRE_NOTBOL) != 0;
-match_block.noteol = (options & PCRE_NOTEOL) != 0;
-match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
-match_block.partial = (options & PCRE_PARTIAL) != 0;
-match_block.hitend = FALSE;
+md->notbol = (options & PCRE_NOTBOL) != 0;
+md->noteol = (options & PCRE_NOTEOL) != 0;
+md->notempty = (options & PCRE_NOTEMPTY) != 0;
+md->partial = (options & PCRE_PARTIAL) != 0;
+md->hitend = FALSE;

-match_block.recursive = NULL;                   /* No recursion at top level */
+md->recursive = NULL;                   /* No recursion at top level */

-match_block.lcc = tables + lcc_offset;
-match_block.ctypes = tables + ctypes_offset;
+md->lcc = tables + lcc_offset;
+md->ctypes = tables + ctypes_offset;
+
+/* Handle different types of newline. The two bits give four cases. If nothing
+is set at run time, whatever was used at compile time applies. */
+
+switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
+         PCRE_NEWLINE_CRLF)
+  {
+  default:              newline = NEWLINE; break;   /* Compile-time default */
+  case PCRE_NEWLINE_CR: newline = '\r'; break;
+  case PCRE_NEWLINE_LF: newline = '\n'; break;
+  case PCRE_NEWLINE_CR+
+       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+  }
+
+if (newline > 255)
+  {
+  md->nllen = 2;
+  md->nl[0] = (newline >> 8) & 255;
+  md->nl[1] = newline & 255;
+  }
+else
+  {
+  md->nllen = 1;
+  md->nl[0] = newline;
+  }

 /* Partial matching is supported only for a restricted set of regexes at the
 moment. */

-if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)
+if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
  return PCRE_ERROR_BADPARTIAL;

 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
 back the character offset. */

 #ifdef SUPPORT_UTF8
-if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
+if (md->utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
  {
  if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
    return PCRE_ERROR_BADUTF8;
@ -3563,17 +3647,17 @@ ocount = offsetcount - (offsetcount % 3);
 if (re->top_backref > 0 && re->top_backref >= ocount/3)
  {
  ocount = re->top_backref * 3 + 3;
-  match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
-  if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
+  md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
+  if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
  using_temporary_offsets = TRUE;
  DPRINTF(("Got memory to hold back references\n"));
  }
-else match_block.offset_vector = offsets;
+else md->offset_vector = offsets;

-match_block.offset_end = ocount;
-match_block.offset_max = (2*ocount)/3;
-match_block.offset_overflow = FALSE;
-match_block.capture_last = -1;
+md->offset_end = ocount;
+md->offset_max = (2*ocount)/3;
+md->offset_overflow = FALSE;
+md->capture_last = -1;

 /* Compute the minimum number of offsets that we need to reset each time. Doing
 this makes a huge difference to execution time when there aren't many brackets
@ -3586,9 +3670,9 @@ if (resetcount > offsetcount) resetcount = ocount;
 never be used unless previously set, but they get saved and restored, and so we
 initialize them to avoid reading uninitialized locations. */

-if (match_block.offset_vector != NULL)
+if (md->offset_vector != NULL)
  {
-  register int *iptr = match_block.offset_vector + ocount;
+  register int *iptr = md->offset_vector + ocount;
  register int *iend = iptr - resetcount/2 + 1;
  while (--iptr >= iend) *iptr = -1;
  }
@ -3605,7 +3689,7 @@ if (!anchored)
    {
    first_byte = re->first_byte & 255;
    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
-      first_byte = match_block.lcc[first_byte];
+      first_byte = md->lcc[first_byte];
    }
  else
    if (!startline && study != NULL &&
@ -3632,9 +3716,9 @@ do

  /* Reset the maximum number of extractions we might see. */

-  if (match_block.offset_vector != NULL)
+  if (md->offset_vector != NULL)
    {
-    register int *iptr = match_block.offset_vector;
+    register int *iptr = md->offset_vector;
    register int *iend = iptr + resetcount;
    while (iptr < iend) *iptr++ = -1;
    }
@ -3648,7 +3732,7 @@ do
  if (firstline)
    {
    USPTR t = start_match;
-    while (t < save_end_subject && *t != '\n') t++;
+    while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
    end_subject = t;
    }

@ -3658,20 +3742,22 @@ do
    {
    if (first_byte_caseless)
      while (start_match < end_subject &&
-             match_block.lcc[*start_match] != first_byte)
+             md->lcc[*start_match] != first_byte)
        start_match++;
    else
      while (start_match < end_subject && *start_match != first_byte)
        start_match++;
    }

-  /* Or to just after \n for a multiline match if possible */
+  /* Or to just after a linebreak for a multiline match if possible */

  else if (startline)
    {
-    if (start_match > match_block.start_subject + start_offset)
+    if (start_match >= md->start_subject + md->nllen +
+          start_offset)
      {
-      while (start_match < end_subject && start_match[-1] != NEWLINE)
+      while (start_match <= end_subject &&
+             !IS_NEWLINE(start_match - md->nllen))
        start_match++;
      }
    }
@ -3693,7 +3779,7 @@ do

 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
  printf(">>>> Match against: ");
-  pchars(start_match, end_subject - start_match, TRUE, &match_block);
+  pchars(start_match, end_subject - start_match, TRUE, md);
  printf("\n");
 #endif

@ -3715,7 +3801,7 @@ do

  if (req_byte >= 0 &&
      end_subject - start_match < REQ_BYTE_MAX &&
-      !match_block.partial)
+      !md->partial)
    {
    register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);

@ -3759,11 +3845,10 @@ do
  those back references that we can. In this case there need not be overflow
  if certain parts of the pattern were not used. */

-  match_block.start_match = start_match;
-  match_block.match_call_count = 0;
+  md->start_match = start_match;
+  md->match_call_count = 0;

-  rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,
-    match_isgroup, 0);
+  rc = match(start_match, md->start_code, 2, md, ims, NULL, match_isgroup, 0);

  /* When the result is no match, if the subject's first character was a
  newline and the PCRE_FIRSTLINE option is set, break (which will return
@ -3774,10 +3859,13 @@ do

  if (rc == MATCH_NOMATCH)
    {
-    if (firstline && *start_match == NEWLINE) break;
+    if (firstline &&
+        start_match <= md->end_subject - md->nllen &&
+        IS_NEWLINE(start_match))
+      break;
    start_match++;
 #ifdef SUPPORT_UTF8
-    if (match_block.utf8)
+    if (md->utf8)
      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
        start_match++;
 #endif
@ -3797,23 +3885,23 @@ do
    {
    if (offsetcount >= 4)
      {
-      memcpy(offsets + 2, match_block.offset_vector + 2,
+      memcpy(offsets + 2, md->offset_vector + 2,
        (offsetcount - 2) * sizeof(int));
      DPRINTF(("Copied offsets from temporary memory\n"));
      }
-    if (match_block.end_offset_top > offsetcount)
-      match_block.offset_overflow = TRUE;
+    if (md->end_offset_top > offsetcount)
+      md->offset_overflow = TRUE;

    DPRINTF(("Freeing temporary memory\n"));
-    (pcre_free)(match_block.offset_vector);
+    (pcre_free)(md->offset_vector);
    }

-  rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
+  rc = md->offset_overflow? 0 : md->end_offset_top/2;

  if (offsetcount < 2) rc = 0; else
    {
-    offsets[0] = start_match - match_block.start_subject;
-    offsets[1] = match_block.end_match_ptr - match_block.start_subject;
+    offsets[0] = start_match - md->start_subject;
+    offsets[1] = md->end_match_ptr - md->start_subject;
    }

  DPRINTF((">>>> returning %d\n", rc));
@ -3827,10 +3915,10 @@ while (!anchored && start_match <= end_subject);
 if (using_temporary_offsets)
  {
  DPRINTF(("Freeing temporary memory\n"));
-  (pcre_free)(match_block.offset_vector);
+  (pcre_free)(md->offset_vector);
  }

-if (match_block.partial && match_block.hitend)
+if (md->partial && md->hitend)
  {
  DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
  return PCRE_ERROR_PARTIAL;
--- a/ext/pcre/pcrelib/pcre_get.c
+++ b/ext/pcre/pcrelib/pcre_get.c
@ -50,8 +50,8 @@ for these functions came from Scott Wimer. */
 *           Find number for named string         *
 *************************************************/

-/* This function is used by the two extraction functions below, as well
-as being generally available.
+/* This function is used by the get_first_set() function below, as well
+as being generally available. It assumes that names are unique.

 Arguments:
  code        the compiled regex
@ -93,6 +93,113 @@ return PCRE_ERROR_NOSUBSTRING;



+/*************************************************
+*     Find (multiple) entries for named string   *
+*************************************************/
+
+/* This is used by the get_first_set() function below, as well as being
+generally available. It is used when duplicated names are permitted.
+
+Arguments:
+  code        the compiled regex
+  stringname  the name whose entries are required
+  firstptr    where to put the pointer to the first entry (set on success only)
+  lastptr     where to put the pointer to the last entry (set on success only)
+
+Returns:      the length of each entry, PCRE_ERROR_NOSUBSTRING if not found,
+                or the non-zero result of a failing pcre_fullinfo() call
+*/
+
+int
+pcre_get_stringtable_entries(const pcre *code, const char *stringname,
+  char **firstptr, char **lastptr)
+{
+int rc;
+int entrysize;
+int top, bot;
+uschar *nametable, *lastentry;
+
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+  return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;   /* no names in the table */
+
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+  return rc;
+if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+  return rc;
+
+lastentry = nametable + entrysize * (top - 1);
+bot = 0;
+while (top > bot)   /* binary search of [bot, top); relies on a sorted table */
+  {
+  int mid = (top + bot) / 2;
+  uschar *entry = nametable + entrysize*mid;
+  int c = strcmp(stringname, (char *)(entry + 2));   /* name is at offset 2 */
+  if (c == 0)
+    {
+    uschar *first = entry;
+    uschar *last = entry;
+    while (first > nametable)   /* extend back over equal-named entries */
+      {
+      if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
+      first -= entrysize;
+      }
+    while (last < lastentry)    /* extend forward over equal-named entries */
+      {
+      if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
+      last += entrysize;
+      }
+    *firstptr = (char *)first;
+    *lastptr = (char *)last;
+    return entrysize;
+    }
+  if (c > 0) bot = mid + 1; else top = mid;
+  }
+
+return PCRE_ERROR_NOSUBSTRING;
+}
+
+
+
+/*************************************************
+*    Find first set of multiple named strings    *
+*************************************************/
+
+/* This function allows for duplicate names in the table of named substrings.
+It returns the number of the first one that was set in a pattern match.
+NOTE(review): n is not checked against the size of ovector - confirm callers.
+Arguments:
+  code         the compiled regex
+  stringname   the name of the capturing substring
+  ovector      the vector of matched substrings
+
+Returns:       the number of the first that is set,
+               or the number of the first entry with the name if none are set,
+               or a negative number on error
+*/
+
+static int
+get_first_set(const pcre *code, const char *stringname, int *ovector)
+{
+const real_pcre *re = (const real_pcre *)code;
+int entrysize;
+char *first, *last;
+uschar *entry;
+if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
+  return pcre_get_stringnumber(code, stringname);   /* unique-name lookup */
+entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+if (entrysize <= 0) return entrysize;               /* not found, or error */
+for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
+  {
+  int n = (entry[0] << 8) + entry[1];               /* 2-byte group number */
+  if (ovector[n*2] >= 0) return n;
+  }
+return (((uschar *)first)[0] << 8) + ((uschar *)first)[1];  /* unsigned */
+}
+
+
+
+
 /*************************************************
 *      Copy captured string to given buffer      *
 *************************************************/
@ -142,7 +249,8 @@ return yield;
 *************************************************/

 /* This function copies a single captured substring into a given buffer,
-identifying it by name.
+identifying it by name. If the regex permits duplicate names, the first
+substring that is set is chosen.

 Arguments:
  code           the compiled regex
 int
 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
   int stringcount, const char *stringname, char *buffer, int size)
 {
-int n = pcre_get_stringnumber(code, stringname);
+int n = get_first_set(code, stringname, ovector);  /* handles dup names */
 if (n <= 0) return n;
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
 }
@ -299,7 +407,8 @@ return yield;
 *************************************************/

 /* This function copies a single captured substring, identified by name, into
-new store.
+new store. If the regex permits duplicate names, the first substring that is
+set is chosen.

 Arguments:
  code           the compiled regex
 int
 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
   int stringcount, const char *stringname, const char **stringptr)
 {
-int n = pcre_get_stringnumber(code, stringname);
+int n = get_first_set(code, stringname, ovector);  /* handles dup names */
 if (n <= 0) return n;
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+
 }


--- a/ext/pcre/pcrelib/pcre_internal.h
+++ b/ext/pcre/pcrelib/pcre_internal.h
@ -118,6 +118,14 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */

 typedef unsigned char uschar;

+/* PCRE supports 3 kinds of newline (CR, LF, CRLF); this macro packages up the
+test. NLBLOCK is defined in each module to name the datablock holding nl/nllen.
+If nllen is not 1, (p)[1] may be read too, so it must be a readable byte. */
+
+#define IS_NEWLINE(p) \
+  ((p)[0] == NLBLOCK->nl[0] && \
+  (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))
+
 /* When PCRE is compiled as a C++ library, the subject pointer can be replaced
 with a custom type. This makes it possible, for example, to allow pcre_exec()
 to process subject strings that are discontinuous by using a smart pointer
@ -164,7 +172,7 @@ case in PCRE. */
 #if HAVE_BCOPY
 #define memmove(a, b, c) bcopy(b, a, c)
 #else  /* HAVE_BCOPY */
-void *
+static void *
 pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
 {
 size_t i;
@ -377,16 +385,17 @@ Standard C system should have one. */

 #define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)

-/* Private options flags start at the most significant end of the four bytes,
-but skip the top bit so we can use ints for convenience without getting tangled
-with negative values. The public options defined in pcre.h start at the least
-significant end. Make sure they don't overlap! */
+/* Private options flags start at the most significant end of the four bytes.
+The public options defined in pcre.h start at the least significant end. Make
+sure they don't overlap! The bits are getting a bit scarce now -- when we run
+out, there is a dummy word in the structure that could be used for the private
+bits. */

+#define PCRE_NOPARTIAL     0x80000000  /* can't use partial with this regex */
 #define PCRE_FIRSTSET      0x40000000  /* first_byte is set */
 #define PCRE_REQCHSET      0x20000000  /* req_byte is set */
 #define PCRE_STARTLINE     0x10000000  /* start after \n for multiline */
-#define PCRE_ICHANGED      0x08000000  /* i option changes within regex */
-#define PCRE_NOPARTIAL     0x04000000  /* can't use partial with this regex */
+#define PCRE_JCHANGED      0x08000000  /* j option changes within regex */

 /* Options for the "extra" block produced by pcre_study(). */

@ -398,15 +407,17 @@ time, run time, or study time, respectively. */
 #define PUBLIC_OPTIONS \
  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
-   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
+   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
+   PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)

 #define PUBLIC_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL)
+   PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)

 #define PUBLIC_DFA_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
+   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \
+   PCRE_NEWLINE_LF)

 #define PUBLIC_STUDY_OPTIONS 0   /* None defined */

@ -534,7 +545,7 @@ enum {
  OP_DOLL,           /* 20 End of line - varies with multiline switch */
  OP_CHAR,           /* 21 Match one character, casefully */
  OP_CHARNC,         /* 22 Match one character, caselessly */
-  OP_NOT,            /* 23 Match anything but the following char */
+  OP_NOT,            /* 23 Match one character, not the following one */

  OP_STAR,           /* 24 The maximizing and minimizing versions of */
  OP_MINSTAR,        /* 25 all these opcodes must come in pairs, with */
@ -714,7 +725,8 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
-       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
+       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
+       ERR50, ERR51 };

 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@ -778,6 +790,8 @@ typedef struct compile_data {
  unsigned int backref_map;     /* Bitmap of low back refs */
  int  req_varyopt;             /* "After variable item" flag for reqbyte */
  BOOL nopartial;               /* Set TRUE if partial won't work */
+  int  nllen;                   /* 1 or 2 for newline string length */
+  uschar nl[4];                 /* Newline string */
 } compile_data;

 /* Structure for maintaining a chain of pointers to the currently incomplete
@ -802,11 +816,11 @@ typedef struct recursion_info {

 /* When compiling in a mode that doesn't use recursive calls to match(),
 a structure is used to remember local variables on the heap. It is defined in
-pcre.c, close to the match() function, so that it is easy to keep it in step
-with any changes of local variable. However, the pointer to the current frame
-must be saved in some "static" place over a longjmp(). We declare the
-structure here so that we can put a pointer in the match_data structure.
-NOTE: This isn't used for a "normal" compilation of pcre. */
+pcre_exec.c, close to the match() function, so that it is easy to keep it in
+step with any changes of local variable. However, the pointer to the current
+frame must be saved in some "static" place over a longjmp(). We declare the
+structure here so that we can put a pointer in the match_data structure. NOTE:
+This isn't used for a "normal" compilation of pcre. */

 struct heapframe;

@ -820,6 +834,8 @@ typedef struct match_data {
  int   *offset_vector;         /* Offset vector */
  int    offset_end;            /* One past the end */
  int    offset_max;            /* The maximum usable for return data */
+  int    nllen;                 /* 1 or 2 for newline string length */
+  uschar nl[4];                 /* Newline string */
  const uschar *lcc;            /* Points to lower casing table */
  const uschar *ctypes;         /* Points to table of type maps */
  BOOL   offset_overflow;       /* Set if too many extractions */
@ -853,6 +869,8 @@ typedef struct dfa_match_data {
  const uschar *tables;         /* Character tables */
  int   moptions;               /* Match options */
  int   poptions;               /* Pattern options */
+  int    nllen;                 /* 1 or 2 for newline string length */
+  uschar nl[4];                 /* Newline string */
  void  *callout_data;          /* To pass back to callouts */
 } dfa_match_data;

@ -926,7 +944,7 @@ sense, but are not part of the PCRE public API. */
 extern int         _pcre_ord2utf8(int, uschar *);
 extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
                     const pcre_study_data *, pcre_study_data *);
-extern int         _pcre_ucp_findprop(const int, int *, int *);
+extern int         _pcre_ucp_findprop(const unsigned int, int *, int *);
 extern int         _pcre_ucp_othercase(const int);
 extern int         _pcre_valid_utf8(const uschar *, int);
 extern BOOL        _pcre_xclass(int, const uschar *);
--- a/ext/pcre/pcrelib/pcre_maketables.c
+++ b/ext/pcre/pcrelib/pcre_maketables.c
@ -130,7 +130,9 @@ for (i = 0; i < 256; i++)
  meta-character, which in this sense is any character that terminates a run
  of data characters. */

-  if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
+  if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
+  *p++ = x;
+  }

 return yield;
 }
--- a/ext/pcre/pcrelib/pcre_printint.src
+++ b/ext/pcre/pcrelib/pcre_printint.src
@ -111,9 +111,9 @@ for (i = _pcre_utt_size; i >= 0; i--)
  }
 return (i >= 0)? _pcre_utt[i].name : "??";
 #else
-ptype = ptype;    /* Avoid compiler warning */
-pvalue = pvalue;
-return "??";
+/* It gets harder and harder to shut off unwanted compiler warnings. */
+ptype = ptype * pvalue;
+return (ptype == pvalue)? "??" : "??";
 #endif
 }

@ -182,32 +182,26 @@ for(;;)
    break;

    case OP_CHAR:
+    fprintf(f, "    ");
+    do
      {
-      fprintf(f, "    ");
-      do
-        {
-        code++;
-        code += 1 + print_char(f, code, utf8);
-        }
-      while (*code == OP_CHAR);
-      fprintf(f, "\n");
-      continue;
+      code++;
+      code += 1 + print_char(f, code, utf8);
      }
-    break;
+    while (*code == OP_CHAR);
+    fprintf(f, "\n");
+    continue;

    case OP_CHARNC:
+    fprintf(f, " NC ");
+    do
      {
-      fprintf(f, " NC ");
-      do
-        {
-        code++;
-        code += 1 + print_char(f, code, utf8);
-        }
-      while (*code == OP_CHARNC);
-      fprintf(f, "\n");
-      continue;
+      code++;
+      code += 1 + print_char(f, code, utf8);
      }
-    break;
+    while (*code == OP_CHARNC);
+    fprintf(f, "\n");
+    continue;

    case OP_KETRMAX:
    case OP_KETRMIN:
--- a/ext/pcre/pcrelib/pcre_study.c
+++ b/ext/pcre/pcrelib/pcre_study.c
@ -95,6 +95,13 @@ set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
 {
 register int c;

+#if 0
+/* ========================================================================= */
+/* The following comment and code was inserted in January 1999. In May 2006,
+when it was observed to cause compiler warnings about unused values, I took it
+out again. If anybody is still using OS/2, they will have to put it back
+manually. */
+
 /* This next statement and the later reference to dummy are here in order to
 trick the optimizer of the IBM C compiler for OS/2 into generating correct
 code. Apparently IBM isn't going to fix the problem, and we would rather not
@ -102,6 +109,8 @@ disable optimization (in this module it actually makes a big difference, and
 the pcre module can use all the optimization it can get). */

 volatile int dummy;
+/* ========================================================================= */
+#endif

 do
  {
@ -159,7 +168,11 @@ do
      case OP_BRAMINZERO:
      if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
        return FALSE;
+/* =========================================================================
+      See the comment at the head of this function concerning the next line,
+      which was an old fudge for the benefit of OS/2.
      dummy = 1;
+  ========================================================================= */
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
      tcode += 1+LINK_SIZE;
      break;
@ -215,15 +228,29 @@ do
      try_next = FALSE;
      break;

+      /* The cbit_space table has vertical tab as whitespace; we have to
+      discard it. */
+
      case OP_NOT_WHITESPACE:
      for (c = 0; c < 32; c++)
-        start_bits[c] |= ~cd->cbits[c+cbit_space];
+        {
+        int d = cd->cbits[c+cbit_space];
+        if (c == 1) d &= ~0x08;
+        start_bits[c] |= ~d;
+        }
      try_next = FALSE;
      break;

+      /* The cbit_space table has vertical tab as whitespace; we have to
+      discard it. */
+
      case OP_WHITESPACE:
      for (c = 0; c < 32; c++)
-        start_bits[c] |= cd->cbits[c+cbit_space];
+        {
+        int d = cd->cbits[c+cbit_space];
+        if (c == 1) d &= ~0x08;
+        start_bits[c] |= d;
+        }
      try_next = FALSE;
      break;

@ -277,14 +304,28 @@ do
          start_bits[c] |= cd->cbits[c+cbit_digit];
        break;

+        /* The cbit_space table has vertical tab as whitespace; we have to
+        discard it. */
+
        case OP_NOT_WHITESPACE:
        for (c = 0; c < 32; c++)
-          start_bits[c] |= ~cd->cbits[c+cbit_space];
+          {
+          int d = cd->cbits[c+cbit_space];
+          if (c == 1) d &= ~0x08;
+          start_bits[c] |= ~d;
+          }
        break;

+        /* The cbit_space table has vertical tab as whitespace; we have to
+        discard it. */
+
        case OP_WHITESPACE:
        for (c = 0; c < 32; c++)
-          start_bits[c] |= cd->cbits[c+cbit_space];
+          {
+          int d = cd->cbits[c+cbit_space];
+          if (c == 1) d &= ~0x08;
+          start_bits[c] |= d;
+          }
        break;

        case OP_NOT_WORDCHAR:
@ -408,10 +449,9 @@ uschar start_bits[32];
 pcre_extra *extra;
 pcre_study_data *study;
 const uschar *tables;
-const real_pcre *re = (const real_pcre *)external_re;
-uschar *code = (uschar *)re + re->name_table_offset +
-  (re->name_count * re->name_entry_size);
+uschar *code;
 compile_data compile_block;
+const real_pcre *re = (const real_pcre *)external_re;

 *errorptr = NULL;

@ -427,6 +467,9 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  return NULL;
  }

+code = (uschar *)re + re->name_table_offset +
+  (re->name_count * re->name_entry_size);
+
 /* For an anchored pattern, or an unanchored pattern that has a first char, or
 a multiline pattern that matches only at "line starts", no further processing
 at present. */
--- a/ext/pcre/pcrelib/pcre_try_flipped.c
+++ b/ext/pcre/pcrelib/pcre_try_flipped.c
@ -62,8 +62,8 @@ Arguments:
 Returns:       the flipped value
 */

-static long int
-byteflip(long int value, int n)
+static unsigned long int
+byteflip(unsigned long int value, int n)
 {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
 return ((value & 0x000000ff) << 24) |
--- a/ext/pcre/pcrelib/pcre_ucp_searchfuncs.c
+++ b/ext/pcre/pcrelib/pcre_ucp_searchfuncs.c
@ -79,7 +79,7 @@ Returns:      the character type category
 */

 int
-_pcre_ucp_findprop(const int c, int *type_ptr, int *script_ptr)
+_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
 {
 int bot = 0;
 int top = sizeof(ucp_table)/sizeof(cnode);
--- a/ext/pcre/pcrelib/pcrecpp.cc
+++ b/ext/pcre/pcrelib/pcrecpp.cc
@ -332,6 +332,30 @@ bool RE::Replace(const StringPiece& rewrite,
  return true;
 }

+// Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
+// Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
+static int NewlineMode(int pcre_options) {
+  // TODO: if we can make it threadsafe, cache this var
+  int newline_mode = 0;  // returned as 0 only if no branch below assigns it
+  /* if (newline_mode) return newline_mode; */  // do this once it's cached
+  if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
+    newline_mode = (pcre_options &
+                    (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
+  } else {  // no explicit flag in the options: ask the library for its default
+    int newline;
+    pcre_config(PCRE_CONFIG_NEWLINE, &newline);
+    if (newline == 10)  // '\n'
+      newline_mode = PCRE_NEWLINE_LF;
+    else if (newline == 13)  // '\r'
+      newline_mode = PCRE_NEWLINE_CR;
+    else if (newline == 3338)  // ('\r' << 8) | '\n'
+      newline_mode = PCRE_NEWLINE_CRLF;
+    else  // deliberately-false assert; the second literal is the message
+      assert("" == "Unexpected return value from pcre_config(NEWLINE)");
+  }
+  return newline_mode;
+}
+
 int RE::GlobalReplace(const StringPiece& rewrite,
                      string *str) const {
  int count = 0;
@ -350,9 +374,27 @@ int RE::GlobalReplace(const StringPiece& rewrite,
    if (matchstart == matchend && matchstart == lastend) {
      // advance one character if we matched an empty string at the same
      // place as the last match occurred
-      if (start < static_cast<int>(str->length()))
-        out.push_back((*str)[start]);
-      start++;
+      matchend = start + 1;
+      // If the current char is CR and we're in CRLF mode, skip LF too.
+      // Note it's better to call pcre_fullinfo() than to examine
+      // all_options(), since options_ could have changed between
+      // compile-time and now, but this is simpler and safe enough.
+      if (start+1 < static_cast<int>(str->length()) &&
+          (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
+          NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
+        matchend++;
+      }
+      // We also need to advance more than one char if we're in utf8 mode.
+#ifdef SUPPORT_UTF8
+      if (options_.utf8()) {
+        while (matchend < static_cast<int>(str->length()) &&
+               ((*str)[matchend] & 0xc0) == 0x80)
+          matchend++;
+      }
+#endif
+      if (matchend <= static_cast<int>(str->length()))
+        out.append(*str, start, matchend - start);
+      start = matchend;
    } else {
      out.append(*str, start, matchstart - start);
      Rewrite(&out, rewrite, *str, vec, matches);
--- a/ext/pcre/pcrelib/pcrecpp_unittest.cc
+++ b/ext/pcre/pcrelib/pcrecpp_unittest.cc
@ -32,6 +32,7 @@
 // TODO: Test extractions for PartialMatch/Consume

 #include <stdio.h>
+#include <cassert>
 #include <vector>
 #include "config.h"
 #include "pcrecpp.h"
@ -259,17 +260,71 @@ static void TestReplace() {
      "aaaaa",
      "bbaaaaa",
      "bbabbabbabbabbabb" },
+    { "b*",
+      "bb",
+      "aa\naa\n",
+      "bbaa\naa\n",
+      "bbabbabb\nbbabbabb\nbb" },
+    { "b*",
+      "bb",
+      "aa\raa\r",
+      "bbaa\raa\r",
+      "bbabbabb\rbbabbabb\rbb" },
+    { "b*",
+      "bb",
+      "aa\r\naa\r\n",
+      "bbaa\r\naa\r\n",
+      "bbabbabb\r\nbbabbabb\r\nbb" },
+#ifdef SUPPORT_UTF8
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
+      "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
+      "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" },
+    { "b*",
+      "bb",
+      "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
+      "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
+      ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
+       "bb\nbb""\xE3\x81\xB8""bb\r\nbb") },
+#endif
    { "", NULL, NULL, NULL, NULL }
  };

+#ifdef SUPPORT_UTF8
+  const bool support_utf8 = true;
+#else
+  const bool support_utf8 = false;
+#endif
+
  for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+    RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
+    assert(re.error().empty());
    string one(t->original);
-    CHECK(RE(t->regexp).Replace(t->rewrite, &one));
+    CHECK(re.Replace(t->rewrite, &one));
    CHECK_EQ(one, t->single);
    string all(t->original);
-    CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
+    CHECK(re.GlobalReplace(t->rewrite, &all) > 0);
    CHECK_EQ(all, t->global);
  }
+
+  // One final test: test \r\n replacement when we're not in CRLF mode
+  {
+    RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
+    assert(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK(re.GlobalReplace("bb", &all) > 0);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  {
+    RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
+    assert(re.error().empty());
+    string all("aa\r\naa\r\n");
+    CHECK(re.GlobalReplace("bb", &all) > 0);
+    CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
+  }
+  // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
+  //       Alas, the answer depends on how pcre was compiled.
 }

 static void TestExtract() {
--- a/ext/pcre/pcrelib/pcredemo.c
+++ b/ext/pcre/pcrelib/pcredemo.c
@ -117,7 +117,7 @@ if (rc < 0)
    */
    default: printf("Matching error %d\n", rc); break;
    }
-  free(re);     /* Release memory used for the compiled pattern */
+  pcre_free(re);     /* Release memory used for the compiled pattern */
  return 1;
  }

@ -223,8 +223,8 @@ if (namecount <= 0) printf("No named substrings\n"); else

 if (!find_all)
  {
-  free(re);   /* Release the memory used for the compiled pattern */
-  return 0;   /* Finish unless -g was given */
+  pcre_free(re);   /* Release the memory used for the compiled pattern */
+  return 0;        /* Finish unless -g was given */
  }

 /* Loop for second and subsequent matches */
@ -276,7 +276,7 @@ for (;;)
  if (rc < 0)
    {
    printf("Matching error %d\n", rc);
-    free(re);    /* Release memory used for the compiled pattern */
+    pcre_free(re);    /* Release memory used for the compiled pattern */
    return 1;
    }

@ -317,7 +317,7 @@ for (;;)
  }      /* End of loop to find second and subsequent matches */

 printf("\n");
-free(re);       /* Release memory used for the compiled pattern */
+pcre_free(re);       /* Release memory used for the compiled pattern */
 return 0;
 }

--- a/ext/pcre/pcrelib/pcregrep.c
+++ b/ext/pcre/pcrelib/pcregrep.c
@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.

 typedef int BOOL;

-#define VERSION "4.2 09-Jan-2006"
+#define VERSION "4.3 01-Jun-2006"
 #define MAX_PATTERN_COUNT 100

 #if BUFSIZ > 8192
@ -100,10 +100,14 @@ static const char *jfriedl_prefix = "";
 static const char *jfriedl_postfix = "";
 #endif

+static int  endlinebyte = '\n';     /* Last byte of endline sequence */
+static int  endlineextra = 0;       /* Extra bytes for endline sequence */
+
 static char *colour_string = (char *)"1;31";
 static char *colour_option = NULL;
 static char *dee_option = NULL;
 static char *DEE_option = NULL;
+static char *newline = NULL;
 static char *pattern_filename = NULL;
 static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;
@ -185,6 +189,7 @@ static option_item optionlist[] = {
  { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
  { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
  { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
+  { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF)" },
  { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
  { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
  { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
@ -493,8 +498,9 @@ if (after_context > 0 && lastmatchnumber > 0)
    char *pp = lastmatchrestart;
    if (printname != NULL) fprintf(stdout, "%s-", printname);
    if (number) fprintf(stdout, "%d-", lastmatchnumber++);
-    while (*pp != '\n') pp++;
-    fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
+    while (*pp != endlinebyte) pp++;
+    fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
+      stdout);
    lastmatchrestart = pp + 1;
    }
  hyphenpending = TRUE;
@ -566,7 +572,7 @@ while (ptr < endptr)
  that any match is constrained to be in the first line. */

  linelength = 0;
-  while (t < endptr && *t++ != '\n') linelength++;
+  while (t < endptr && *t++ != endlinebyte) linelength++;
  length = multiline? endptr - ptr : linelength;


@ -705,7 +711,7 @@ while (ptr < endptr)

        while (p < ptr && linecount < after_context)
          {
-          while (*p != '\n') p++;
+          while (*p != endlinebyte) p++;
          p++;
          linecount++;
          }
@ -719,8 +725,9 @@ while (ptr < endptr)
          char *pp = lastmatchrestart;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", lastmatchnumber++);
-          while (*pp != '\n') pp++;
-          fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
+          while (*pp != endlinebyte) pp++;
+          fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
+            (1 + endlineextra), stdout);
          lastmatchrestart = pp + 1;
          }
        if (lastmatchrestart != ptr) hyphenpending = TRUE;
@ -748,7 +755,7 @@ while (ptr < endptr)
          {
          linecount++;
          p--;
-          while (p > buffer && p[-1] != '\n') p--;
+          while (p > buffer && p[-1] != endlinebyte) p--;
          }

        if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
@ -759,8 +766,8 @@ while (ptr < endptr)
          char *pp = p;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", linenumber - linecount--);
-          while (*pp != '\n') pp++;
-          fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */
+          while (*pp != endlinebyte) pp++;
+          fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
          p = pp + 1;
          }
        }
@ -777,14 +784,14 @@ while (ptr < endptr)
      /* In multiline mode, we want to print to the end of the line in which
      the end of the matched string is found, so we adjust linelength and the
      line number appropriately. Because the PCRE_FIRSTLINE option is set, the
-      start of the match will always be before the first \n character. */
+      start of the match will always be before the first newline sequence. */

      if (multiline)
        {
        char *endmatch = ptr + offsets[1];
        t = ptr;
-        while (t < endmatch) { if (*t++ == '\n') linenumber++; }
-        while (endmatch < endptr && *endmatch != '\n') endmatch++;
+        while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
+        while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
        linelength = endmatch - ptr;
        }

@ -1206,7 +1213,7 @@ return FALSE;
 *************************************************/

 /* When the -F option has been used, each string may be a list of strings,
-separated by newlines. They will be matched literally.
+separated by line breaks. They will be matched literally.

 Arguments:
  pattern        the pattern string
@ -1227,10 +1234,10 @@ if ((process_options & PO_FIXED_STRINGS) != 0)
  char buffer[MBUFTHIRD];
  for(;;)
    {
-    char *p = strchr(pattern, '\n');
+    char *p = strchr(pattern, endlinebyte);
    if (p == NULL)
      return compile_single_pattern(pattern, options, filename, count);
-    sprintf(buffer, "%.*s", p - pattern, pattern);
+    sprintf(buffer, "%.*s", (int)(p - pattern - endlineextra), pattern);
    pattern = p + 1;
    if (!compile_single_pattern(buffer, options, filename, count))
      return FALSE;
@ -1260,6 +1267,16 @@ char *patterns[MAX_PATTERN_COUNT];
 const char *locale_from = "--locale";
 const char *error;

+/* Set the default line ending value from the default in the PCRE library. */
+
+(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
+switch(i)
+  {
+  default:                 newline = (char *)"lf"; break;
+  case '\r':               newline = (char *)"cr"; break;
+  case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
+  }
+
 /* Process the options */

 for (i = 1; i < argc; i++)
@ -1543,6 +1560,28 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
    }
  }

+/* Interpret the newline type; the default settings are Unix-like. */
+
+if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
+  {
+  pcre_options |= PCRE_NEWLINE_CR;
+  endlinebyte = '\r';
+  }
+else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
+  {
+  pcre_options |= PCRE_NEWLINE_LF;
+  }
+else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
+  {
+  pcre_options |= PCRE_NEWLINE_CRLF;
+  endlineextra = 1;
+  }
+else
+  {
+  fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
+  return 2;
+  }
+
 /* Interpret the text values for -d and -D */

 if (dee_option != NULL)
--- a/ext/pcre/pcrelib/pcreposix.c
+++ b/ext/pcre/pcrelib/pcreposix.c
@ -77,7 +77,7 @@ static const int eint[] = {
  REG_ASSERT,  /* internal error: code overflow */
  REG_BADPAT,  /* unrecognized character after (?< */
  REG_BADPAT,  /* lookbehind assertion is not fixed length */
-  REG_BADPAT,  /* malformed number after (?( */
+  REG_BADPAT,  /* malformed number or name after (?( */
  REG_BADPAT,  /* conditional group containe more than two branches */
  REG_BADPAT,  /* assertion expected after (?( */
  REG_BADPAT,  /* (?R or (?digits must be followed by ) */
@ -94,11 +94,15 @@ static const int eint[] = {
  REG_BADPAT,  /* recursive call could loop indefinitely */
  REG_BADPAT,  /* unrecognized character after (?P */
  REG_BADPAT,  /* syntax error after (?P */
-  REG_BADPAT,  /* two named groups have the same name */
+  REG_BADPAT,  /* two named subpatterns have the same name */
  REG_BADPAT,  /* invalid UTF-8 string */
  REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
  REG_BADPAT,  /* malformed \P or \p sequence */
-  REG_BADPAT   /* unknown property name after \P or \p */
+  REG_BADPAT,  /* unknown property name after \P or \p */
+  REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
+  REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
+  REG_BADPAT,  /* repeated subpattern is too long */
+  REG_BADPAT   /* octal value is greater than \377 (not in UTF-8 mode) */
 };

 /* Table of texts corresponding to POSIX error codes */
--- a/ext/pcre/pcrelib/pcretest.c
+++ b/ext/pcre/pcrelib/pcretest.c
@ -44,6 +44,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <locale.h>
 #include <errno.h>

+#ifndef _WIN32
+#include <sys/resource.h>
+#endif
+
 #define PCRE_SPY        /* For Win32 build, import data, not export */

 /* We include pcre_internal.h because we need the internal info for displaying
@ -101,11 +105,6 @@ function (define NOINFOCHECK). */

 #define LOOPREPEAT 500000

-#define BUFFER_SIZE 30000
-#define PBUFFER_SIZE BUFFER_SIZE
-#define DBUFFER_SIZE BUFFER_SIZE
-
-
 /* Static variables */

 static FILE *outfile;
@ -119,10 +118,95 @@ static int show_malloc;
 static int use_utf8;
 static size_t gotten_store;

+/* The buffers grow automatically if very long input lines are encountered. */
+
+static int buffer_size = 50000;
+static uschar *buffer = NULL;
+static uschar *dbuffer = NULL;
 static uschar *pbuffer = NULL;



+/*************************************************
+*        Read or extend an input line            *
+*************************************************/
+
+/* Input lines are read into buffer, but both patterns and data lines can be
+continued over multiple input lines. In addition, if the buffer fills up, we
+want to automatically expand it so as to be able to handle extremely large
+lines that are needed for certain stress tests. When the input buffer is
+expanded, the other two buffers must also be expanded likewise, and the
+contents of pbuffer, which are a copy of the input for callouts, must be
+preserved (for when expansion happens for a data line). This is not the most
+optimal way of handling this, but hey, this is just a test program!
+
+Arguments:
+  f            the file to read
+  start        where in buffer to start (this *must* be within buffer)
+
+Returns:       pointer to the start of new data
+               could be a copy of start, or could be moved
+               NULL if no data read and EOF reached
+*/
+
+static uschar *
+extend_inputline(FILE *f, uschar *start)
+{
+uschar *here = start;       /* Where the next fgets() call stores its data */
+
+for (;;)
+  {
+  int rlen = buffer_size - (here - buffer);   /* Room left from "here" on */
+  if (rlen > 1000)
+    {
+    int dlen;
+    if (fgets((char *)here, rlen,  f) == NULL)
+      return (here == start)? NULL : start;   /* NULL only if nothing read */
+    dlen = (int)strlen((char *)here);
+    if (dlen > 0 && here[dlen - 1] == '\n') return start;   /* Line complete */
+    here += dlen;             /* No newline yet: go round again for more */
+    }
+
+  else      /* 1000 bytes or fewer left: double the size of all three buffers */
+    {
+    int new_buffer_size = 2*buffer_size;
+    uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
+    uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
+    uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
+
+    if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
+      {
+      fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
+      exit(1);
+      }
+
+    memcpy(new_buffer, buffer, buffer_size);
+    memcpy(new_pbuffer, pbuffer, buffer_size);   /* dbuffer is not copied */
+
+    buffer_size = new_buffer_size;
+
+    start = new_buffer + (start - buffer);   /* Same offsets, new block */
+    here = new_buffer + (here - buffer);
+
+    free(buffer);
+    free(dbuffer);
+    free(pbuffer);
+
+    buffer = new_buffer;
+    dbuffer = new_dbuffer;
+    pbuffer = new_pbuffer;
+    }
+  }
+
+return NULL;  /* Control never gets here */
+}
+
+
+
+
+
+
+
 /*************************************************
 *          Read number from string               *
 *************************************************/
@ -159,19 +243,19 @@ return(result);
 and returns the value of the character.

 Argument:
-  buffer   a pointer to the byte vector
-  vptr     a pointer to an int to receive the value
+  utf8bytes   a pointer to the byte vector
+  vptr        a pointer to an int to receive the value

-Returns:   >  0 => the number of bytes consumed
-           -6 to 0 => malformed UTF-8 character at offset = (-return)
+Returns:      >  0 => the number of bytes consumed
+              -6 to 0 => malformed UTF-8 character at offset = (-return)
 */

 #if !defined NOUTF8

 static int
-utf82ord(unsigned char *buffer, int *vptr)
+utf82ord(unsigned char *utf8bytes, int *vptr)
 {
-int c = *buffer++;
+int c = *utf8bytes++;
 int d = c;
 int i, j, s;

@ -191,7 +275,7 @@ d = (c & utf8_table3[i]) << s;

 for (j = 0; j < i; j++)
  {
-  c = *buffer++;
+  c = *utf8bytes++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
@ -222,24 +306,24 @@ and encodes it as a UTF-8 character in 0 to 6 bytes.

 Arguments:
  cvalue     the character value
-  buffer     pointer to buffer for result - at least 6 bytes long
+  utf8bytes  pointer to buffer for result - at least 6 bytes long

 Returns:     number of characters placed in the buffer
 */

 static int
-ord2utf8(int cvalue, uschar *buffer)
+ord2utf8(int cvalue, uschar *utf8bytes)  /* returns the byte count, i + 1 */
 {
 register int i, j;
 for (i = 0; i < utf8_table1_size; i++)
  if (cvalue <= utf8_table1[i]) break;
-buffer += i;
+utf8bytes += i;  /* step to the last byte; the loop below fills backwards */
 for (j = i; j > 0; j--)
 {
- *buffer-- = 0x80 | (cvalue & 0x3f);
+ *utf8bytes-- = 0x80 | (cvalue & 0x3f);  /* trailing byte: low 6 bits */
 cvalue >>= 6;
 }
-*buffer = utf8_table2[i] | cvalue;
+*utf8bytes = utf8_table2[i] | cvalue;  /* first byte: table value | rest */
 return i + 1;
 }

@ -461,8 +545,8 @@ if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
 *         Byte flipping function                 *
 *************************************************/

-static long int
-byteflip(long int value, int n)
+static unsigned long int
+byteflip(unsigned long int value, int n)
 {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
 return ((value & 0x000000ff) << 24) |
@ -525,6 +609,32 @@ return count;



+/*************************************************
+*         Check newline indicator                *
+*************************************************/
+
+/* Decode a <xxx> escape (xxx = lower-case cr, lf, or crlf) for a pattern or
+data line. Only the leading "xxx>" of p is compared; on no match, report on f.
+
+Arguments:
+  p           points after the leading '<'
+  f           file for error message
+
+Returns:      the matching PCRE_NEWLINE_xxx flag, or 0 if unrecognized
+*/
+
+static int
+check_newline(uschar *p, FILE *f)
+{
+if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
+if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
+if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
+fprintf(f, "Unknown newline type at: <%s\n", p);  /* echoes the rest of p */
+return 0;  /* 0 = no match; the visible callers then abandon the line */
+}
+
+
+
 /*************************************************
 *                Main Program                    *
 *************************************************/
@ -553,16 +663,23 @@ int debug = 0;
 int done = 0;
 int all_use_dfa = 0;
 int yield = 0;
+int stack_size;

-unsigned char *buffer;
-unsigned char *dbuffer;
+/* These vectors store, end-to-end, a list of captured substring names. Assume
+that 1024 is plenty long enough for the few names we'll be testing. */
+
+uschar copynames[1024];
+uschar getnames[1024];
+
+uschar *copynamesptr;
+uschar *getnamesptr;

 /* Get buffers from malloc() so that Electric Fence will check their misuse
-when I am debugging. */
+when I am debugging. They grow automatically when very long lines are read. */

-buffer = (unsigned char *)malloc(BUFFER_SIZE);
-dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
-pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
+buffer = (unsigned char *)malloc(buffer_size);
+dbuffer = (unsigned char *)malloc(buffer_size);
+pbuffer = (unsigned char *)malloc(buffer_size);

 /* The outfile variable is static so that new_malloc can use it. The _setmode()
 stuff is some magic that I don't understand, but which apparently does good
@ -596,6 +713,28 @@ while (argc > 1 && argv[op][0] == '-')
    op++;
    argc--;
    }
+  else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
+      ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
+        *endptr == 0))
+    {
+#ifdef _WIN32
+    printf("PCRE: -S not supported on this OS\n");
+    exit(1);
+#else
+    int rc;
+    struct rlimit rlim;
+    getrlimit(RLIMIT_STACK, &rlim);
+    rlim.rlim_cur = stack_size * 1024 * 1024;
+    rc = setrlimit(RLIMIT_STACK, &rlim);
+    if (rc != 0)
+      {
+    printf("PCRE: setrlimit() failed with error %d\n", rc);
+    exit(1);
+      }
+    op++;
+    argc--;
+#endif
+    }
 #if !defined NOPOSIX
  else if (strcmp(argv[op], "-p") == 0) posix = 1;
 #endif
@ -609,7 +748,8 @@ while (argc > 1 && argv[op][0] == '-')
    (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
    printf("  %sUnicode properties support\n", rc? "" : "No ");
    (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
-    printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
+    printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
+      (rc == '\n')? "LF" : "CRLF");
    (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
    printf("  Internal link size = %d\n", rc);
    (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
@ -625,7 +765,7 @@ while (argc > 1 && argv[op][0] == '-')
  else
    {
    printf("** Unknown or malformed option %s\n", argv[op]);
-    printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
+    printf("Usage:   pcretest [options] [<input> [<output>]]\n");
    printf("  -C     show PCRE compile-time options and exit\n");
    printf("  -d     debug: show compiled code; implies -i\n");
 #if !defined NODFA
@ -637,6 +777,7 @@ while (argc > 1 && argv[op][0] == '-')
 #if !defined NOPOSIX
    printf("  -p     use POSIX interface\n");
 #endif
+    printf("  -S <n> set stack size to <n> megabytes\n");
    printf("  -s     output store (memory) used information\n"
           "  -t     time compilation and execution\n");
    yield = 1;
@ -723,7 +864,7 @@ while (!done)
  use_utf8 = 0;

  if (infile == stdin) printf("  re> ");
-  if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
+  if (extend_inputline(infile, buffer) == NULL) break;
  if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
  fflush(outfile);

@ -735,7 +876,7 @@ while (!done)

  if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
    {
-    unsigned long int magic;
+    unsigned long int magic, get_options;
    uschar sbuf[8];
    FILE *f;

@ -783,8 +924,8 @@ while (!done)

    /* Need to know if UTF-8 for printing data strings */

-    new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
-    use_utf8 = (options & PCRE_UTF8) != 0;
+    new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
+    use_utf8 = (get_options & PCRE_UTF8) != 0;

    /* Now see if there is any following study data */

@ -838,16 +979,8 @@ while (!done)
      pp++;
      }
    if (*pp != 0) break;
-
-    len = BUFFER_SIZE - (pp - buffer);
-    if (len < 256)
-      {
-      fprintf(outfile, "** Expression too long - missing delimiter?\n");
-      goto SKIP_DATA;
-      }
-
    if (infile == stdin) printf("    > ");
-    if (fgets((char *)pp, len, infile) == NULL)
+    if ((pp = extend_inputline(infile, pp)) == NULL)
      {
      fprintf(outfile, "** Unexpected EOF\n");
      done = 1;
@ -893,6 +1026,7 @@ while (!done)
      case 'F': do_flip = 1; break;
      case 'G': do_G = 1; break;
      case 'I': do_showinfo = 1; break;
+      case 'J': options |= PCRE_DUPNAMES; break;
      case 'M': log_store = 1; break;
      case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;

@ -927,6 +1061,15 @@ while (!done)
      *pp = 0;
      break;

+      case '<':
+        {
+        int x = check_newline(pp, outfile);
+        if (x == 0) goto SKIP_DATA;
+        options |= x;
+        while (*pp++ != '>');
+        }
+      break;
+
      case '\r':                      /* So that it works in Windows */
      case '\n':
      case ' ':
@ -961,7 +1104,7 @@ while (!done)

    if (rc != 0)
      {
-      (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
+      (void)regerror(rc, &preg, (char *)buffer, buffer_size);
      fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
      goto SKIP_DATA;
      }
@ -1002,7 +1145,7 @@ while (!done)
        {
        for (;;)
          {
-          if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
+          if (extend_inputline(infile, buffer) == NULL)
            {
            done = 1;
            goto CONTINUE;
@ -1163,13 +1306,13 @@ while (!done)
      if (do_flip)
        {
        all_options = byteflip(all_options, sizeof(all_options));
-        }
+         }

      if ((all_options & PCRE_NOPARTIAL) != 0)
        fprintf(outfile, "Partial matching not supported\n");

      if (get_options == 0) fprintf(outfile, "No options\n");
-        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
+        else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
          ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
          ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
          ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
@ -1181,14 +1324,30 @@ while (!done)
          ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
          ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
          ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
-          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
+          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
+          ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");

-      if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
-        fprintf(outfile, "Case state changes\n");
+      switch (get_options & PCRE_NEWLINE_CRLF)
+        {
+        case PCRE_NEWLINE_CR:
+        fprintf(outfile, "Forced newline sequence: CR\n");
+        break;
+
+        case PCRE_NEWLINE_LF:
+        fprintf(outfile, "Forced newline sequence: LF\n");
+        break;
+
+        case PCRE_NEWLINE_CRLF:
+        fprintf(outfile, "Forced newline sequence: CRLF\n");
+        break;
+
+        default:
+        break;
+        }

      if (first_char == -1)
        {
-        fprintf(outfile, "First char at start or follows \\n\n");
+        fprintf(outfile, "First char at start or follows newline\n");
        }
      else if (first_char < 0)
        {
@ -1343,6 +1502,12 @@ while (!done)

    options = 0;

+    *copynames = 0;
+    *getnames = 0;
+
+    copynamesptr = copynames;
+    getnamesptr = getnames;
+
    pcre_callout = callout;
    first_callout = 1;
    callout_extra = 0;
@ -1351,15 +1516,24 @@ while (!done)
    callout_fail_id = -1;
    show_malloc = 0;

-    if (infile == stdin) printf("data> ");
-    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
-      {
-      done = 1;
-      goto CONTINUE;
-      }
-    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
+    if (extra != NULL) extra->flags &=
+      ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
+
+    len = 0;
+    for (;;)
+      {
+      if (infile == stdin) printf("data> ");
+      if (extend_inputline(infile, buffer + len) == NULL)
+        {
+        if (len > 0) break;
+        done = 1;
+        goto CONTINUE;
+        }
+      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
+      len = (int)strlen((char *)buffer);
+      if (buffer[len-1] == '\n') break;
+      }

-    len = (int)strlen((char *)buffer);
    while (len > 0 && isspace(buffer[len-1])) len--;
    buffer[len] = 0;
    if (len == 0) break;
@ -1389,6 +1563,17 @@ while (!done)
        c -= '0';
        while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
          c = c * 8 + *p++ - '0';
+
+#if !defined NOUTF8
+        if (use_utf8 && c > 255)
+          {
+          unsigned char buff8[8];
+          int ii, utn;
+          utn = ord2utf8(c, buff8);
+          for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
+          c = buff8[ii];   /* Last byte */
+          }
+#endif
        break;

        case 'x':
@ -1450,14 +1635,14 @@ while (!done)
          }
        else if (isalnum(*p))
          {
-          uschar name[256];
-          uschar *npp = name;
+          uschar *npp = copynamesptr;
          while (isalnum(*p)) *npp++ = *p++;
+          *npp++ = 0;
          *npp = 0;
-          n = pcre_get_stringnumber(re, (char *)name);
+          n = pcre_get_stringnumber(re, (char *)copynamesptr);
          if (n < 0)
-            fprintf(outfile, "no parentheses with name \"%s\"\n", name);
-          else copystrings |= 1 << n;
+            fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
+          copynamesptr = npp;
          }
        else if (*p == '+')
          {
@ -1518,14 +1703,14 @@ while (!done)
          }
        else if (isalnum(*p))
          {
-          uschar name[256];
-          uschar *npp = name;
+          uschar *npp = getnamesptr;
          while (isalnum(*p)) *npp++ = *p++;
+          *npp++ = 0;
          *npp = 0;
-          n = pcre_get_stringnumber(re, (char *)name);
+          n = pcre_get_stringnumber(re, (char *)getnamesptr);
          if (n < 0)
-            fprintf(outfile, "no parentheses with name \"%s\"\n", name);
-          else getstrings |= 1 << n;
+            fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
+          getnamesptr = npp;
          }
        continue;

@ -1564,6 +1749,28 @@ while (!done)
        options |= PCRE_PARTIAL;
        continue;

+        case 'Q':
+        while(isdigit(*p)) n = n * 10 + *p++ - '0';
+        if (extra == NULL)
+          {
+          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
+          extra->flags = 0;
+          }
+        extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+        extra->match_limit_recursion = n;
+        continue;
+
+        case 'q':
+        while(isdigit(*p)) n = n * 10 + *p++ - '0';
+        if (extra == NULL)
+          {
+          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
+          extra->flags = 0;
+          }
+        extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
+        extra->match_limit = n;
+        continue;
+
 #if !defined NODFA
        case 'R':
        options |= PCRE_DFA_RESTART;
@ -1581,6 +1788,15 @@ while (!done)
        case '?':
        options |= PCRE_NO_UTF8_CHECK;
        continue;
+
+        case '<':
+          {
+          int x = check_newline(p, outfile);
+          if (x == 0) goto NEXT_DATA;
+          options |= x;
+          while (*p++ != '>');
+          }
+        continue;
        }
      *q++ = c;
      }
@ -1611,7 +1827,7 @@ while (!done)

      if (rc != 0)
        {
-        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
+        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
        fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
        }
      else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
@ -1690,7 +1906,7 @@ while (!done)
          extra->flags = 0;
          }

-        count = check_match_limit(re, extra, bptr, len, start_offset,
+        (void)check_match_limit(re, extra, bptr, len, start_offset,
          options|g_notempty, use_offsets, use_size_offsets,
          PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
          PCRE_ERROR_MATCHLIMIT, "match()");
@ -1778,7 +1994,7 @@ while (!done)
          {
          if ((copystrings & (1 << i)) != 0)
            {
-            char copybuffer[16];
+            char copybuffer[256];
            int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
              i, copybuffer, sizeof(copybuffer));
            if (rc < 0)
@ -1788,6 +2004,19 @@ while (!done)
            }
          }

+        for (copynamesptr = copynames;
+             *copynamesptr != 0;
+             copynamesptr += (int)strlen((char*)copynamesptr) + 1)
+          {
+          char copybuffer[256];
+          int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
+            count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
+          if (rc < 0)
+            fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
+          else
+            fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
+          }
+
        for (i = 0; i < 32; i++)
          {
          if ((getstrings & (1 << i)) != 0)
@ -1800,12 +2029,27 @@ while (!done)
            else
              {
              fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
-              /* free((void *)substring); */
              pcre_free_substring(substring);
              }
            }
          }

+        for (getnamesptr = getnames;
+             *getnamesptr != 0;
+             getnamesptr += (int)strlen((char*)getnamesptr) + 1)
+          {
+          const char *substring;
+          int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
+            count, (char *)getnamesptr, &substring);
+          if (rc < 0)
+            fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
+          else
+            {
+            fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
+            pcre_free_substring(substring);
+            }
+          }
+
        if (getlist)
          {
          const char **stringlist;
@ -1905,6 +2149,8 @@ while (!done)
        len -= use_offsets[1];
        }
      }  /* End of loop for /g and /G */
+
+    NEXT_DATA: continue;
    }    /* End of loop for data lines */

  CONTINUE:
--- a/ext/pcre/pcrelib/testdata/grepinputx
+++ b/ext/pcre/pcrelib/testdata/grepinputx
@ -39,4 +39,11 @@ eighteen
 nineteen
 twenty

+Here follows some CR/LF/CRLF test data.
+
+abc
+def
+ghi
+jkl
+
 This is the last line of this file.
--- a/ext/pcre/pcrelib/testdata/testinput1
+++ b/ext/pcre/pcrelib/testdata/testinput1
@ -1474,11 +1474,11 @@
 /(abc)\323/
    abc\xd3

-/(abc)\500/
+/(abc)\100/
    abc\x40
    abc\100

-/(abc)\5000/
+/(abc)\1000/
    abc\x400
    abc\x40\x30
    abc\1000
@ -3847,4 +3847,41 @@
    ** Failers 
    abcddefg

+/(?<![^f]oo)(bar)/
+    foobarX 
+    ** Failers 
+    boobarX
+
+/(?<![^f])X/
+    offX
+    ** Failers
+    onyX  
+
+/(?<=[^f])X/
+    onyX
+    ** Failers
+    offX 
+
+/^/mg
+    a\nb\nc\n
+    \ 
+    
+/(?<=C\n)^/mg
+    A\nC\nC\n 
+
+/(?:(?(1)a|b)(X))+/
+    bXaX
+
+/(?:(?(1)\1a|b)(X|Y))+/
+    bXXaYYaY
+    bXYaXXaX  
+
+/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
+    bXXaYYaY
+
+/[[,abc,]+]/
+    abc]
+    a,b]
+    [a,b,c]  
+
 / End of testinput1 /
--- a/ext/pcre/pcrelib/testdata/testinput2
+++ b/ext/pcre/pcrelib/testdata/testinput2
@ -733,7 +733,7 @@
    Ab
    AB        

-/[\200-\410]/
+/[\200-\110]/

 /^(?(0)f|b)oo/

@ -1490,4 +1490,157 @@

 /\x{0000ff}/

+/^((?P<A>a1)|(?P<A>a2)b)/
+
+/^((?P<A>a1)|(?P<A>a2)b)/J
+    a1b\CA
+    a2b\CA 
+    ** Failers
+    a1b\CZ\CA 
+    
+/^(?P<A>a)(?P<A>b)/J
+    ab\CA
+    
+/^(?P<A>a)(?P<A>b)|cd/J
+    ab\CA
+    cd\CA 
+  
+/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
+    cdefgh\CA 
+  
+/^((?P<A>a1)|(?P<A>a2)b)/J
+    a1b\GA
+    a2b\GA 
+    ** Failers
+    a1b\GZ\GA 
+    
+/^(?P<A>a)(?P<A>b)/J
+    ab\GA
+    
+/^(?P<A>a)(?P<A>b)|cd/J
+    ab\GA
+    cd\GA 
+  
+/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
+    cdefgh\GA 
+  
+/(?J)^((?P<A>a1)|(?P<A>a2)b)/
+    a1b\CA
+    a2b\CA 
+    
+/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<A>d)/
+
+/ In this next test, J is not set at the outer level; consequently it isn't
+set in the pattern's options; consequently pcre_get_named_substring() produces 
+a random value. /x
+
+/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<C>d)/
+    a bc d\CA\CB\CC
+
+/^(?P<A>a)?(?(A)a|b)/
+    aabc
+    bc
+    ** Failers
+    abc   
+
+/(?:(?(ZZ)a|b)(?P<ZZ>X))+/
+    bXaX
+
+/(?:(?(2y)a|b)(X))+/
+
+/(?:(?(ZA)a|b)(?P<ZZ>X))+/
+
+/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/
+    bbXaaX
+
+/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/
+    (b)\\Xa\\X
+
+/(?P<ABC/
+
+/(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
+    bXXaYYaY
+    bXYaXXaX  
+
+/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
+    bXXaYYaY
+
+/\777/
+
+/\s*,\s*/S
+    \x0b,\x0b
+    \x0c,\x0d 
+
+/^abc/m
+    xyz\nabc
+    xyz\nabc\<lf>
+    xyz\r\nabc\<lf>
+    xyz\rabc\<cr>
+    xyz\r\nabc\<crlf>
+    ** Failers 
+    xyz\nabc\<cr>
+    xyz\r\nabc\<cr>
+    xyz\nabc\<crlf>
+    xyz\rabc\<crlf>
+    xyz\rabc\<lf>
+    
+/abc$/m
+    xyzabc
+    xyzabc\n 
+    xyzabc\npqr 
+    xyzabc\r\<cr> 
+    xyzabc\rpqr\<cr> 
+    xyzabc\r\n\<crlf> 
+    xyzabc\r\npqr\<crlf> 
+    ** Failers
+    xyzabc\r 
+    xyzabc\rpqr 
+    xyzabc\r\n 
+    xyzabc\r\npqr 
+    
+/^abc/m<cr>
+    xyz\rabcdef
+    xyz\nabcdef\<lf>
+    ** Failers  
+    xyz\nabcdef
+       
+/^abc/m<lf>
+    xyz\nabcdef
+    xyz\rabcdef\<cr>
+    ** Failers  
+    xyz\rabcdef
+       
+/^abc/m<crlf>
+    xyz\r\nabcdef
+    xyz\rabcdef\<cr>
+    ** Failers  
+    xyz\rabcdef
+    
+/^abc/m<bad>
+
+/abc/
+    xyz\rabc\<bad>
+    abc  
+       
+/.*/
+    abc\ndef
+    abc\rdef
+    abc\r\ndef
+    \<cr>abc\ndef
+    \<cr>abc\rdef
+    \<cr>abc\r\ndef
+    \<crlf>abc\ndef
+    \<crlf>abc\rdef
+    \<crlf>abc\r\ndef
+
+/\w+(.)(.)?def/s
+    abc\ndef
+    abc\rdef
+    abc\r\ndef
+
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+
+   /* this is a C style comment */\M
+
+/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/
+
 / End of testinput2 /
--- a/ext/pcre/pcrelib/testdata/testinput4
+++ b/ext/pcre/pcrelib/testdata/testinput4
@ -510,7 +510,14 @@
 /^\x{85}$/8i
    \x{85}

+/^ሴ/8
+    ሴ 
+
 /^\ሴ/8
    ሴ 

+"(?s)(.{1,5})"8
+    abcdefg
+    ab
+
 / End of testinput4 /
--- a/ext/pcre/pcrelib/testdata/testinput5
+++ b/ext/pcre/pcrelib/testdata/testinput5
@ -265,4 +265,10 @@

 /^\ሴ/8D

+/\777/I
+
+/\777/8I
+  \x{1ff}
+  \777 
+
 / End of testinput5 /
--- a/ext/pcre/pcrelib/testdata/testinput6
+++ b/ext/pcre/pcrelib/testdata/testinput6
@ -738,4 +738,13 @@
     \x{1c5}XY
     AXY      

+/^(\p{Z}[^\p{C}\p{Z}]+)*$/
+    \xa0!
+
+/^[\pL](abc)(?1)/
+    AabcabcYZ    
+
+/([\pL]=(abc))*X/
+    L=abcX
+
 / End of testinput6 /
--- a/ext/pcre/pcrelib/testdata/testinput7
+++ b/ext/pcre/pcrelib/testdata/testinput7
@ -1909,11 +1909,11 @@
 /(abc)\323/
    abc\xd3

-/(abc)\500/
+/(abc)\100/
    abc\x40
    abc\100

-/(abc)\5000/
+/(abc)\1000/
    abc\x400
    abc\x40\x30
    abc\1000
@ -4019,4 +4019,100 @@
    123\P
    4\P\R

+/^/mg
+    a\nb\nc\n
+    \ 
+    
+/(?<=C\n)^/mg
+    A\nC\nC\n 
+
+/(?s)A?B/
+    AB
+    aB  
+
+/(?s)A*B/
+    AB
+    aB  
+
+/(?m)A?B/
+    AB
+    aB  
+
+/(?m)A*B/
+    AB
+    aB  
+
+/Content-Type\x3A[^\r\n]{6,}/
+    Content-Type:xxxxxyyy 
+
+/Content-Type\x3A[^\r\n]{6,}z/
+    Content-Type:xxxxxyyyz
+
+/Content-Type\x3A[^a]{6,}/
+    Content-Type:xxxyyy 
+
+/Content-Type\x3A[^a]{6,}z/
+    Content-Type:xxxyyyz
+
+/^abc/m
+    xyz\nabc
+    xyz\nabc\<lf>
+    xyz\r\nabc\<lf>
+    xyz\rabc\<cr>
+    xyz\r\nabc\<crlf>
+    ** Failers 
+    xyz\nabc\<cr>
+    xyz\r\nabc\<cr>
+    xyz\nabc\<crlf>
+    xyz\rabc\<crlf>
+    xyz\rabc\<lf>
+    
+/abc$/m
+    xyzabc
+    xyzabc\n 
+    xyzabc\npqr 
+    xyzabc\r\<cr> 
+    xyzabc\rpqr\<cr> 
+    xyzabc\r\n\<crlf> 
+    xyzabc\r\npqr\<crlf> 
+    ** Failers
+    xyzabc\r 
+    xyzabc\rpqr 
+    xyzabc\r\n 
+    xyzabc\r\npqr 
+    
+/^abc/m<cr>
+    xyz\rabcdef
+    xyz\nabcdef\<lf>
+    ** Failers  
+    xyz\nabcdef
+       
+/^abc/m<lf>
+    xyz\nabcdef
+    xyz\rabcdef\<cr>
+    ** Failers  
+    xyz\rabcdef
+       
+/^abc/m<crlf>
+    xyz\r\nabcdef
+    xyz\rabcdef\<cr>
+    ** Failers  
+    xyz\rabcdef
+    
+/.*/
+    abc\ndef
+    abc\rdef
+    abc\r\ndef
+    \<cr>abc\ndef
+    \<cr>abc\rdef
+    \<cr>abc\r\ndef
+    \<crlf>abc\ndef
+    \<crlf>abc\rdef
+    \<crlf>abc\r\ndef
+
+/\w+(.)(.)?def/s
+    abc\ndef
+    abc\rdef
+    abc\r\ndef
+
 / End of testinput7 /
--- a/ext/pcre/pcrelib/testdata/testoutput1
+++ b/ext/pcre/pcrelib/testdata/testoutput1
@ -2127,7 +2127,7 @@ No match
 0: abc\xd3
 1: abc

-/(abc)\500/
+/(abc)\100/
    abc\x40
 0: abc@
 1: abc
@ -2135,7 +2135,7 @@ No match
 0: abc@
 1: abc

-/(abc)\5000/
+/(abc)\1000/
    abc\x400
 0: abc@0
 1: abc
@ -6282,4 +6282,76 @@ No match
    abcddefg
 No match

+/(?<![^f]oo)(bar)/
+    foobarX 
+ 0: bar
+ 1: bar
+    ** Failers 
+No match
+    boobarX
+No match
+
+/(?<![^f])X/
+    offX
+ 0: X
+    ** Failers
+No match
+    onyX  
+No match
+
+/(?<=[^f])X/
+    onyX
+ 0: X
+    ** Failers
+No match
+    offX 
+No match
+
+/^/mg
+    a\nb\nc\n
+ 0: 
+ 0: 
+ 0: 
+    \ 
+ 0: 
+    
+/(?<=C\n)^/mg
+    A\nC\nC\n 
+ 0: 
+
+/(?:(?(1)a|b)(X))+/
+    bXaX
+ 0: bXaX
+ 1: X
+
+/(?:(?(1)\1a|b)(X|Y))+/
+    bXXaYYaY
+ 0: bXXaYYaY
+ 1: Y
+    bXYaXXaX  
+ 0: bX
+ 1: X
+
+/()()()()()()()()()(?:(?(10)\10a|b)(X|Y))+/
+    bXXaYYaY
+ 0: bX
+ 1: 
+ 2: 
+ 3: 
+ 4: 
+ 5: 
+ 6: 
+ 7: 
+ 8: 
+ 9: 
+10: X
+
+/[[,abc,]+]/
+    abc]
+ 0: abc]
+    a,b]
+ 0: a,b]
+    [a,b,c]  
+ 0: [a,b,c]
+
 / End of testinput1 /
--- a/ext/pcre/pcrelib/testdata/testoutput2
+++ b/ext/pcre/pcrelib/testdata/testoutput2
@ -115,14 +115,14 @@ Failed: unrecognized character after (? at offset 2
 Capturing subpattern count = 0
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'b'

 /.*?b/
 Capturing subpattern count = 0
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'b'

 /cat|dog|elephant/
@ -326,7 +326,7 @@ No need char
 Capturing subpattern count = 3
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 No need char
    defabc
 0: defabc
@ -517,7 +517,6 @@ No need char
 /(^b|(?i)^d)/
 Capturing subpattern count = 1
 Options: anchored
-Case state changes
 No first char
 No need char

@ -552,13 +551,13 @@ Starting byte set: b c x y
 /(^a|^b)/m
 Capturing subpattern count = 1
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 No need char

 /(?i)(^a|^b)/m
 Capturing subpattern count = 1
 Options: caseless multiline
-First char at start or follows \n
+First char at start or follows newline
 No need char

 /(a)(?(1)a|b|c)/
@ -568,13 +567,13 @@ Failed: conditional group contains more than two branches at offset 13
 Failed: conditional group contains more than two branches at offset 12

 /(?(1a)/
-Failed: malformed number after (?( at offset 4
+Failed: reference to non-existent subpattern at offset 6

 /(?(?i))/
 Failed: assertion expected after (?( at offset 3

 /(?(abc))/
-Failed: assertion expected after (?( at offset 3
+Failed: reference to non-existent subpattern at offset 7

 /(?(?<ab))/
 Failed: unrecognized character after (?< at offset 5
@ -592,7 +591,6 @@ Capturing subpattern count = 1
 Max back reference = 1
 Partial matching not supported
 No options
-Case state changes
 First char = 'b' (caseless)
 Need char = 'h' (caseless)

@ -609,7 +607,6 @@ Need char = 'h' (caseless)
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 No options
-Case state changes
 First char = 'b' (caseless)
 No need char
 Study returned NULL
@ -618,7 +615,6 @@ Study returned NULL
 Capturing subpattern count = 1
 Partial matching not supported
 No options
-Case state changes
 No first char
 No need char
 Starting byte set: C a b c d 
@ -664,7 +660,7 @@ No need char
 /^abc/m 
 Capturing subpattern count = 0
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'c'

 /^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/
@ -721,7 +717,7 @@ No match
 /^(?<=foo\n)bar/m
 Capturing subpattern count = 0
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'r'
    foo\nbarbar 
 0: bar
@ -737,7 +733,7 @@ No match
 /(?>^abc)/m
 Capturing subpattern count = 0
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'c'
    abc
 0: abc
@ -782,7 +778,6 @@ No match
 /(?<=ab(?i)x|y|z)/
 Capturing subpattern count = 0
 No options
-Case state changes
 No first char
 No need char

@ -790,7 +785,7 @@ No need char
 Capturing subpattern count = 2
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 No need char
    alphabetabcd
 0: alphabetabcd
@ -803,7 +798,6 @@ No need char
 /(?<=ab(?i)x(?-i)y|(?i)z|b)ZZ/
 Capturing subpattern count = 0
 No options
-Case state changes
 First char = 'Z'
 Need char = 'Z'
    abxyZZ
@ -966,7 +960,7 @@ Failed: unrecognized character after (? at offset 3
 Failed: unrecognized character after (? at offset 3

 /(?(1?)a|b)/
-Failed: malformed number after (?( at offset 4
+Failed: malformed number or name after (?( at offset 4

 /(?(1)a|b|c)/
 Failed: conditional group contains more than two branches at offset 10
@ -1021,7 +1015,7 @@ No need char
    abcdefghijklmnopqrstuvwxyz\C1
 0: abcdefghijklmnopqrst
 1: abcdefghijklmnopqrst
-copy substring 1 failed -6
+ 1C abcdefghijklmnopqrst (20)
    abcdefghijklmnopqrstuvwxyz\G1
 0: abcdefghijklmnopqrst
 1: abcdefghijklmnopqrst
@ -1054,7 +1048,7 @@ No need char
    abcdefghijklmnopqrstuvwxyz\C1\G1\L
 0: abcdefghijklmnop
 1: abcdefghijklmnop
-copy substring 1 failed -6
+ 1C abcdefghijklmnop (16)
 1G abcdefghijklmnop (16)
 0L abcdefghijklmnop
 1L abcdefghijklmnop
@ -1128,7 +1122,7 @@ Need char = 'd'
 Capturing subpattern count = 0
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'X'

 /.*X/Ds
@ -1161,7 +1155,7 @@ Need char = 'X'
 Capturing subpattern count = 1
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 No need char

 /(.*X|^B)/Ds
@ -1221,7 +1215,7 @@ No need char
 Capturing subpattern count = 0
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 No need char

 /\Biss\B/+
@ -1306,7 +1300,7 @@ No need char
 Capturing subpattern count = 0
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 Need char = 's'
    abciss\nxyzisspqr 
 0: abciss
@ -1365,7 +1359,7 @@ No need char
 /^ab\n/mg+
 Capturing subpattern count = 0
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 Need char = 10
    ab\nab\ncd
 0: ab\x0a
@ -2223,7 +2217,6 @@ No need char
 /((?-i)[[:lower:]])[[:lower:]]/i
 Capturing subpattern count = 1
 Options: caseless
-Case state changes
 No first char
 No need char
    ab
@ -2240,11 +2233,11 @@ No match
    AB        
 No match

-/[\200-\410]/
+/[\200-\110]/
 Failed: range out of order in character class at offset 9

 /^(?(0)f|b)oo/
-Failed: invalid condition (?(0) at offset 5
+Failed: invalid condition (?(0) at offset 6

 /This one's here because of the large output vector needed/
 Capturing subpattern count = 0
@ -2761,7 +2754,6 @@ No need char
 ------------------------------------------------------------------
 Capturing subpattern count = 0
 No options
-Case state changes
 First char = 'a'
 Need char = 'b' (caseless)
    ab
@ -2787,7 +2779,6 @@ No match
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 No options
-Case state changes
 First char = 'a'
 Need char = 'b' (caseless)
    ab
@ -3370,7 +3361,7 @@ No need char
 Capturing subpattern count = 1
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 No need char
    
 /(.*)\d+\1/Is
@ -3393,7 +3384,7 @@ Capturing subpattern count = 2
 Max back reference = 2
 Partial matching not supported
 No options
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'z'

 /((.*))\d+\1/I
@ -3430,7 +3421,6 @@ Need char = 'z' (caseless)
 /(?=abc)(?i).xyz/I
 Capturing subpattern count = 0
 No options
-Case state changes
 First char = 'a'
 Need char = 'z' (caseless)

@ -3553,7 +3543,7 @@ Need char = 'b'
 /^a/mI
 Capturing subpattern count = 0
 Options: multiline
-First char at start or follows \n
+First char at start or follows newline
 Need char = 'a'
  abcde
 0: a
@ -3580,7 +3570,6 @@ Starting byte set: A B a b
 /[ab](?i)cd/IS
 Capturing subpattern count = 0
 No options
-Case state changes
 No first char
 Need char = 'd' (caseless)
 Starting byte set: a b 
@ -4503,12 +4492,12 @@ No first char
 Need char = 'z'
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\M
 Minimum match() limit = 8
-Minimum match() recursion limit = 7
+Minimum match() recursion limit = 6
 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
  aaaaaaaaaaaaaz\M
 Minimum match() limit = 32768
-Minimum match() recursion limit = 43
+Minimum match() recursion limit = 42
 No match

 /(aaa(?C1)bbb|ab)/
@ -4555,18 +4544,19 @@ Need char = 'h'
 1: cd
 2: gh
 1C cd (2)
- 2G gh (2)
+  G gh (2) two
    abcdefgh\Cone\Ctwo
 0: abcdefgh
 1: cd
 2: gh
- 1C cd (2)
- 2C gh (2)
+  C cd (2) one
+  C gh (2) two
    abcdefgh\Cthree  
 no parentheses with name "three"
 0: abcdefgh
 1: cd
 2: gh
+copy substring three failed -7

 /(?P<Tes>)(?P<Test>)/D
 ------------------------------------------------------------------
@ -4616,18 +4606,18 @@ Need char = 'a'
 0: zzaa
 1: zz
 2: aa
- 1C zz (2)
+  C zz (2) Z
    zzaa\CA
 0: zzaa
 1: zz
 2: aa
- 2C aa (2)
+  C aa (2) A

 /(?P<x>eks)(?P<x>eccs)/
-Failed: two named groups have the same name at offset 16
+Failed: two named subpatterns have the same name at offset 16

 /(?P<abc>abc(?P<def>def)(?P<abc>xyz))/
-Failed: two named groups have the same name at offset 31
+Failed: two named subpatterns have the same name at offset 31

 "\[((?P<elem>\d+)(,(?P>elem))*)\]"
 Capturing subpattern count = 3
@ -5769,7 +5759,6 @@ Failed: number too big in {} quantifier at offset 15
 Capturing subpattern count = 1
 Max back reference = 1
 No options
-Case state changes
 First char = 'a' (caseless)
 Need char = 'B'
    abcdefghijklAkB
@ -6059,6 +6048,505 @@ No options
 First char = 255
 No need char

+/^((?P<A>a1)|(?P<A>a2)b)/
+Failed: two named subpatterns have the same name at offset 18
+
+/^((?P<A>a1)|(?P<A>a2)b)/J
+Capturing subpattern count = 3
+Named capturing subpatterns:
+  A   2
+  A   3
+Options: anchored dupnames
+No first char
+No need char
+    a1b\CA
+ 0: a1
+ 1: a1
+ 2: a1
+  C a1 (2) A
+    a2b\CA 
+ 0: a2b
+ 1: a2b
+ 2: <unset>
+ 3: a2
+  C a2 (2) A
+    ** Failers
+No match
+    a1b\CZ\CA 
+no parentheses with name "Z"
+ 0: a1
+ 1: a1
+ 2: a1
+copy substring Z failed -7
+  C a1 (2) A
+    
+/^(?P<A>a)(?P<A>b)/J
+Capturing subpattern count = 2
+Named capturing subpatterns:
+  A   1
+  A   2
+Options: anchored dupnames
+No first char
+No need char
+    ab\CA
+ 0: ab
+ 1: a
+ 2: b
+  C a (1) A
+    
+/^(?P<A>a)(?P<A>b)|cd/J
+Capturing subpattern count = 2
+Named capturing subpatterns:
+  A   1
+  A   2
+Options: dupnames
+No first char
+No need char
+    ab\CA
+ 0: ab
+ 1: a
+ 2: b
+  C a (1) A
+    cd\CA 
+ 0: cd
+copy substring A failed -7
+  
+/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
+Capturing subpattern count = 4
+Named capturing subpatterns:
+  A   1
+  A   2
+  A   3
+  A   4
+Options: dupnames
+No first char
+No need char
+    cdefgh\CA 
+ 0: cdefgh
+ 1: <unset>
+ 2: <unset>
+ 3: ef
+ 4: gh
+  C ef (2) A
+  
+/^((?P<A>a1)|(?P<A>a2)b)/J
+Capturing subpattern count = 3
+Named capturing subpatterns:
+  A   2
+  A   3
+Options: anchored dupnames
+No first char
+No need char
+    a1b\GA
+ 0: a1
+ 1: a1
+ 2: a1
+  G a1 (2) A
+    a2b\GA 
+ 0: a2b
+ 1: a2b
+ 2: <unset>
+ 3: a2
+  G a2 (2) A
+    ** Failers
+No match
+    a1b\GZ\GA 
+no parentheses with name "Z"
+ 0: a1
+ 1: a1
+ 2: a1
+copy substring Z failed -7
+  G a1 (2) A
+    
+/^(?P<A>a)(?P<A>b)/J
+Capturing subpattern count = 2
+Named capturing subpatterns:
+  A   1
+  A   2
+Options: anchored dupnames
+No first char
+No need char
+    ab\GA
+ 0: ab
+ 1: a
+ 2: b
+  G a (1) A
+    
+/^(?P<A>a)(?P<A>b)|cd/J
+Capturing subpattern count = 2
+Named capturing subpatterns:
+  A   1
+  A   2
+Options: dupnames
+No first char
+No need char
+    ab\GA
+ 0: ab
+ 1: a
+ 2: b
+  G a (1) A
+    cd\GA 
+ 0: cd
+copy substring A failed -7
+  
+/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/J
+Capturing subpattern count = 4
+Named capturing subpatterns:
+  A   1
+  A   2
+  A   3
+  A   4
+Options: dupnames
+No first char
+No need char
+    cdefgh\GA 
+ 0: cdefgh
+ 1: <unset>
+ 2: <unset>
+ 3: ef
+ 4: gh
+  G ef (2) A
+  
+/(?J)^((?P<A>a1)|(?P<A>a2)b)/
+Capturing subpattern count = 3
+Named capturing subpatterns:
+  A   2
+  A   3
+Options: anchored dupnames
+No first char
+No need char
+    a1b\CA
+ 0: a1
+ 1: a1
+ 2: a1
+  C a1 (2) A
+    a2b\CA 
+ 0: a2b
+ 1: a2b
+ 2: <unset>
+ 3: a2
+  C a2 (2) A
+    
+/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<A>d)/
+Failed: two named subpatterns have the same name at offset 38
+
+/ In this next test, J is not set at the outer level; consequently it isn't
+set in the pattern's options; consequently pcre_get_named_substring() produces 
+a random value. /x
+Capturing subpattern count = 1
+Options: extended
+First char = 'I'
+Need char = 'e'
+
+/^(?P<A>a) (?J:(?P<B>b)(?P<B>c)) (?P<C>d)/
+Capturing subpattern count = 4
+Named capturing subpatterns:
+  A   1
+  B   2
+  B   3
+  C   4
+Options: anchored
+No first char
+No need char
+    a bc d\CA\CB\CC
+ 0: a bc d
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+  C a (1) A
+  C b (1) B
+  C d (1) C
+
+/^(?P<A>a)?(?(A)a|b)/
+Capturing subpattern count = 1
+Named capturing subpatterns:
+  A   1
+Options: anchored
+No first char
+No need char
+    aabc
+ 0: aa
+ 1: a
+    bc
+ 0: b
+    ** Failers
+No match
+    abc   
+No match
+
+/(?:(?(ZZ)a|b)(?P<ZZ>X))+/
+Capturing subpattern count = 1
+Named capturing subpatterns:
+  ZZ   1
+No options
+No first char
+Need char = 'X'
+    bXaX
+ 0: bXaX
+ 1: X
+
+/(?:(?(2y)a|b)(X))+/
+Failed: reference to non-existent subpattern at offset 9
+
+/(?:(?(ZA)a|b)(?P<ZZ>X))+/
+Failed: reference to non-existent subpattern at offset 9
+
+/(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/
+Capturing subpattern count = 1
+Named capturing subpatterns:
+  ZZ   1
+No options
+No first char
+Need char = 'X'
+    bbXaaX
+ 0: bbXaaX
+ 1: X
+
+/(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/
+Capturing subpattern count = 1
+Named capturing subpatterns:
+  ZZ   1
+No options
+No first char
+Need char = 'X'
+    (b)\\Xa\\X
+ 0: (b)\Xa\X
+ 1: X
+
+/(?P<ABC/
+Failed: syntax error after (?P at offset 7
+
+/(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
+Capturing subpattern count = 1
+Max back reference = 1
+Named capturing subpatterns:
+  A   1
+No options
+No first char
+No need char
+    bXXaYYaY
+ 0: bXXaYYaY
+ 1: Y
+    bXYaXXaX  
+ 0: bX
+ 1: X
+
+/()()()()()()()()()(?:(?(A)(?P=A)a|b)(?P<A>X|Y))+/
+Capturing subpattern count = 10
+Max back reference = 10
+Named capturing subpatterns:
+  A  10
+No options
+No first char
+No need char
+    bXXaYYaY
+ 0: bXXaYYaY
+ 1: 
+ 2: 
+ 3: 
+ 4: 
+ 5: 
+ 6: 
+ 7: 
+ 8: 
+ 9: 
+10: Y
+
+/\777/
+Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
+
+/\s*,\s*/S
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+No first char
+Need char = ','
+Starting byte set: \x09 \x0a \x0c \x0d \x20 , 
+    \x0b,\x0b
+ 0: ,
+    \x0c,\x0d 
+ 0: \x0c,\x0d
+
+/^abc/m
+Capturing subpattern count = 0
+Options: multiline
+First char at start or follows newline
+Need char = 'c'
+    xyz\nabc
+ 0: abc
+    xyz\nabc\<lf>
+ 0: abc
+    xyz\r\nabc\<lf>
+ 0: abc
+    xyz\rabc\<cr>
+ 0: abc
+    xyz\r\nabc\<crlf>
+ 0: abc
+    ** Failers 
+No match
+    xyz\nabc\<cr>
+No match
+    xyz\r\nabc\<cr>
+No match
+    xyz\nabc\<crlf>
+No match
+    xyz\rabc\<crlf>
+No match
+    xyz\rabc\<lf>
+No match
+    
+/abc$/m
+Capturing subpattern count = 0
+Options: multiline
+First char = 'a'
+Need char = 'c'
+    xyzabc
+ 0: abc
+    xyzabc\n 
+ 0: abc
+    xyzabc\npqr 
+ 0: abc
+    xyzabc\r\<cr> 
+ 0: abc
+    xyzabc\rpqr\<cr> 
+ 0: abc
+    xyzabc\r\n\<crlf> 
+ 0: abc
+    xyzabc\r\npqr\<crlf> 
+ 0: abc
+    ** Failers
+No match
+    xyzabc\r 
+No match
+    xyzabc\rpqr 
+No match
+    xyzabc\r\n 
+No match
+    xyzabc\r\npqr 
+No match
+    
+/^abc/m<cr>
+Capturing subpattern count = 0
+Options: multiline
+Forced newline sequence: CR
+First char at start or follows newline
+Need char = 'c'
+    xyz\rabcdef
+ 0: abc
+    xyz\nabcdef\<lf>
+ 0: abc
+    ** Failers  
+No match
+    xyz\nabcdef
+No match
+       
+/^abc/m<lf>
+Capturing subpattern count = 0
+Options: multiline
+Forced newline sequence: LF
+First char at start or follows newline
+Need char = 'c'
+    xyz\nabcdef
+ 0: abc
+    xyz\rabcdef\<cr>
+ 0: abc
+    ** Failers  
+No match
+    xyz\rabcdef
+No match
+       
+/^abc/m<crlf>
+Capturing subpattern count = 0
+Options: multiline
+Forced newline sequence: CRLF
+First char at start or follows newline
+Need char = 'c'
+    xyz\r\nabcdef
+ 0: abc
+    xyz\rabcdef\<cr>
+ 0: abc
+    ** Failers  
+No match
+    xyz\rabcdef
+No match
+    
+/^abc/m<bad>
+Unknown newline type at: <bad>
+
+
+/abc/
+Capturing subpattern count = 0
+No options
+First char = 'a'
+Need char = 'c'
+    xyz\rabc\<bad>
+Unknown newline type at: <bad>
+    abc  
+ 0: abc
+       
+/.*/
+Capturing subpattern count = 0
+Partial matching not supported
+No options
+First char at start or follows newline
+No need char
+    abc\ndef
+ 0: abc
+    abc\rdef
+ 0: abc\x0ddef
+    abc\r\ndef
+ 0: abc\x0d
+    \<cr>abc\ndef
+ 0: abc\x0adef
+    \<cr>abc\rdef
+ 0: abc
+    \<cr>abc\r\ndef
+ 0: abc
+    \<crlf>abc\ndef
+ 0: abc\x0adef
+    \<crlf>abc\rdef
+ 0: abc\x0ddef
+    \<crlf>abc\r\ndef
+ 0: abc
+
+/\w+(.)(.)?def/s
+Capturing subpattern count = 2
+Partial matching not supported
+Options: dotall
+No first char
+Need char = 'f'
+    abc\ndef
+ 0: abc\x0adef
+ 1: \x0a
+    abc\rdef
+ 0: abc\x0ddef
+ 1: \x0d
+    abc\r\ndef
+ 0: abc\x0d\x0adef
+ 1: \x0d
+ 2: \x0a
+
+((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)+
+Capturing subpattern count = 1
+Partial matching not supported
+No options
+No first char
+No need char
+   /* this is a C style comment */\M
+Minimum match() limit = 120
+Minimum match() recursion limit = 6
+ 0: /* this is a C style comment */
+ 1: /* this is a C style comment */
+
+/(?P<B>25[0-5]|2[0-4]\d|[01]?\d?\d)(?:\.(?P>B)){3}/
+Capturing subpattern count = 1
+Named capturing subpatterns:
+  B   1
+No options
+No first char
+Need char = '.'
+
 / End of testinput2 /
 Capturing subpattern count = 0
 No options
--- a/ext/pcre/pcrelib/testdata/testoutput4
+++ b/ext/pcre/pcrelib/testdata/testoutput4
@ -898,8 +898,20 @@ No match
    \x{85}
 0: \x{85}

+/^ሴ/8
+    ሴ 
+ 0: \x{1234}
+
 /^\ሴ/8
    ሴ 
 0: \x{1234}

+"(?s)(.{1,5})"8
+    abcdefg
+ 0: abcde
+ 1: abcde
+    ab
+ 0: ab
+ 1: ab
+
 / End of testinput4 /
--- a/ext/pcre/pcrelib/testdata/testoutput5
+++ b/ext/pcre/pcrelib/testdata/testoutput5
@ -1107,4 +1107,17 @@ Options: anchored utf8
 No first char
 No need char

+/\777/I
+Failed: octal value is greater than \377 (not in UTF-8 mode) at offset 3
+
+/\777/8I
+Capturing subpattern count = 0
+Options: utf8
+First char = 199
+Need char = 191
+  \x{1ff}
+ 0: \x{1ff}
+  \777 
+ 0: \x{1ff}
+
 / End of testinput5 /
--- a/ext/pcre/pcrelib/testdata/testoutput6
+++ b/ext/pcre/pcrelib/testdata/testoutput6
@ -1394,4 +1394,20 @@ No match
     AXY      
 No match

+/^(\p{Z}[^\p{C}\p{Z}]+)*$/
+    \xa0!
+ 0: \xa0!
+ 1: \xa0!
+
+/^[\pL](abc)(?1)/
+    AabcabcYZ    
+ 0: Aabcabc
+ 1: abc
+
+/([\pL]=(abc))*X/
+    L=abcX
+ 0: L=abcX
+ 1: L=abc
+ 2: abc
+
 / End of testinput6 /
--- a/ext/pcre/pcrelib/testdata/testoutput7
+++ b/ext/pcre/pcrelib/testdata/testoutput7
@ -3004,13 +3004,13 @@ No match
    abc\xd3
 0: abc\xd3

-/(abc)\500/
+/(abc)\100/
    abc\x40
 0: abc@
    abc\100
 0: abc@

-/(abc)\5000/
+/(abc)\1000/
    abc\x400
 0: abc@0
    abc\x40\x30
@ -6523,4 +6523,210 @@ Partial match: 123
    4\P\R
 0: 4

+/^/mg
+    a\nb\nc\n
+ 0: 
+ 0: 
+ 0: 
+    \ 
+ 0: 
+    
+/(?<=C\n)^/mg
+    A\nC\nC\n 
+ 0: 
+
+/(?s)A?B/
+    AB
+ 0: AB
+    aB  
+ 0: B
+
+/(?s)A*B/
+    AB
+ 0: AB
+    aB  
+ 0: B
+
+/(?m)A?B/
+    AB
+ 0: AB
+    aB  
+ 0: B
+
+/(?m)A*B/
+    AB
+ 0: AB
+    aB  
+ 0: B
+
+/Content-Type\x3A[^\r\n]{6,}/
+    Content-Type:xxxxxyyy 
+ 0: Content-Type:xxxxxyyy
+ 1: Content-Type:xxxxxyy
+ 2: Content-Type:xxxxxy
+
+/Content-Type\x3A[^\r\n]{6,}z/
+    Content-Type:xxxxxyyyz
+ 0: Content-Type:xxxxxyyyz
+
+/Content-Type\x3A[^a]{6,}/
+    Content-Type:xxxyyy 
+ 0: Content-Type:xxxyyy
+
+/Content-Type\x3A[^a]{6,}z/
+    Content-Type:xxxyyyz
+ 0: Content-Type:xxxyyyz
+
+/^abc/m
+    xyz\nabc
+ 0: abc
+    xyz\nabc\<lf>
+ 0: abc
+    xyz\r\nabc\<lf>
+ 0: abc
+    xyz\rabc\<cr>
+ 0: abc
+    xyz\r\nabc\<crlf>
+ 0: abc
+    ** Failers 
+No match
+    xyz\nabc\<cr>
+No match
+    xyz\r\nabc\<cr>
+No match
+    xyz\nabc\<crlf>
+No match
+    xyz\rabc\<crlf>
+No match
+    xyz\rabc\<lf>
+No match
+    
+/abc$/m
+    xyzabc
+ 0: abc
+    xyzabc\n 
+ 0: abc
+    xyzabc\npqr 
+ 0: abc
+    xyzabc\r\<cr> 
+ 0: abc
+    xyzabc\rpqr\<cr> 
+ 0: abc
+    xyzabc\r\n\<crlf> 
+ 0: abc
+    xyzabc\r\npqr\<crlf> 
+ 0: abc
+    ** Failers
+No match
+    xyzabc\r 
+No match
+    xyzabc\rpqr 
+No match
+    xyzabc\r\n 
+No match
+    xyzabc\r\npqr 
+No match
+    
+/^abc/m<cr>
+    xyz\rabcdef
+ 0: abc
+    xyz\nabcdef\<lf>
+ 0: abc
+    ** Failers  
+No match
+    xyz\nabcdef
+No match
+       
+/^abc/m<lf>
+    xyz\nabcdef
+ 0: abc
+    xyz\rabcdef\<cr>
+ 0: abc
+    ** Failers  
+No match
+    xyz\rabcdef
+No match
+       
+/^abc/m<crlf>
+    xyz\r\nabcdef
+ 0: abc
+    xyz\rabcdef\<cr>
+ 0: abc
+    ** Failers  
+No match
+    xyz\rabcdef
+No match
+    
+/.*/
+    abc\ndef
+ 0: abc
+ 1: ab
+ 2: a
+ 3: 
+    abc\rdef
+ 0: abc\x0ddef
+ 1: abc\x0dde
+ 2: abc\x0dd
+ 3: abc\x0d
+ 4: abc
+ 5: ab
+ 6: a
+ 7: 
+    abc\r\ndef
+ 0: abc\x0d
+ 1: abc
+ 2: ab
+ 3: a
+ 4: 
+    \<cr>abc\ndef
+ 0: abc\x0adef
+ 1: abc\x0ade
+ 2: abc\x0ad
+ 3: abc\x0a
+ 4: abc
+ 5: ab
+ 6: a
+ 7: 
+    \<cr>abc\rdef
+ 0: abc
+ 1: ab
+ 2: a
+ 3: 
+    \<cr>abc\r\ndef
+ 0: abc
+ 1: ab
+ 2: a
+ 3: 
+    \<crlf>abc\ndef
+ 0: abc\x0adef
+ 1: abc\x0ade
+ 2: abc\x0ad
+ 3: abc\x0a
+ 4: abc
+ 5: ab
+ 6: a
+ 7: 
+    \<crlf>abc\rdef
+ 0: abc\x0ddef
+ 1: abc\x0dde
+ 2: abc\x0dd
+ 3: abc\x0d
+ 4: abc
+ 5: ab
+ 6: a
+ 7: 
+    \<crlf>abc\r\ndef
+ 0: abc
+ 1: ab
+ 2: a
+ 3: 
+
+/\w+(.)(.)?def/s
+    abc\ndef
+ 0: abc\x0adef
+    abc\rdef
+ 0: abc\x0ddef
+    abc\r\ndef
+ 0: abc\x0d\x0adef
+
 / End of testinput7 /