Merge branch 'PHP-5.3' into PHP-5.4

* PHP-5.3: Fixed bug #63284 PCRE upgrade to 8.31
2024-12-02 22:34:55 +08:00 · 2012-10-19 09:51:58 +02:00 · 2012-10-19 09:51:58 +02:00 · d2fa182f25
commit d2fa182f25
parent bb51549915 276c5de0d8
59 changed files with 40225 additions and 29027 deletions
--- a/ext/pcre/config.w32
+++ b/ext/pcre/config.w32
@ -3,7 +3,7 @@

 EXTENSION("pcre", "php_pcre.c", false /* never shared */,
 		"-Iext/pcre/pcrelib");
-ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
+ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
 ADD_DEF_FILE("ext\\pcre\\php_pcre.def");

 AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
--- a/ext/pcre/config0.m4
+++ b/ext/pcre/config0.m4
@ -55,9 +55,9 @@ PHP_ARG_WITH(pcre-regex,,
    pcrelib_sources="pcrelib/pcre_chartables.c pcrelib/pcre_ucd.c \
    				 pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c \
    				 pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c \
-    				 pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
+    				 pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
    				 pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
-    				 pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c \
+    				 pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
    				 pcrelib/pcre_version.c pcrelib/pcre_xclass.c"
    PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,-I@ext_srcdir@/pcrelib)
    PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
--- a/ext/pcre/pcrelib/AUTHORS
+++ b/ext/pcre/pcrelib/AUTHORS
@ -8,16 +8,38 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2010 University of Cambridge
+Copyright (c) 1997-2012 University of Cambridge
 All rights reserved


+PCRE JUST-IN-TIME COMPILATION SUPPORT
+-------------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2010-2012 Zoltan Herczeg
+All rights reserved.
+
+
+STACK-LESS JUST-IN-TIME COMPILER
+--------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2009-2012 Zoltan Herczeg
+All rights reserved.
+
+
 THE C++ WRAPPER LIBRARY
 -----------------------

 Written by:       Google Inc.

-Copyright (c) 2007-2010 Google Inc
+Copyright (c) 2007-2012 Google Inc
 All rights reserved

 ####
--- a/ext/pcre/pcrelib/ChangeLog
+++ b/ext/pcre/pcrelib/ChangeLog
@ -1,6 +1,699 @@
 ChangeLog for PCRE
 ------------------

+Version 8.31 06-July-2012
+-------------------------
+
+1.  Fixing a wrong JIT test case and some compiler warnings.
+
+2.  Removed a bashism from the RunTest script.
+
+3.  Add a cast to pcre_exec.c to fix the warning "unary minus operator applied
+    to unsigned type, result still unsigned" that was given by an MS compiler
+    on encountering the code "-sizeof(xxx)".
+
+4.  Partial matching support is added to the JIT compiler.
+
+5.  Fixed several bugs concerned with partial matching of items that consist
+    of more than one character:
+
+    (a) /^(..)\1/ did not partially match "aba" because checking references was
+        done on an "all or nothing" basis. This also applied to repeated
+        references.
+
+    (b) \R did not give a hard partial match if \r was found at the end of the
+        subject.
+
+    (c) \X did not give a hard partial match after matching one or more
+        characters at the end of the subject.
+
+    (d) When newline was set to CRLF, a pattern such as /a$/ did not recognize
+        a partial match for the string "\r".
+
+    (e) When newline was set to CRLF, the metacharacter "." did not recognize
+        a partial match for a CR character at the end of the subject string.
+
+6.  If JIT is requested using /S++ or -s++ (instead of just /S+ or -s+) when
+    running pcretest, the text "(JIT)" is added to the output whenever JIT is
+    actually used to run the match.
+
+7.  Individual JIT compile options can be set in pcretest by following -s+[+]
+    or /S+[+] with a digit between 1 and 7.
+
+8.  OP_NOT now supports any UTF character not just single-byte ones.
+
+9.  (*MARK) control verb is now supported by the JIT compiler.
+
+10. The command "./RunTest list" lists the available tests without actually
+    running any of them. (Because I keep forgetting what they all are.)
+
+11. Add PCRE_INFO_MAXLOOKBEHIND.
+
+12. Applied a (slightly modified) user-supplied patch that improves performance
+    when the heap is used for recursion (compiled with --disable-stack-for-
+    recursion). Instead of malloc and free for each heap frame each time a
+    logical recursion happens, frames are retained on a chain and re-used where
+    possible. This sometimes gives as much as 30% improvement.
+
+13. As documented, (*COMMIT) is now confined to within a recursive subpattern
+    call.
+
+14. As documented, (*COMMIT) is now confined to within a positive assertion.
+
+15. It is now possible to link pcretest with libedit as an alternative to
+    libreadline.
+
+16. (*COMMIT) control verb is now supported by the JIT compiler.
+
+17. The Unicode data tables have been updated to Unicode 6.1.0.
+
+18. Added --file-list option to pcregrep.
+
+19. Added binary file support to pcregrep, including the -a, --binary-files,
+    -I, and --text options.
+
+20. The madvise function is renamed to posix_madvise for QNX compatibility
+    reasons. Fixed by Giuseppe D'Angelo.
+
+21. Fixed a bug for backward assertions with REVERSE 0 in the JIT compiler.
+
+22. Changed the option for creating symbolic links for 16-bit man pages from
+    -s to -sf so that re-installing does not cause issues.
+
+23. Support PCRE_NO_START_OPTIMIZE in JIT as (*MARK) support requires it.
+
+24. Fixed a very old bug in pcretest that caused errors with restarted DFA
+    matches in certain environments (the workspace was not being correctly
+    retained). Also added to pcre_dfa_exec() a simple plausibility check on
+    some of the workspace data at the beginning of a restart.
+
+25. \s*\R was auto-possessifying the \s* when it should not, whereas \S*\R
+    was not doing so when it should - probably a typo introduced by SVN 528
+    (change 8.10/14).
+
+26. When PCRE_UCP was not set, \w+\x{c4} was incorrectly auto-possessifying the
+    \w+ when the character tables indicated that \x{c4} was a word character.
+    There were several related cases, all because the tests for doing a table
+    lookup were testing for characters less than 127 instead of 255.
+
+27. If a pattern contains capturing parentheses that are not used in a match,
+    their slots in the ovector are set to -1. For those that are higher than
+    any matched groups, this happens at the end of processing. In the case when
+    there were back references that the ovector was too small to contain
+    (causing temporary malloc'd memory to be used during matching), and the
+    highest capturing number was not used, memory off the end of the ovector
+    was incorrectly being set to -1. (It was using the size of the temporary
+    memory instead of the true size.)
+
+28. To catch bugs like 27 using valgrind, when pcretest is asked to specify an
+    ovector size, it uses memory at the end of the block that it has got.
+
+29. Check for an overlong MARK name and give an error at compile time. The
+    limit is 255 for the 8-bit library and 65535 for the 16-bit library.
+
+30. JIT compiler update.
+
+31. JIT is now supported on jailbroken iOS devices. Thanks to Ruiger
+    Rill for the patch.
+
+32. Put spaces around SLJIT_PRINT_D in the JIT compiler. Required by CXX11.
+
+33. Variable renamings in the PCRE-JIT compiler. No functionality change.
+
+34. Fixed typos in pcregrep: in two places there was SUPPORT_LIBZ2 instead of
+    SUPPORT_LIBBZ2. This caused a build problem when bzip2 but not gzip (zlib)
+    was enabled.
+
+35. Improve JIT code generation for greedy plus quantifier.
+
+36. When /((?:a?)*)*c/ or /((?>a?)*)*c/ was matched against "aac", it set group
+    1 to "aa" instead of to an empty string. The bug affected repeated groups
+    that could potentially match an empty string.
+
+37. Optimizing single character iterators in JIT.
+
+38. Wide characters specified with \uxxxx in JavaScript mode are now subject to
+    the same checks as \x{...} characters in non-JavaScript mode. Specifically,
+    codepoints that are too big for the mode are faulted, and in a UTF mode,
+    disallowed codepoints are also faulted.
+
+39. If PCRE was compiled with UTF support, in three places in the DFA
+    matcher there was code that should only have been obeyed in UTF mode, but
+    was being obeyed unconditionally. In 8-bit mode this could cause incorrect
+    processing when bytes with values greater than 127 were present. In 16-bit
+    mode the bug would be provoked by values in the range 0xdc00 to 0xdfff. In
+    both cases the values are those that cannot be the first data item in a UTF
+    character. The three items that might have provoked this were recursions,
+    possessively repeated groups, and atomic groups.
+
+40. Ensure that libpcre is explicitly listed in the link commands for pcretest
+    and pcregrep, because some OS require shared objects to be explicitly
+    passed to ld, causing the link step to fail if they are not.
+
+41. There were two incorrect #ifdefs in pcre_study.c, meaning that, in 16-bit
+    mode, patterns that started with \h* or \R* might be incorrectly matched.
+
+
+Version 8.30 04-February-2012
+-----------------------------
+
+1.  Renamed "isnumber" as "is_a_number" because in some Mac environments this
+    name is defined in ctype.h.
+
+2.  Fixed a bug in fixed-length calculation for lookbehinds that would show up
+    only in quite long subpatterns.
+
+3.  Removed the function pcre_info(), which has been obsolete and deprecated
+    since it was replaced by pcre_fullinfo() in February 2000.
+
+4.  For a non-anchored pattern, if (*SKIP) was given with a name that did not
+    match a (*MARK), and the match failed at the start of the subject, a
+    reference to memory before the start of the subject could occur. This bug
+    was introduced by fix 17 of release 8.21.
+
+5.  A reference to an unset group with zero minimum repetition was giving
+    totally wrong answers (in non-JavaScript-compatibility mode). For example,
+    /(another)?(\1?)test/ matched against "hello world test". This bug was
+    introduced in release 8.13.
+
+6.  Add support for 16-bit character strings (a large amount of work involving
+    many changes and refactorings).
+
+7.  RunGrepTest failed on msys because \r\n was replaced by whitespace when the
+    command "pattern=`printf 'xxx\r\njkl'`" was run. The pattern is now taken
+    from a file.
+
+8.  Ovector size of 2 is also supported by JIT based pcre_exec (the ovector size
+    rounding is not applied in this particular case).
+
+9.  The invalid Unicode surrogate codepoints U+D800 to U+DFFF are now rejected
+    if they appear, or are escaped, in patterns.
+
+10. Get rid of a number of -Wunused-but-set-variable warnings.
+
+11. The pattern /(?=(*:x))(q|)/ matches an empty string, and returns the mark
+    "x". The similar pattern /(?=(*:x))((*:y)q|)/ did not return a mark at all.
+    Oddly, Perl behaves the same way. PCRE has been fixed so that this pattern
+    also returns the mark "x". This bug applied to capturing parentheses,
+    non-capturing parentheses, and atomic parentheses. It also applied to some
+    assertions.
+
+12. Stephen Kelly's patch to CMakeLists.txt allows it to parse the version
+    information out of configure.ac instead of relying on pcre.h.generic, which
+    is not stored in the repository.
+
+13. Applied Dmitry V. Levin's patch for a more portable method for linking with
+    -lreadline.
+
+14. ZH added PCRE_CONFIG_JITTARGET; added its output to pcretest -C.
+
+15. Applied Graycode's patch to put the top-level frame on the stack rather
+    than the heap when not using the stack for recursion. This gives a
+    performance improvement in many cases when recursion is not deep.
+
+16. Experimental code added to "pcretest -C" to output the stack frame size.
+
+
+Version 8.21 12-Dec-2011
+------------------------
+
+1.  Updating the JIT compiler.
+
+2.  JIT compiler now supports OP_NCREF, OP_RREF and OP_NRREF. New test cases
+    are added as well.
+
+3.  Fix cache-flush issue on PowerPC (It is still an experimental JIT port).
+    PCRE_EXTRA_TABLES is not supported by JIT, and should be checked before
+    calling _pcre_jit_exec. Some extra comments are added.
+
+4.  (*MARK) settings inside atomic groups that do not contain any capturing
+    parentheses, for example, (?>a(*:m)), were not being passed out. This bug
+    was introduced by change 18 for 8.20.
+
+5.  Supporting of \x, \U and \u in JavaScript compatibility mode based on the
+    ECMA-262 standard.
+
+6.  Lookbehinds such as (?<=a{2}b) that contained a fixed repetition were
+    erroneously being rejected as "not fixed length" if PCRE_CASELESS was set.
+    This bug was probably introduced by change 9 of 8.13.
+
+7.  While fixing 6 above, I noticed that a number of other items were being
+    incorrectly rejected as "not fixed length". This arose partly because newer
+    opcodes had not been added to the fixed-length checking code. I have (a)
+    corrected the bug and added tests for these items, and (b) arranged for an
+    error to occur if an unknown opcode is encountered while checking for fixed
+    length instead of just assuming "not fixed length". The items that were
+    rejected were: (*ACCEPT), (*COMMIT), (*FAIL), (*MARK), (*PRUNE), (*SKIP),
+    (*THEN), \h, \H, \v, \V, and single character negative classes with fixed
+    repetitions, e.g. [^a]{3}, with and without PCRE_CASELESS.
+
+8.  A possessively repeated conditional subpattern such as (?(?=c)c|d)++ was
+    being incorrectly compiled and would have given unpredictable results.
+
+9.  A possessively repeated subpattern with minimum repeat count greater than
+    one behaved incorrectly. For example, (A){2,}+ behaved as if it was
+    (A)(A)++ which meant that, after a subsequent mismatch, backtracking into
+    the first (A) could occur when it should not.
+
+10. Add a cast and remove a redundant test from the code.
+
+11. JIT should use pcre_malloc/pcre_free for allocation.
+
+12. Updated pcre-config so that it no longer shows -L/usr/lib, which seems
+    best practice nowadays, and helps with cross-compiling. (If the exec_prefix
+    is anything other than /usr, -L is still shown).
+
+13. In non-UTF-8 mode, \C is now supported in lookbehinds and DFA matching.
+
+14. Perl does not support \N without a following name in a [] class; PCRE now
+    also gives an error.
+
+15. If a forward reference was repeated with an upper limit of around 2000,
+    it caused the error "internal error: overran compiling workspace". The
+    maximum number of forward references (including repeats) was limited by the
+    internal workspace, and dependent on the LINK_SIZE. The code has been
+    rewritten so that the workspace expands (via pcre_malloc) if necessary, and
+    the default depends on LINK_SIZE. There is a new upper limit (for safety)
+    of around 200,000 forward references. While doing this, I also speeded up
+    the filling in of repeated forward references.
+
+16. A repeated forward reference in a pattern such as (a)(?2){2}(.) was
+    incorrectly expecting the subject to contain another "a" after the start.
+
+17. When (*SKIP:name) is activated without a corresponding (*MARK:name) earlier
+    in the match, the SKIP should be ignored. This was not happening; instead
+    the SKIP was being treated as NOMATCH. For patterns such as
+    /A(*MARK:A)A+(*SKIP:B)Z|AAC/ this meant that the AAC branch was never
+    tested.
+
+18. The behaviour of (*MARK), (*PRUNE), and (*THEN) has been reworked and is
+    now much more compatible with Perl, in particular in cases where the result
+    is a non-match for a non-anchored pattern. For example, if
+    /b(*:m)f|a(*:n)w/ is matched against "abc", the non-match returns the name
+    "m", where previously it did not return a name. A side effect of this
+    change is that for partial matches, the last encountered mark name is
+    returned, as for non matches. A number of tests that were previously not
+    Perl-compatible have been moved into the Perl-compatible test files. The
+    refactoring has had the pleasing side effect of removing one argument from
+    the match() function, thus reducing its stack requirements.
+
+19. If the /S+ option was used in pcretest to study a pattern using JIT,
+    subsequent uses of /S (without +) incorrectly behaved like /S+.
+
+21. Retrieve executable code size support for the JIT compiler and fixing
+    some warnings.
+
+22. A caseless match of a UTF-8 character whose other case uses fewer bytes did
+    not work when the shorter character appeared right at the end of the
+    subject string.
+
+23. Added some (int) casts to non-JIT modules to reduce warnings on 64-bit
+    systems.
+
+24. Added PCRE_INFO_JITSIZE to pass on the value from (21) above, and also
+    output it when the /M option is used in pcretest.
+
+25. The CheckMan script was not being included in the distribution. Also, added
+    an explicit "perl" to run Perl scripts from the PrepareRelease script
+    because this is reportedly needed in Windows.
+
+26. If study data was being saved in a file and studying had not found a set of
+    "starts with" bytes for the pattern, the data written to the file (though
+    never used) was taken from uninitialized memory and so caused valgrind to
+    complain.
+
+27. Updated RunTest.bat as provided by Sheri Pierce.
+
+28. Fixed a possible uninitialized memory bug in pcre_jit_compile.c.
+
+29. Computation of memory usage for the table of capturing group names was
+    giving an unnecessarily large value.
+
+
+Version 8.20 21-Oct-2011
+------------------------
+
+1.  Change 37 of 8.13 broke patterns like [:a]...[b:] because it thought it had
+    a POSIX class. After further experiments with Perl, which convinced me that
+    Perl has bugs and confusions, a closing square bracket is no longer allowed
+    in a POSIX name. This bug also affected patterns with classes that started
+    with full stops.
+
+2.  If a pattern such as /(a)b|ac/ is matched against "ac", there is no
+    captured substring, but while checking the failing first alternative,
+    substring 1 is temporarily captured. If the output vector supplied to
+    pcre_exec() was not big enough for this capture, the yield of the function
+    was still zero ("insufficient space for captured substrings"). This cannot
+    be totally fixed without adding another stack variable, which seems a lot
+    of expense for an edge case. However, I have improved the situation in cases
+    such as /(a)(b)x|abc/ matched against "abc", where the return code
+    indicates that fewer than the maximum number of slots in the ovector have
+    been set.
+
+3.  Related to (2) above: when there are more back references in a pattern than
+    slots in the output vector, pcre_exec() uses temporary memory during
+    matching, and copies in the captures as far as possible afterwards. It was
+    using the entire output vector, but this conflicts with the specification
+    that only 2/3 is used for passing back captured substrings. Now it uses
+    only the first 2/3, for compatibility. This is, of course, another edge
+    case.
+
+4.  Zoltan Herczeg's just-in-time compiler support has been integrated into the
+    main code base, and can be used by building with --enable-jit. When this is
+    done, pcregrep automatically uses it unless --disable-pcregrep-jit or the
+    runtime --no-jit option is given.
+
+5.  When the number of matches in a pcre_dfa_exec() run exactly filled the
+    ovector, the return from the function was zero, implying that there were
+    other matches that did not fit. The correct "exactly full" value is now
+    returned.
+
+6.  If a subpattern that was called recursively or as a subroutine contained
+    (*PRUNE) or any other control that caused it to give a non-standard return,
+    invalid errors such as "Error -26 (nested recursion at the same subject
+    position)" or even infinite loops could occur.
+
+7.  If a pattern such as /a(*SKIP)c|b(*ACCEPT)|/ was studied, it stopped
+    computing the minimum length on reaching *ACCEPT, and so ended up with the
+    wrong value of 1 rather than 0. Further investigation indicates that
+    computing a minimum subject length in the presence of *ACCEPT is difficult
+    (think back references, subroutine calls), and so I have changed the code
+    so that no minimum is registered for a pattern that contains *ACCEPT.
+
+8.  If (*THEN) was present in the first (true) branch of a conditional group,
+    it was not handled as intended. [But see 16 below.]
+
+9.  Replaced RunTest.bat and CMakeLists.txt with improved versions provided by
+    Sheri Pierce.
+
+10. A pathological pattern such as /(*ACCEPT)a/ was miscompiled, thinking that
+    the first byte in a match must be "a".
+
+11. Change 17 for 8.13 increased the recursion depth for patterns like
+    /a(?:.)*?a/ drastically. I've improved things by remembering whether a
+    pattern contains any instances of (*THEN). If it does not, the old
+    optimizations are restored. It would be nice to do this on a per-group
+    basis, but at the moment that is not feasible.
+
+12. In some environments, the output of pcretest -C is CRLF terminated. This
+    broke RunTest's code that checks for the link size. A single white space
+    character after the value is now allowed for.
+
+13. RunTest now checks for the "fr" locale as well as for "fr_FR" and "french".
+    For "fr", it uses the Windows-specific input and output files.
+
+14. If (*THEN) appeared in a group that was called recursively or as a
+    subroutine, it did not work as intended. [But see next item.]
+
+15. Consider the pattern /A (B(*THEN)C) | D/ where A, B, C, and D are complex
+    pattern fragments (but not containing any | characters). If A and B are
+    matched, but there is a failure in C so that it backtracks to (*THEN), PCRE
+    was behaving differently to Perl. PCRE backtracked into A, but Perl goes to
+    D. In other words, Perl considers parentheses that do not contain any |
+    characters to be part of a surrounding alternative, whereas PCRE was
+    treating (B(*THEN)C) the same as (B(*THEN)C|(*FAIL)) -- which Perl handles
+    differently. PCRE now behaves in the same way as Perl, except in the case
+    of subroutine/recursion calls such as (?1) which have in any case always
+    been different (but PCRE had them first :-).
+
+16. Related to 15 above: Perl does not treat the | in a conditional group as
+    creating alternatives. Such a group is treated in the same way as an
+    ordinary group without any | characters when processing (*THEN). PCRE has
+    been changed to match Perl's behaviour.
+
+17. If a user had set PCREGREP_COLO(U)R to something other than 1:31, the
+    RunGrepTest script failed.
+
+18. Change 22 for version 8.13 caused atomic groups to use more stack. This is
+    inevitable for groups that contain captures, but it can lead to a lot of
+    stack use in large patterns. The old behaviour has been restored for atomic
+    groups that do not contain any capturing parentheses.
+
+19. If the PCRE_NO_START_OPTIMIZE option was set for pcre_compile(), it did not
+    suppress the check for a minimum subject length at run time. (If it was
+    given to pcre_exec() or pcre_dfa_exec() it did work.)
+
+20. Fixed an ASCII-dependent infelicity in pcretest that would have made it
+    fail to work when decoding hex characters in data strings in EBCDIC
+    environments.
+
+21. It appears that in at least one Mac OS environment, the isxdigit() function
+    is implemented as a macro that evaluates to its argument more than once,
+    contravening the C 90 Standard (I haven't checked a later standard). There
+    was an instance in pcretest which caused it to go wrong when processing
+    \x{...} escapes in subject strings. This has been rewritten to avoid using
+    things like p++ in the argument of isxdigit().
+
+
+Version 8.13 16-Aug-2011
+------------------------
+
+1.  The Unicode data tables have been updated to Unicode 6.0.0.
+
+2.  Two minor typos in pcre_internal.h have been fixed.
+
+3.  Added #include <string.h> to pcre_scanner_unittest.cc, pcrecpp.cc, and
+    pcrecpp_unittest.cc. They are needed for strcmp(), memset(), and strchr()
+    in some environments (e.g. Solaris 10/SPARC using Sun Studio 12U2).
+
+4.  There were a number of related bugs in the code for matching back references
+    caselessly in UTF-8 mode when codes for the characters concerned were
+    different numbers of bytes. For example, U+023A and U+2C65 are an upper
+    and lower case pair, using 2 and 3 bytes, respectively. The main bugs were:
+    (a) A reference to 3 copies of a 2-byte code matched only 2 of a 3-byte
+    code. (b) A reference to 2 copies of a 3-byte code would not match 2 of a
+    2-byte code at the end of the subject (it thought there wasn't enough data
+    left).
+
+5.  Comprehensive information about what went wrong is now returned by
+    pcre_exec() and pcre_dfa_exec() when the UTF-8 string check fails, as long
+    as the output vector has at least 2 elements. The offset of the start of
+    the failing character and a reason code are placed in the vector.
+
+6.  When the UTF-8 string check fails for pcre_compile(), the offset that is
+    now returned is for the first byte of the failing character, instead of the
+    last byte inspected. This is an incompatible change, but I hope it is small
+    enough not to be a problem. It makes the returned offset consistent with
+    pcre_exec() and pcre_dfa_exec().
+
+7.  pcretest now gives a text phrase as well as the error number when
+    pcre_exec() or pcre_dfa_exec() fails; if the error is a UTF-8 check
+    failure, the offset and reason code are output.
+
+8.  When \R was used with a maximizing quantifier it failed to skip backwards
+    over a \r\n pair if the subsequent match failed. Instead, it just skipped
+    back over a single character (\n). This seems wrong (because it treated the
+    two characters as a single entity when going forwards), conflicts with the
+    documentation that \R is equivalent to (?>\r\n|\n|...etc), and makes the
+    behaviour of \R* different to (\R)*, which also seems wrong. The behaviour
+    has been changed.
+
+9.  Some internal refactoring has changed the processing so that the handling
+    of the PCRE_CASELESS and PCRE_MULTILINE options is done entirely at compile
+    time (the PCRE_DOTALL option was changed this way some time ago: version
+    7.7 change 16). This has made it possible to abolish the OP_OPT op code,
+    which was always a bit of a fudge. It also means that there is one less
+    argument for the match() function, which reduces its stack requirements
+    slightly. This change also fixes an incompatibility with Perl: the pattern
+    (?i:([^b]))(?1) should not match "ab", but previously PCRE gave a match.
+
+10. More internal refactoring has drastically reduced the number of recursive
+    calls to match() for possessively repeated groups such as (abc)++ when
+    using pcre_exec().
+
+11. While implementing 10, a number of bugs in the handling of groups were
+    discovered and fixed:
+
+    (?<=(a)+) was not diagnosed as invalid (non-fixed-length lookbehind).
+    (a|)*(?1) gave a compile-time internal error.
+    ((a|)+)+  did not notice that the outer group could match an empty string.
+    (^a|^)+   was not marked as anchored.
+    (.*a|.*)+ was not marked as matching at start or after a newline.
+
+12. Yet more internal refactoring has removed another argument from the match()
+    function. Special calls to this function are now indicated by setting a
+    value in a variable in the "match data" data block.
+
+13. Be more explicit in pcre_study() instead of relying on "default" for
+    opcodes that mean there is no starting character; this means that when new
+    ones are added and accidentally left out of pcre_study(), testing should
+    pick them up.
+
+14. The -s option of pcretest has been documented for ages as being an old
+    synonym of -m (show memory usage). I have changed it to mean "force study
+    for every regex", that is, assume /S for every regex. This is similar to -i
+    and -d etc. It's slightly incompatible, but I'm hoping nobody is still
+    using it. It makes it easier to run collections of tests with and without
+    study enabled, and thereby test pcre_study() more easily. All the standard
+    tests are now run with and without -s (but some patterns can be marked as
+    "never study" - see 20 below).
+
+15. When (*ACCEPT) was used in a subpattern that was called recursively, the
+    restoration of the capturing data to the outer values was not happening
+    correctly.
+
+16. If a recursively called subpattern ended with (*ACCEPT) and matched an
+    empty string, and PCRE_NOTEMPTY was set, pcre_exec() thought the whole
+    pattern had matched an empty string, and so incorrectly returned a no
+    match.
+
+17. There was optimizing code for the last branch of non-capturing parentheses,
+    and also for the obeyed branch of a conditional subexpression, which used
+    tail recursion to cut down on stack usage. Unfortunately, now that there is
+    the possibility of (*THEN) occurring in these branches, tail recursion is
+    no longer possible because the return has to be checked for (*THEN). These
+    two optimizations have therefore been removed. [But see 8.20/11 above.]
+
+18. If a pattern containing \R was studied, it was assumed that \R always
+    matched two bytes, thus causing the minimum subject length to be
+    incorrectly computed because \R can also match just one byte.
+
+19. If a pattern containing (*ACCEPT) was studied, the minimum subject length
+    was incorrectly computed.
+
+20. If /S is present twice on a test pattern in pcretest input, it now
+    *disables* studying, thereby overriding the use of -s on the command line
+    (see 14 above). This is necessary for one or two tests to keep the output
+    identical in both cases.
+
+21. When (*ACCEPT) was used in an assertion that matched an empty string and
+    PCRE_NOTEMPTY was set, PCRE applied the non-empty test to the assertion.
+
+22. When an atomic group that contained a capturing parenthesis was
+    successfully matched, but the branch in which it appeared failed, the
+    capturing was not being forgotten if a higher numbered group was later
+    captured. For example, /(?>(a))b|(a)c/ when matching "ac" set capturing
+    group 1 to "a", when in fact it should be unset. This applied to multi-
+    branched capturing and non-capturing groups, repeated or not, and also to
+    positive assertions (capturing in negative assertions does not happen
+    in PCRE) and also to nested atomic groups.
+
+23. Add the ++ qualifier feature to pcretest, to show the remainder of the
+    subject after a captured substring, to make it easier to tell which of a
+    number of identical substrings has been captured.
+
+24. The way atomic groups are processed by pcre_exec() has been changed so that
+    if they are repeated, backtracking one repetition now resets captured
+    values correctly. For example, if ((?>(a+)b)+aabab) is matched against
+    "aaaabaaabaabab" the value of captured group 2 is now correctly recorded as
+    "aaa". Previously, it would have been "a". As part of this code
+    refactoring, the way recursive calls are handled has also been changed.
+
+25. If an assertion condition captured any substrings, they were not passed
+    back unless some other capturing happened later. For example, if
+    (?(?=(a))a) was matched against "a", no capturing was returned.
+
+26. When studying a pattern that contained subroutine calls or assertions,
+    the code for finding the minimum length of a possible match was handling
+    direct recursions such as (xxx(?1)|yyy) but not mutual recursions (where
+    group 1 called group 2 while simultaneously a separate group 2 called group
+    1). A stack overflow occurred in this case. I have fixed this by limiting
+    the recursion depth to 10.
+
+27. Updated RunTest.bat in the distribution to the version supplied by Tom
+    Fortmann. This supports explicit test numbers on the command line, and has
+    argument validation and error reporting.
+
+28. An instance of \X with an unlimited repeat could fail if at any point the
+    first character it looked at was a mark character.
+
+29. Some minor code refactoring concerning Unicode properties and scripts
+    should reduce the stack requirement of match() slightly.
+
+30. Added the '=' option to pcretest to check the setting of unused capturing
+    slots at the end of the pattern, which are documented as being -1, but are
+    not included in the return count.
+
+31. If \k was not followed by a braced, angle-bracketed, or quoted name, PCRE
+    compiled something random. Now it gives a compile-time error (as does
+    Perl).
+
+32. A *MARK encountered during the processing of a positive assertion is now
+    recorded and passed back (compatible with Perl).
+
+33. If --only-matching or --colour was set on a pcregrep call whose pattern
+    had alternative anchored branches, the search for a second match in a line
+    was done as if at the line start. Thus, for example, /^01|^02/ incorrectly
+    matched the line "0102" twice. The same bug affected patterns that started
+    with a backwards assertion. For example /\b01|\b02/ also matched "0102"
+    twice.
+
+34. Previously, PCRE did not allow quantification of assertions. However, Perl
+    does, and because of capturing effects, quantifying parenthesized
+    assertions may at times be useful. Quantifiers are now allowed for
+    parenthesized assertions.
+
+35. A minor code tidy in pcre_compile() when checking options for \R usage.
+
+36. \g was being checked for fancy things in a character class, when it should
+    just be a literal "g".
+
+37. PCRE was rejecting [:a[:digit:]] whereas Perl was not. It seems that the
+    appearance of a nested POSIX class supersedes an apparent external class.
+    For example, [:a[:digit:]b:] matches "a", "b", ":", or a digit. Also,
+    unescaped square brackets may also appear as part of class names. For
+    example, [:a[:abc]b:] gives unknown class "[:abc]b:]". PCRE now behaves
+    more like Perl. (But see 8.20/1 above.)
+
+38. PCRE was giving an error for \N with a braced quantifier such as {1,} (this
+    was because it thought it was \N{name}, which is not supported).
+
+39. Add minix to OS list not supporting the -S option in pcretest.
+
+40. PCRE tries to detect cases of infinite recursion at compile time, but it
+    cannot analyze patterns in sufficient detail to catch mutual recursions
+    such as ((?1))((?2)). There is now a runtime test that gives an error if a
+    subgroup is called recursively as a subpattern for a second time at the
+    same position in the subject string. In previous releases this might have
+    been caught by the recursion limit, or it might have run out of stack.
+
+41. A pattern such as /(?(R)a+|(?R)b)/ is quite safe, as the recursion can
+    happen only once. PCRE was, however, incorrectly giving a compile time error
+    "recursive call could loop indefinitely" because it cannot analyze the
+    pattern in sufficient detail. The compile time test no longer happens when
+    PCRE is compiling a conditional subpattern, but actual runaway loops are
+    now caught at runtime (see 40 above).
+
+42. It seems that Perl allows any characters other than a closing parenthesis
+    to be part of the NAME in (*MARK:NAME) and other backtracking verbs. PCRE
+    has been changed to be the same.
+
+43. Updated configure.ac to put in more quoting round AC_LANG_PROGRAM etc. so
+    as not to get warnings when autogen.sh is called. Also changed
+    AC_PROG_LIBTOOL (deprecated) to LT_INIT (the current macro).
+
+44. To help people who use pcregrep to scan files containing exceedingly long
+    lines, the following changes have been made:
+
+    (a) The default value of the buffer size parameter has been increased from
+        8K to 20K. (The actual buffer used is three times this size.)
+
+    (b) The default can be changed by ./configure --with-pcregrep-bufsize when
+        PCRE is built.
+
+    (c) A --buffer-size=n option has been added to pcregrep, to allow the size
+        to be set at run time.
+
+    (d) Numerical values in pcregrep options can be followed by K or M, for
+        example --buffer-size=50K.
+
+    (e) If a line being scanned overflows pcregrep's buffer, an error is now
+        given and the return code is set to 2.
+
+45. Add a pointer to the latest mark to the callout data block.
+
+46. The pattern /.(*F)/, when applied to "abc" with PCRE_PARTIAL_HARD, gave a
+    partial match of an empty string instead of no match. This was specific to
+    the use of ".".
+
+47. The pattern /f.*/8s, when applied to "for" with PCRE_PARTIAL_HARD, gave a
+    complete match instead of a partial match. This bug was dependent on both
+    the PCRE_UTF8 and PCRE_DOTALL options being set.
+
+48. For a pattern such as /\babc|\bdef/ pcre_study() was failing to set up the
+    starting byte set, because \b was not being ignored.
+
+
 Version 8.12 15-Jan-2011
 ------------------------

--- a/ext/pcre/pcrelib/HACKING
+++ b/ext/pcre/pcrelib/HACKING
@ -2,7 +2,8 @@ Technical Notes about PCRE
 --------------------------

 These are very rough technical notes that record potentially useful information 
-about PCRE internals.
+about PCRE internals. For information about testing PCRE, see the pcretest 
+documentation and the comment at the head of the RunTest file.


 Historical note 1
@ -48,6 +49,18 @@ complexity in Perl regular expressions, I couldn't do this. In any case, a
 first pass through the pattern is helpful for other reasons. 


+Support for 16-bit data strings
+-------------------------------
+
+From release 8.30, PCRE supports 16-bit as well as 8-bit data strings, by being 
+compilable in either 8-bit or 16-bit modes, or both. Thus, two different 
+libraries can be created. In the description that follows, the word "short" is 
+used for a 16-bit data quantity, and the word "unit" is used for a quantity
+that is a byte in 8-bit mode and a short in 16-bit mode. However, so as not to
+over-complicate the text, the names of PCRE functions are given in 8-bit form
+only.
+
+
 Computing the memory requirement: how it was
 --------------------------------------------

@ -68,7 +81,7 @@ things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
 I had a flash of inspiration as to how I could run the real compile function in
 a "fake" mode that enables it to compute how much memory it would need, while
 actually only ever using a few hundred bytes of working memory, and without too
-many tests of the mode that might slow it down. So I re-factored the compiling
+many tests of the mode that might slow it down. So I refactored the compiling
 functions to work this way. This got rid of about 600 lines of source. It
 should make future maintenance and development easier. As this was such a major 
 change, I never released 6.8, instead upping the number to 7.0 (other quite 
@ -88,7 +101,10 @@ The "traditional", and original, matching function is called pcre_exec(), and
 it implements an NFA algorithm, similar to the original Henry Spencer algorithm 
 and the way that Perl works. This is not surprising, since it is intended to be
 as compatible with Perl as possible. This is the function most users of PCRE
-will use most of the time.
+will use most of the time. From release 8.20, if PCRE is compiled with 
+just-in-time (JIT) support, and studying a compiled pattern with JIT is 
+successful, the JIT code is run instead of the normal pcre_exec() code, but the 
+result is the same.


 Supplementary matching function
@ -108,28 +124,38 @@ needed at compile time to produce a traditional FSM where only one state is
 ever active at once. I believe some other regex matchers work this way.


+Changeable options
+------------------
+
+The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
+change in the middle of patterns. From PCRE 8.13, their processing is handled
+entirely at compile time by generating different opcodes for the different
+settings. The runtime functions do not need to keep track of an options state 
+any more.
+
+
 Format of compiled patterns
 ---------------------------

-The compiled form of a pattern is a vector of bytes, containing items of
-variable length. The first byte in an item is an opcode, and the length of the
-item is either implicit in the opcode or contained in the data bytes that
-follow it. 
+The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
+shorts in 16-bit mode), containing items of variable length. The first unit in
+an item contains an opcode, and the length of the item is either implicit in
+the opcode or contained in the data that follows it.

-In many cases below LINK_SIZE data values are specified for offsets within the 
-compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be
-compiled to use 3-byte or 4-byte values for these offsets (impairing the
-performance). This is necessary only when patterns whose compiled length is
-greater than 64K are going to be processed. In this description, we assume the
-"normal" compilation options. Data values that are counts (e.g. for
-quantifiers) are always just two bytes long.
-
-A list of the opcodes follows:
+In many cases listed below, LINK_SIZE data values are specified for offsets
+within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
+default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
+4-byte values for these offsets, although this impairs the performance. (3-byte
+LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
+larger than 2 is necessary only when patterns whose compiled length is greater
+than 64K are going to be processed. In this description, we assume the "normal"
+compilation options. Data values that are counts (e.g. for quantifiers) are
+always just two bytes long (one short in 16-bit mode).

 Opcodes with no following data
 ------------------------------

-These items are all just one byte long
+These items are all just one unit long

  OP_END                 end of pattern
  OP_ANY                 match any one character other than newline
@ -138,7 +164,8 @@ These items are all just one byte long
  OP_SOD                 match start of data: \A
  OP_SOM,                start of match (subject + offset): \G
  OP_SET_SOM,            set start of match (\K) 
-  OP_CIRC                ^ (start of data, or after \n in multiline)
+  OP_CIRC                ^ (start of data)
+  OP_CIRCM               ^ multiline mode (start of data or after newline)
  OP_NOT_WORD_BOUNDARY   \W
  OP_WORD_BOUNDARY       \w
  OP_NOT_DIGIT           \D
@ -153,7 +180,8 @@ These items are all just one byte long
  OP_WORDCHAR            \w
  OP_EODN                match end of data or \n at end: \Z
  OP_EOD                 match end of data: \z
-  OP_DOLL                $ (end of data, or before \n in multiline)
+  OP_DOLL                $ (end of data, or before final newline)
+  OP_DOLLM               $ multiline mode (end of data or before newline)
  OP_EXTUNI              match an extended Unicode character 
  OP_ANYNL               match any Unicode newline sequence 
  
@ -164,49 +192,57 @@ These items are all just one byte long
  OP_SKIP                ) indicating which parentheses must be closed.
  

-Backtracking control verbs with data
------------------------------------
- 
-OP_THEN is followed by a LINK_SIZE offset, which is the distance back to the
-start of the current branch.
+Backtracking control verbs with (optional) data
+-----------------------------------------------

-OP_MARK is followed by the mark name, preceded by a one-byte length, and 
-followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments, 
-the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used. For the first 
-two, the name follows immediately; for OP_THEN_ARG, it follows the LINK_SIZE 
-offset value.
+(*THEN) without an argument generates the opcode OP_THEN and no following data.
+OP_MARK is followed by the mark name, preceded by a one-unit length, and
+followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
+the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
+following in the same format.
  

+Matching literal characters
+---------------------------
+
+The OP_CHAR opcode is followed by a single character that is to be matched 
+casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
+the character may be more than one unit long.
+
+
 Repeating single characters
 ---------------------------

-The common repeats (*, +, ?) when applied to a single character use the
-following opcodes:
+The common repeats (*, +, ?), when applied to a single character, use the
+following opcodes, which come in caseful and caseless versions:

-  OP_STAR
-  OP_MINSTAR
-  OP_POSSTAR 
-  OP_PLUS
-  OP_MINPLUS
-  OP_POSPLUS 
-  OP_QUERY
-  OP_MINQUERY
-  OP_POSQUERY 
+  Caseful         Caseless
+  OP_STAR         OP_STARI      
+  OP_MINSTAR      OP_MINSTARI   
+  OP_POSSTAR      OP_POSSTARI   
+  OP_PLUS         OP_PLUSI      
+  OP_MINPLUS      OP_MINPLUSI   
+  OP_POSPLUS      OP_POSPLUSI   
+  OP_QUERY        OP_QUERYI     
+  OP_MINQUERY     OP_MINQUERYI  
+  OP_POSQUERY     OP_POSQUERYI  

-In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
-Those with "MIN" in their name are the minimizing versions. Those with "POS" in 
-their names are possessive versions. Each is followed by the character that is
-to be repeated. Other repeats make use of
+Each opcode is followed by the character that is to be repeated. In ASCII mode,
+these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable.
+Those with "MIN" in their names are the minimizing versions. Those with "POS"
+in their names are possessive versions. Other repeats make use of these
+opcodes:

-  OP_UPTO
-  OP_MINUPTO
-  OP_POSUPTO 
-  OP_EXACT
+  Caseful         Caseless
+  OP_UPTO         OP_UPTOI    
+  OP_MINUPTO      OP_MINUPTOI 
+  OP_POSUPTO      OP_POSUPTOI 
+  OP_EXACT        OP_EXACTI   

-which are followed by a two-byte count (most significant first) and the
-repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
-non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
-OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
+Each of these is followed by a two-byte (one short) count (most significant
+byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
+0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
+coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OP_POSUPTO).


 Repeating character types
@ -214,7 +250,7 @@ Repeating character types

 Repeats of things like \d are done exactly as for single characters, except
 that instead of a character, the opcode for the type is stored in the data
-byte. The opcodes are:
+unit. The opcodes are:

  OP_TYPESTAR
  OP_TYPEMINSTAR
@ -236,65 +272,58 @@ Match by Unicode property

 OP_PROP and OP_NOTPROP are used for positive and negative matches of a 
 character by testing its Unicode property (the \p and \P escape sequences).
-Each is followed by two bytes that encode the desired property as a type and a 
+Each is followed by two units that encode the desired property as a type and a
 value.

-Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by 
-three bytes: OP_PROP or OP_NOTPROP and then the desired property type and 
+Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
+three units: OP_PROP or OP_NOTPROP, and then the desired property type and
 value.


-Matching literal characters
---------------------------
-
-The OP_CHAR opcode is followed by a single character that is to be matched 
-casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the 
-character may be more than one byte long. (Earlier versions of PCRE used 
-multi-character strings, but this was changed to allow some new features to be 
-added.)
-
-
 Character classes
 -----------------

-If there is only one character, OP_CHAR or OP_CHARNC is used for a positive
-class, and OP_NOT for a negative one (that is, for something like [^a]).
-However, in UTF-8 mode, the use of OP_NOT applies only to characters with
-values < 128, because OP_NOT is confined to single bytes.
+If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
+positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
+something like [^a]). 

-Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
-negated, single-character class. The normal ones (OP_STAR etc.) are used for a
-repeated positive single-character class.
+Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
+repeated, negated, single-character classes. The normal single-character
+opcodes (OP_STAR, etc.) are used for repeated positive single-character
+classes.

-When there's more than one character in a class and all the characters are less
-than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative
-one. In either case, the opcode is followed by a 32-byte bit map containing a 1
-bit for every character that is acceptable. The bits are counted from the least
-significant end of each byte.
+When there is more than one character in a class and all the characters are
+less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
+negative one. In either case, the opcode is followed by a 32-byte (16-short)
+bit map containing a 1 bit for every character that is acceptable. The bits are
+counted from the least significant end of each unit. In caseless mode, bits for
+both cases are set.

-The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode,
-subject characters with values greater than 256 can be handled correctly. For
-OP_CLASS they don't match, whereas for OP_NCLASS they do.
+The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16 mode,
+subject characters with values greater than 255 can be handled correctly. For
+OP_CLASS they do not match, whereas for OP_NCLASS they do.

-For classes containing characters with values > 255, OP_XCLASS is used. It
-optionally uses a bit map (if any characters lie within it), followed by a list
-of pairs and single characters. There is a flag character than indicates
-whether it's a positive or a negative class.
+For classes containing characters with values greater than 255, OP_XCLASS is
+used. It optionally uses a bit map (if any characters lie within it), followed
+by a list of pairs (for a range) and single characters. In caseless mode, both
+cases are explicitly listed. There is a flag character that indicates whether
+it is a positive or a negative class.


 Back references
 ---------------

-OP_REF is followed by two bytes containing the reference number.
+OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
+containing the reference number.


 Repeating character classes and back references
 -----------------------------------------------

 Single-character classes are handled specially (see above). This section
-applies to OP_CLASS and OP_REF. In both cases, the repeat information follows
-the base item. The matching code looks at the following opcode to see if it is
-one of
+applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
+follows the base item. The matching code looks at the following opcode to see
+if it is one of

  OP_CRSTAR
  OP_CRMINSTAR
@ -305,10 +334,10 @@ one of
  OP_CRRANGE
  OP_CRMINRANGE

-All but the last two are just single-byte items. The others are followed by
-four bytes of data, comprising the minimum and maximum repeat counts. There are 
-no special possessive opcodes for these repeats; a possessive repeat is 
-compiled into an atomic group.
+All but the last two are just single-unit items. The others are followed by
+four bytes (two shorts) of data, comprising the minimum and maximum repeat
+counts. There are no special possessive opcodes for these repeats; a possessive
+repeat is compiled into an atomic group.


 Brackets and alternation
@ -318,7 +347,8 @@ A pair of non-capturing (round) brackets is wrapped round each expression at
 compile time, so alternation always happens in the context of brackets.

 [Note for North Americans: "bracket" to some English speakers, including
-myself, can be round, square, curly, or pointy. Hence this usage.]
+myself, can be round, square, curly, or pointy. Hence this usage rather than 
+"parentheses".]

 Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
 capturing brackets and it used a different opcode for each one. From release
@ -330,16 +360,17 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
 next alternative OP_ALT or, if there aren't any branches, to the matching
 OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
 the next one, or to the OP_KET opcode. For capturing brackets, the bracket 
-number immediately follows the offset, always as a 2-byte item.
+number immediately follows the offset, always as a 2-byte (one short) item.

-OP_KET is used for subpatterns that do not repeat indefinitely, while
+OP_KET is used for subpatterns that do not repeat indefinitely, and
 OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
-maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
-positive number) the offset back to the matching bracket opcode.
+maximally respectively (see below for possessive repetitions). All three are
+followed by LINK_SIZE bytes giving (as a positive number) the offset back to
+the matching bracket opcode.

 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
-single-byte opcodes that tell the matcher that skipping the following
+single-unit opcodes that tell the matcher that skipping the following
 subpattern entirely is a valid branch. In the case of the first two, not 
 skipping the pattern is also valid (greedy and non-greedy). The third is used 
 when a pattern has the quantifier {0,0}. It cannot be entirely discarded, 
@ -362,6 +393,15 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
 that it needs to check for matching an empty string when it hits OP_KETRMIN or
 OP_KETRMAX, and if so, to break the loop.

+Possessive brackets
+-------------------
+
+When a repeated group (capturing or non-capturing) is marked as possessive by
+the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
+have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead 
+of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum 
+repetition is zero, the group is preceded by OP_BRAPOSZERO.
+

 Assertions
 ----------
@ -369,11 +409,11 @@ Assertions
 Forward assertions are just like other subpatterns, but starting with one of
 the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
 OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
-is OP_REVERSE, followed by a two byte count of the number of characters to move
-back the pointer in the subject string. When operating in UTF-8 mode, the count
-is a character count rather than a byte count. A separate count is present in
-each alternative of a lookbehind assertion, allowing them to have different
-fixed lengths.
+is OP_REVERSE, followed by a two-byte (one short) count of the number of
+characters to move back the pointer in the subject string. In ASCII mode, the 
+count is a number of units, but in UTF-8/16 mode each character may occupy more
+than one unit. A separate count is present in each alternative of a lookbehind
+assertion, allowing them to have different fixed lengths.


 Once-only (atomic) subpatterns
@ -390,14 +430,15 @@ Conditional subpatterns
 These are like other subpatterns, but they start with the opcode OP_COND, or
 OP_SCOND for one that might match an empty string in an unbounded repeat. If
 the condition is a back reference, this is stored at the start of the
-subpattern using the opcode OP_CREF followed by two bytes containing the
-reference number. OP_NCREF is used instead if the reference was generated by 
-name (so that the runtime code knows to check for duplicate names).
+subpattern using the opcode OP_CREF followed by two bytes (one short)
+containing the reference number. OP_NCREF is used instead if the reference was
+generated by name (so that the runtime code knows to check for duplicate
+names).

 If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
 group x" (coded as "(?(Rx)"), the group number is stored at the start of the
 subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
-zero for "the whole pattern". For a DEFINE condition, just the single byte
+zero for "the whole pattern". For a DEFINE condition, just the single unit
 OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
 always starts with one of the assertions.

@ -416,25 +457,12 @@ are not strictly a recursion.
 Callout
 -------

-OP_CALLOUT is followed by one byte of data that holds a callout number in the
+OP_CALLOUT is followed by one unit of data that holds a callout number in the
 range 0 to 254 for manual callouts, or 255 for an automatic callout. In both 
-cases there follows a two-byte value giving the offset in the pattern to the
-start of the following item, and another two-byte item giving the length of the
-next item.
+cases there follows a two-byte (one short) value giving the offset in the
+pattern to the start of the following item, and another two-byte (one short)
+item giving the length of the next item.


-Changing options
----------------
-
-If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
-opcode is compiled, followed by one byte containing the new settings of these
-flags. If there are several alternatives, there is an occurrence of OP_OPT at
-the start of all those following the first options change, to set appropriate
-options for the start of the alternative. Immediately after the end of the
-group there is another such item to reset the flags to their previous values. A
-change of flag right at the very start of the pattern can be handled entirely
-at compile time, and so does not cause anything to be put into the compiled
-data.
-
 Philip Hazel
-October 2010
+February 2012
--- a/ext/pcre/pcrelib/LICENCE
+++ b/ext/pcre/pcrelib/LICENCE
@ -9,7 +9,9 @@ specified below. The documentation for PCRE, supplied in the "doc"
 directory, is distributed under the same terms as the software itself.

 The basic library functions are written in C and are freestanding. Also
-included in the distribution is a set of C++ wrapper functions.
+included in the distribution is a set of C++ wrapper functions, and a
+just-in-time compiler that can be used to optimize pattern matching. These
+are both optional features that can be omitted when the library is built.


 THE BASIC LIBRARY FUNCTIONS
@ -22,7 +24,29 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.

-Copyright (c) 1997-2010 University of Cambridge
+Copyright (c) 1997-2012 University of Cambridge
+All rights reserved.
+
+
+PCRE JUST-IN-TIME COMPILATION SUPPORT
+-------------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2010-2012 Zoltan Herczeg
+All rights reserved.
+
+
+STACK-LESS JUST-IN-TIME COMPILER
+--------------------------------
+
+Written by:       Zoltan Herczeg
+Email local part: hzmester
+Email domain:     freemail.hu
+
+Copyright(c) 2009-2012 Zoltan Herczeg
 All rights reserved.


@ -31,7 +55,7 @@ THE C++ WRAPPER FUNCTIONS

 Contributed by:   Google Inc.

-Copyright (c) 2007-2010, Google Inc.
+Copyright (c) 2007-2012, Google Inc.
 All rights reserved.


--- a/ext/pcre/pcrelib/NEWS
+++ b/ext/pcre/pcrelib/NEWS
@ -1,6 +1,82 @@
 News about PCRE releases
 ------------------------

+Release 8.31 06-July-2012
+-------------------------
+
+This is mainly a bug-fixing release, with a small number of developments:
+
+. The JIT compiler now supports partial matching and the (*MARK) and
+  (*COMMIT) verbs.
+
+. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
+  pattern.
+
+. There should be a performance improvement when using the heap instead of the
+  stack for recursion.
+
+. pcregrep can now be linked with libedit as an alternative to libreadline.
+
+. pcregrep now has a --file-list option where the list of files to scan is
+  given as a file.
+
+. pcregrep now recognizes binary files and there are related options.
+
+. The Unicode tables have been updated to 6.1.0.
+
+As always, the full list of changes is in the ChangeLog file.
+
+
+Release 8.30 04-February-2012
+-----------------------------
+
+Release 8.30 introduces a major new feature: support for 16-bit character
+strings, compiled as a separate library. There are a few changes to the
+8-bit library, in addition to some bug fixes.
+
+. The pcre_info() function, which has been obsolete for over 10 years, has
+  been removed.
+
+. When a compiled pattern was saved to a file and later reloaded on a host
+  with different endianness, PCRE used automatically to swap the bytes in some
+  of the data fields. With the advent of the 16-bit library, where more of this
+  swapping is needed, it is no longer done automatically. Instead, the bad
+  endianness is detected and a specific error is given. The user can then call
+  a new function called pcre_pattern_to_host_byte_order() (or an equivalent
+  16-bit function) to do the swap.
+
+. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
+  code points and are now faulted. (They are the so-called "surrogates"
+  that are reserved for coding high values in UTF-16.)
+
+
+Release 8.21 12-Dec-2011
+------------------------
+
+This is almost entirely a bug-fix release. The only new feature is the ability
+to obtain the size of the memory used by the JIT compiler.
+
+
+Release 8.20 21-Oct-2011
+------------------------
+
+The main change in this release is the inclusion of Zoltan Herczeg's
+just-in-time compiler support, which can be accessed by building PCRE with
+--enable-jit. Large performance benefits can be had in many situations. 8.20
+also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
+a number of infelicities and differences from Perl.
+
+
+Release 8.13 16-Aug-2011
+------------------------
+
+This is mainly a bug-fix release. There has been a lot of internal refactoring.
+The Unicode tables have been updated. The only new feature in the library is
+the passing of *MARK information to callouts. Some additions have been made to
+pcretest to make testing easier and more comprehensive. There is a new option
+for pcregrep to adjust its internal buffer size.
+
+
 Release 8.12 15-Jan-2011
 ------------------------

--- a/ext/pcre/pcrelib/NON-UNIX-USE
+++ b/ext/pcre/pcrelib/NON-UNIX-USE
@ -1,501 +1,7 @@
 Compiling PCRE on non-Unix systems
 ----------------------------------

-This document contains the following sections:
+This has been renamed to better reflect its contents. Please see the file
+NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.

-  General
-  Generic instructions for the PCRE C library
-  The C++ wrapper functions
-  Building for virtual Pascal
-  Stack size in Windows environments
-  Linking programs in Windows environments
-  Comments about Win32 builds
-  Building PCRE on Windows with CMake
-  Use of relative paths with CMake on Windows
-  Testing with RunTest.bat
-  Building under Windows with BCC5.5
-  Building PCRE on OpenVMS
-  Building PCRE on Stratus OpenVOS
-
-
-GENERAL
-
-I (Philip Hazel) have no experience of Windows or VMS systems and how their
-libraries work. The items in the PCRE distribution and Makefile that relate to
-anything other than Unix-like systems are untested by me.
-
-There are some other comments and files (including some documentation in CHM
-format) in the Contrib directory on the FTP site:
-
-  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
-
-If you want to compile PCRE for a non-Unix system (especially for a system that
-does not support "configure" and "make" files), note that the basic PCRE
-library consists entirely of code written in Standard C, and so should compile
-successfully on any system that has a Standard C compiler and library. The C++
-wrapper functions are a separate issue (see below).
-
-The PCRE distribution includes a "configure" file for use by the Configure/Make
-build system, as found in many Unix-like environments. There is also
-support for CMake, which some users prefer, especially in Windows environments.
-There are some instructions for CMake under Windows in the section entitled
-"Building PCRE with CMake" below. CMake can also be used to build PCRE in
-Unix-like systems.
-
-
-GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
-
-The following are generic comments about building the PCRE C library "by hand".
-
- (1) Copy or rename the file config.h.generic as config.h, and edit the macro
-     settings that it contains to whatever is appropriate for your environment.
-     In particular, if you want to force a specific value for newline, you can
-     define the NEWLINE macro. When you compile any of the PCRE modules, you
-     must specify -DHAVE_CONFIG_H to your compiler so that config.h is included
-     in the sources.
-
-     An alternative approach is not to edit config.h, but to use -D on the
-     compiler command line to make any changes that you need to the
-     configuration options. In this case -DHAVE_CONFIG_H must not be set.
-
-     NOTE: There have been occasions when the way in which certain parameters
-     in config.h are used has changed between releases. (In the configure/make
-     world, this is handled automatically.) When upgrading to a new release,
-     you are strongly advised to review config.h.generic before re-using what
-     you had previously.
-
- (2) Copy or rename the file pcre.h.generic as pcre.h.
-
- (3) EITHER:
-       Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
-
-     OR:
-       Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
-       you have set up config.h), and then run it with the single argument
-       "pcre_chartables.c". This generates a set of standard character tables
-       and writes them to that file. The tables are generated using the default
-       C locale for your system. If you want to use a locale that is specified
-       by LC_xxx environment variables, add the -L option to the dftables
-       command. You must use this method if you are building on a system that
-       uses EBCDIC code.
-
-     The tables in pcre_chartables.c are defaults. The caller of PCRE can
-     specify alternative tables at run time.
-
- (4) Ensure that you have the following header files:
-
-       pcre_internal.h
-       ucp.h
-
- (5) Also ensure that you have the following file, which is #included as source
-     when building a debugging version of PCRE, and is also used by pcretest.
-
-       pcre_printint.src
-
- (6) Compile the following source files, setting -DHAVE_CONFIG_H as a compiler
-     option if you have set up config.h with your configuration, or else use
-     other -D settings to change the configuration as required.
-
-       pcre_chartables.c
-       pcre_compile.c
-       pcre_config.c
-       pcre_dfa_exec.c
-       pcre_exec.c
-       pcre_fullinfo.c
-       pcre_get.c
-       pcre_globals.c
-       pcre_info.c
-       pcre_maketables.c
-       pcre_newline.c
-       pcre_ord2utf8.c
-       pcre_refcount.c
-       pcre_study.c
-       pcre_tables.c
-       pcre_try_flipped.c
-       pcre_ucd.c
-       pcre_valid_utf8.c
-       pcre_version.c
-       pcre_xclass.c
-
-     Make sure that you include -I. in the compiler command (or equivalent for
-     an unusual compiler) so that all included PCRE header files are first
-     sought in the current directory. Otherwise you run the risk of picking up
-     a previously-installed file from somewhere else.
-
- (7) Now link all the compiled code into an object library in whichever form
-     your system keeps such libraries. This is the basic PCRE C library. If
-     your system has static and shared libraries, you may have to do this once
-     for each type.
-
- (8) Similarly, if you want to build the POSIX wrapper functions, ensure that
-     you have the pcreposix.h file and then compile pcreposix.c (remembering
-     -DHAVE_CONFIG_H if necessary). Link the result (on its own) as the
-     pcreposix library.
-
- (9) Compile the test program pcretest.c (again, don't forget -DHAVE_CONFIG_H).
-     This needs the functions in the PCRE library when linking. It also needs
-     the pcreposix wrapper functions unless you compile it with -DNOPOSIX. The
-     pcretest.c program also needs the pcre_printint.src source file, which it
-     #includes.
-
-(10) Run pcretest on the testinput files in the testdata directory, and check
-     that the output matches the corresponding testoutput files. Note that the
-     supplied files are in Unix format, with just LF characters as line
-     terminators. You may need to edit them to change this if your system uses
-     a different convention. If you are using Windows, you probably should use
-     the wintestinput3 file instead of testinput3 (and the corresponding output
-     file). This is a locale test; wintestinput3 sets the locale to "french"
-     rather than "fr_FR", and there are some minor output differences.
-
-(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
-     uses only the basic PCRE library (it does not need the pcreposix library).
-
-
-THE C++ WRAPPER FUNCTIONS
-
-The PCRE distribution also contains some C++ wrapper functions and tests,
-contributed by Google Inc. On a system that can use "configure" and "make",
-the functions are automatically built into a library called pcrecpp. It should
-be straightforward to compile the .cc files manually on other systems. The
-files called xxx_unittest.cc are test programs for each of the corresponding
-xxx.cc files.
-
-
-BUILDING FOR VIRTUAL PASCAL
-
-A script for building PCRE using Borland's C++ compiler for use with VPASCAL
-was contributed by Alexander Tokarev. Stefan Weber updated the script and added
-additional files. The following files in the distribution are for building PCRE
-for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
-
-
-STACK SIZE IN WINDOWS ENVIRONMENTS
-
-The default processor stack size of 1Mb in some Windows environments is too
-small for matching patterns that need much recursion. In particular, test 2 may
-fail because of this. Normally, running out of stack causes a crash, but there
-have been cases where the test program has just died silently. See your linker
-documentation for how to increase stack size if you experience problems. The
-Linux default of 8Mb is a reasonable choice for the stack, though even that can
-be too small for some pattern/subject combinations.
-
-PCRE has a compile configuration option to disable the use of stack for
-recursion so that heap is used instead. However, pattern matching is
-significantly slower when this is done. There is more about stack usage in the
-"pcrestack" documentation.
-
-
-LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
-
-If you want to statically link a program against a PCRE library in the form of
-a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
-pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
-be declared __declspec(dllimport), with unwanted results.
-
-
-CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
-
-It is possible to compile programs to use different calling conventions using
-MSVC. Search the web for "calling conventions" for more information. To make it
-easier to change the calling convention for the exported functions in the
-PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
-definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
-not set, it defaults to empty; the default calling convention is then used
-(which is what is wanted most of the time).
-
-
-COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE WITH CMAKE" below)
-
-There are two ways of building PCRE using the "configure, make, make install"
-paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
-the same thing; they are completely different from each other. There is also
-support for building using CMake, which some users find a more straightforward
-way of building PCRE under Windows. However, the tests are not run
-automatically when CMake is used.
-
-The MinGW home page (http://www.mingw.org/) says this:
-
-  MinGW: A collection of freely available and freely distributable Windows
-  specific header files and import libraries combined with GNU toolsets that
-  allow one to produce native Windows programs that do not rely on any
-  3rd-party C runtime DLLs.
-
-The Cygwin home page (http://www.cygwin.com/) says this:
-
-  Cygwin is a Linux-like environment for Windows. It consists of two parts:
-
-  . A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
-    substantial Linux API functionality
-
-  . A collection of tools which provide Linux look and feel.
-
-  The Cygwin DLL currently works with all recent, commercially released x86 32
-  bit and 64 bit versions of Windows, with the exception of Windows CE.
-
-On both MinGW and Cygwin, PCRE should build correctly using:
-
-  ./configure && make && make install
-
-This should create two libraries called libpcre and libpcreposix, and, if you
-have enabled building the C++ wrapper, a third one called libpcrecpp. These are
-independent libraries: when you link with libpcreposix or libpcrecpp you must
-also link with libpcre, which contains the basic functions. (Some earlier
-releases of PCRE included the basic libpcre functions in libpcreposix. This no
-longer happens.)
-
-A user submitted a special-purpose patch that makes it easy to create
-"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
-as a special target. If you use this target, no other files are built, and in
-particular, the pcretest and pcregrep programs are not built. An example of how
-this might be used is:
-
-  ./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
-
-Using Cygwin's compiler generates libraries and executables that depend on
-cygwin1.dll. If a library that is generated this way is distributed,
-cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
-licence, this forces not only PCRE to be under the GPL, but also the entire
-application. A distributor who wants to keep their own code proprietary must
-purchase an appropriate Cygwin licence.
-
-MinGW has no such restrictions. The MinGW compiler generates a library or
-executable that can run standalone on Windows without any third party dll or
-licensing issues.
-
-But there is more complication:
-
-If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
-to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
-front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
-gcc and MinGW's gcc). So, a user can:
-
-. Build native binaries by using MinGW or by getting Cygwin and using
-  -mno-cygwin.
-
-. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
-  compiler flags.
-
-The test files that are supplied with PCRE are in Unix format, with LF
-characters as line terminators. It may be necessary to change the line
-terminators in order to get some of the tests to work.
-
-
-BUILDING PCRE ON WINDOWS WITH CMAKE
-
-CMake is an alternative configuration facility that can be used instead of the
-traditional Unix "configure". CMake creates project files (make files, solution
-files, etc.) tailored to numerous development environments, including Visual
-Studio, Borland, Msys, MinGW, NMake, and Unix. The following instructions
-were contributed by a PCRE user.
-
-1.  Install the latest CMake version available from http://www.cmake.org/, and
-    ensure that cmake\bin is on your path.
-
-2.  Unzip (retaining folder structure) the PCRE source tree into a source
-    directory such as C:\pcre.
-
-3.  Create a new, empty build directory, for example C:\pcre\build\
-
-4.  Run cmake-gui from the Shell environment of your build tool, for example,
-    Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++.
-
-5.  Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
-    directories, respectively.
-
-6.  Hit the "Configure" button.
-
-7.  Select the particular IDE / build tool that you are using (Visual
-    Studio, MSYS makefiles, MinGW makefiles, etc.)
-
-8.  The GUI will then list several configuration options. This is where
-    you can enable UTF-8 support or other PCRE optional features.
-
-9.  Hit "Configure" again. The adjacent "Generate" button should now be
-    active.
-
-10. Hit "Generate".
-
-11. The build directory should now contain a usable build system, be it a
-    solution file for Visual Studio, makefiles for MinGW, etc. Exit from
-    cmake-gui and use the generated build system with your compiler or IDE.
-
-
-USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
-
-A PCRE user comments as follows:
-
-I thought that others may want to know the current state of
-CMAKE_USE_RELATIVE_PATHS support on Windows.
-
-Here it is:
-- AdditionalIncludeDirectories is only partially modified (only the
-first path - see below)
-- Only some of the contained file paths are modified - shown below for
-pcre.vcproj
-- It properly modifies
-
-I am sure CMake people can fix that if they want to. Until then one will
-need to replace existing absolute paths in project files with relative
-paths manually (e.g. from VS) - relative to project file location. I did
-just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
-deal.
-
-AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
-AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
-
-RelativePath="pcre.h">
-RelativePath="pcre_chartables.c">
-RelativePath="pcre_chartables.c.rule">
-
-
-TESTING WITH RUNTEST.BAT
-
-1. Copy RunTest.bat into the directory where pcretest.exe has been created.
-
-2. Edit RunTest.bat and insert a line that identifies the relative location of
-   the pcre source, e.g.:
-
-   set srcdir=..\pcre-7.4-RC3
-
-3. Run RunTest.bat from a command shell environment. Test outputs will
-   automatically be compared to expected results, and discrepancies will
-   be identified in the console output.
-
-4. To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
-   pcre_scanner_unittest.exe.
-
-
-BUILDING UNDER WINDOWS WITH BCC5.5
-
-Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
-
-  Some of the core BCC libraries have a version of PCRE from 1998 built in,
-  which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
-  version mismatch. I'm including an easy workaround below, if you'd like to
-  include it in the non-unix instructions:
-
-  When linking a project with BCC5.5, pcre.lib must be included before any of
-  the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
-  line.
-
-
-BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
-
-Vincent Richomme sent a zip archive of files to help with this process. They
-can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
-site.
-
-
-BUILDING PCRE ON OPENVMS
-
-Dan Mooney sent the following comments about building PCRE on OpenVMS. They
-relate to an older version of PCRE that used fewer source files, so the exact
-commands will need changing. See the current list of source files above.
-
-"It was quite easy to compile and link the library. I don't have a formal
-make file but the attached file [reproduced below] contains the OpenVMS DCL
-commands I used to build the library. I had to add #define
-POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
-
-The library was built on:
-O/S: HP OpenVMS v7.3-1
-Compiler: Compaq C v6.5-001-48BCD
-Linker: vA13-01
-
-The test results did not match 100% due to the issues you mention in your
-documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
-modified some of the character tables temporarily and was able to get the
-results to match. Tests using the fr locale did not match since I don't have
-that locale loaded. The study size was always reported to be 3 less than the
-value in the standard test output files."
-
-=========================
-$! This DCL procedure builds PCRE on OpenVMS
-$!
-$! I followed the instructions in the non-unix-use file in the distribution.
-$!
-$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
-$ COMPILE DFTABLES.C
-$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
-$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
-$ COMPILE MAKETABLES.C
-$ COMPILE GET.C
-$ COMPILE STUDY.C
-$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
-$! did not seem to be defined anywhere.
-$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
-$ COMPILE PCRE.C
-$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
-$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
-$! did not seem to be defined anywhere.
-$ COMPILE PCREPOSIX.C
-$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
-$ COMPILE PCRETEST.C
-$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
-$! C programs that want access to command line arguments must be
-$! defined as a symbol
-$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
-$! Arguments must be enclosed in quotes.
-$ PCRETEST "-C"
-$! Test results:
-$!
-$!   The test results did not match 100%. The functions isprint(), iscntrl(),
-$!   isgraph() and ispunct() on OpenVMS must not produce the same results
-$!   as the system that built the test output files provided with the
-$!   distribution.
-$!
-$!   The study size did not match and was always 3 less on OpenVMS.
-$!
-$!   Locale could not be set to fr
-$!
-=========================
-
-
-BUILDING PCRE ON STRATUS OPENVOS
-
-These notes on the port of PCRE to VOS (lightly edited) were supplied by
-Ashutosh Warikoo, whose email address has the local part awarikoo and the
-domain nse.co.in. The port was for version 7.9 in August 2009.
-
-1.   Building PCRE
-
-I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
-problems. I used the following packages to build PCRE:
-
-  ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
-
-Please read and follow the instructions that come with these packages. To start
-the build of pcre, from the root of the package type:
-
-  ./build.sh
-
-2. Installing PCRE
-
-Once you have successfully built PCRE, login to the SysAdmin group, switch to
-the root user, and type
-
-  [ !create_dir (master_disk)>usr   --if needed ]
-  [ !create_dir (master_disk)>usr>local   --if needed ]
-    !gmake install
-
-This installs PCRE and its man pages into /usr/local. You can add
-(master_disk)>usr>local>bin to your command search paths, or if you are in
-BASH, add /usr/local/bin to the PATH environment variable.
-
-4. Restrictions
-
-This port requires readline library optionally. However during the build I
-faced some yet unexplored errors while linking with readline. As it was an
-optional component I chose to disable it.
-
-5. Known Problems
-
-I ran the test suite, but you will have to be your own judge of whether this
-command, and this port, suits your purposes. If you find any problems that
-appear to be related to the port itself, please let me know. Please see the
-build.log file in the root of the package also.
-
-
-=========================
-Last Updated: 26 May 2010
-****
+####
--- a/ext/pcre/pcrelib/README
+++ b/ext/pcre/pcrelib/README
@ -18,11 +18,12 @@ The contents of this README file are:
  The PCRE APIs
  Documentation for PCRE
  Contributions by users of PCRE
-  Building PCRE on non-Unix systems
-  Building PCRE on Unix-like systems
-  Retrieving configuration information on Unix-like systems
-  Shared libraries on Unix-like systems
-  Cross-compiling on Unix-like systems
+  Building PCRE on non-Unix-like systems
+  Building PCRE without using autotools
+  Building PCRE using autotools
+  Retrieving configuration information
+  Shared libraries
+  Cross-compiling using autotools
  Using HP's ANSI C++ compiler (aCC)
  Using PCRE from MySQL
  Making new tarballs
@ -34,16 +35,19 @@ The contents of this README file are:
 The PCRE APIs
 -------------

-PCRE is written in C, and it has its own API. The distribution also includes a
-set of C++ wrapper functions (see the pcrecpp man page for details), courtesy
-of Google Inc.
+PCRE is written in C, and it has its own API. There are two sets of functions,
+one for the 8-bit library, which processes strings of bytes, and one for the
+16-bit library, which processes strings of 16-bit values. The distribution also
+includes a set of C++ wrapper functions (see the pcrecpp man page for details),
+courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
+C++.

-In addition, there is a set of C wrapper functions that are based on the POSIX
-regular expression API (see the pcreposix man page). These end up in the
-library called libpcreposix. Note that this just provides a POSIX calling
-interface to PCRE; the regular expressions themselves still follow Perl syntax
-and semantics. The POSIX API is restricted, and does not give full access to
-all of PCRE's facilities.
+In addition, there is a set of C wrapper functions (again, just for the 8-bit
+library) that are based on the POSIX regular expression API (see the pcreposix
+man page). These end up in the library called libpcreposix. Note that this just
+provides a POSIX calling interface to PCRE; the regular expressions themselves
+still follow Perl syntax and semantics. The POSIX API is restricted, and does
+not give full access to all of PCRE's facilities.

 The header file for the POSIX-style functions is called pcreposix.h. The
 official POSIX name is regex.h, but I did not want to risk possible problems
@ -106,36 +110,45 @@ Windows (I myself do not use Windows). Nowadays there is more Windows support
 in the standard distribution, so these contributions have been archived.


-Building PCRE on non-Unix systems
---------------------------------
+Building PCRE on non-Unix-like systems
+--------------------------------------

-For a non-Unix system, please read the comments in the file NON-UNIX-USE,
-though if your system supports the use of "configure" and "make" you may be
-able to build PCRE in the same way as for Unix-like systems. PCRE can also be
-configured in many platform environments using the GUI facility provided by
-CMake's cmake-gui command. This creates Makefiles, solution files, etc.
+For a non-Unix-like system, please read the comments in the file
+NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
+"make" you may be able to build PCRE using autotools in the same way as for
+many Unix-like systems.
+
+PCRE can also be configured using the GUI facility provided by CMake's
+cmake-gui command. This creates Makefiles, solution files, etc. The file
+NON-AUTOTOOLS-BUILD has information about CMake.

 PCRE has been compiled on many different operating systems. It should be
 straightforward to build PCRE on any system that has a Standard C compiler and
 library, because it uses only Standard C functions.


-Building PCRE on Unix-like systems
----------------------------------
+Building PCRE without using autotools
+-------------------------------------
+
+The use of autotools (in particular, libtool) is problematic in some
+environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
+file for ways of building PCRE without using autotools.
+
+
+Building PCRE using autotools
+-----------------------------

 If you are using HP's ANSI C++ compiler (aCC), please see the special note
 in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.

-The following instructions assume the use of the widely used "configure, make,
-make install" process. There is also support for CMake in the PCRE
-distribution; there are some comments about using CMake in the NON-UNIX-USE
-file, though it can also be used in Unix-like systems.
+The following instructions assume the use of the widely used "configure; make;
+make install" (autotools) process.

-To build PCRE on a Unix-like system, first run the "configure" command from the
-PCRE distribution directory, with your current directory set to the directory
-where you want the files to be created. This command is a standard GNU
-"autoconf" configuration script, for which generic instructions are supplied in
-the file INSTALL.
+To build PCRE on a system that supports autotools, first run the "configure"
+command from the PCRE distribution directory, with your current directory set
+to the directory where you want the files to be created. This command is a
+standard GNU "autoconf" configuration script, for which generic instructions
+are supplied in the file INSTALL.

 Most commonly, people build PCRE within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
@ -143,9 +156,9 @@ the usual methods of changing standard defaults are available. For example:

 CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local

-specifies that the C compiler should be run with the flags '-O2 -Wall' instead
-of the default, and that "make install" should install PCRE under /opt/local
-instead of the default /usr/local.
+This command specifies that the C compiler should be run with the flags '-O2
+-Wall' instead of the default, and that "make install" should install PCRE
+under /opt/local instead of the default /usr/local.

 If you want to build in a different directory, just run "configure" with that
 directory as current. For example, suppose you have unpacked the PCRE source
@ -159,27 +172,59 @@ possible to build it as a C++ library, though the provided building apparatus
 does not have any features to support this.

 There are some optional features that can be included or omitted from the PCRE
-library. You can read more about them in the pcrebuild man page.
+library. They are also documented in the pcrebuild man page.

-. If you want to suppress the building of the C++ wrapper library, you can add
-  --disable-cpp to the "configure" command. Otherwise, when "configure" is run,
-  it will try to find a C++ compiler and C++ header files, and if it succeeds,
-  it will try to build the C++ wrapper.
+. By default, both shared and static libraries are built. You can change this
+  by adding one of these options to the "configure" command:
+
+  --disable-shared
+  --disable-static
+
+  (See also "Shared libraries" below.)
+
+. By default, only the 8-bit library is built. If you add --enable-pcre16 to
+  the "configure" command, the 16-bit library is also built. If you want only
+  the 16-bit library, use "./configure --enable-pcre16 --disable-pcre8".
+
+. If you are building the 8-bit library and want to suppress the building of
+  the C++ wrapper library, you can add --disable-cpp to the "configure"
+  command. Otherwise, when "configure" is run without --disable-pcre8, it will
+  try to find a C++ compiler and C++ header files, and if it succeeds, it will
+  try to build the C++ wrapper.
+
+. If you want to include support for just-in-time compiling, which can give
+  large performance improvements on certain platforms, add --enable-jit to the
+  "configure" command. This support is available only for certain hardware
+  architectures. If you try to enable it on an unsupported architecture, there
+  will be a compile time error.
+
+. When JIT support is enabled, pcregrep automatically makes use of it, unless
+  you add --disable-pcregrep-jit to the "configure" command.

 . If you want to make use of the support for UTF-8 Unicode character strings in
-  PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
-  code for handling UTF-8 is not included in the library. Even when included,
-  it still has to be enabled by an option at run time. When PCRE is compiled
-  with this option, its input can only either be ASCII or UTF-8, even when
-  running on EBCDIC platforms. It is not possible to use both --enable-utf8 and
-  --enable-ebcdic at the same time.
+  the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
+  you must add --enable-utf to the "configure" command. Without it, the code
+  for handling UTF-8 and UTF-16 is not included in the relevant library. Even
+  when --enable-utf is included, the use of a UTF encoding still has to be
+  enabled by an option at run time. When PCRE is compiled with this option, its
+  input can only either be ASCII or UTF-8/16, even when running on EBCDIC
+  platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
+  the same time.

-. If, in addition to support for UTF-8 character strings, you want to include
-  support for the \P, \p, and \X sequences that recognize Unicode character
-  properties, you must add --enable-unicode-properties to the "configure"
-  command. This adds about 30K to the size of the library (in the form of a
-  property table); only the basic two-letter properties such as Lu are
-  supported.
+. There are no separate options for enabling UTF-8 and UTF-16 independently
+  because that would allow ridiculous settings such as requesting UTF-16
+  support while building only the 8-bit library. However, the option
+  --enable-utf8 is retained for backwards compatibility with earlier releases
+  that did not support 16-bit character strings. It is synonymous with
+  --enable-utf. It is not possible to configure one library with UTF support
+  and the other without in the same configuration.
+
+. If, in addition to support for UTF-8/16 character strings, you want to
+  include support for the \P, \p, and \X sequences that recognize Unicode
+  character properties, you must add --enable-unicode-properties to the
+  "configure" command. This adds about 30K to the size of the library (in the
+  form of a property table); only the basic two-letter properties such as Lu
+  are supported.

 . You can build PCRE to recognize either CR or LF or the sequence CRLF or any
  of the preceding, or any of the Unicode newline sequences as indicating the
@ -232,10 +277,11 @@ library. You can read more about them in the pcrebuild man page.
  sizes in the pcrestack man page.

 . The default maximum compiled pattern size is around 64K. You can increase
-  this by adding --with-link-size=3 to the "configure" command. You can
-  increase it even more by setting --with-link-size=4, but this is unlikely
-  ever to be necessary. Increasing the internal link size will reduce
-  performance.
+  this by adding --with-link-size=3 to the "configure" command. In the 8-bit
+  library, PCRE then uses three bytes instead of two for offsets to different
+  parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
+  the same as --with-link-size=4, which (in both libraries) uses four-byte
+  offsets. Increasing the internal link size reduces performance.

 . You can build PCRE so that its internal match() function that is called from
  pcre_exec() does not call itself recursively. Instead, it uses memory blocks
@ -247,9 +293,10 @@ library. You can read more about them in the pcrebuild man page.

  on the "configure" command. PCRE runs more slowly in this mode, but it may be
  necessary in environments with limited stack sizes. This applies only to the
-  pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not
-  use deeply nested recursion. There is a discussion about stack sizes in the
-  pcrestack man page.
+  normal execution of the pcre_exec() function; if JIT support is being
+  successfully used, it is not relevant. Equally, it does not apply to
+  pcre_dfa_exec(), which does not use deeply nested recursion. There is a
+  discussion about stack sizes in the pcrestack man page.

 . For speed, PCRE uses four tables for manipulating and identifying characters
  whose code point values are less than 256. By default, it uses a set of
@ -269,27 +316,37 @@ library. You can read more about them in the pcrebuild man page.

  This automatically implies --enable-rebuild-chartables (see above). However,
  when PCRE is built this way, it always operates in EBCDIC. It cannot support
-  both EBCDIC and UTF-8.
+  both EBCDIC and UTF-8/16.

-. It is possible to compile pcregrep to use libz and/or libbz2, in order to
-  read .gz and .bz2 files (respectively), by specifying one or both of
+. The pcregrep program currently supports only 8-bit data files, and so
+  requires the 8-bit PCRE library. It is possible to compile pcregrep to use
+  libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
+  specifying one or both of

  --enable-pcregrep-libz
  --enable-pcregrep-libbz2

  Of course, the relevant libraries must be installed on your system.

-. It is possible to compile pcretest so that it links with the libreadline
-  library, by specifying
+. The default size of the internal buffer used by pcregrep can be set by, for
+  example:

-  --enable-pcretest-libreadline
+  --with-pcregrep-bufsize=50K
+
+  The default value is 20K.
+
+. It is possible to compile pcretest so that it links with the libreadline
+  or libedit libraries, by specifying, respectively,
+
+  --enable-pcretest-libreadline or --enable-pcretest-libedit

  If this is done, when pcretest's input is from a terminal, it reads it using
  the readline() function. This provides line-editing and history facilities.
  Note that libreadline is GPL-licenced, so if you distribute a binary of
-  pcretest linked in this way, there may be licensing issues.
+  pcretest linked in this way, there may be licensing issues. These can be
+  avoided by linking with libedit (which has a BSD licence) instead.

-  Setting this option causes the -lreadline option to be added to the pcretest
+  Enabling libreadline causes the -lreadline option to be added to the pcretest
  build. In many operating environments with a sytem-installed readline
  library this is sufficient. However, in some environments (e.g. if an
  unmodified distribution version of readline is in use), it may be necessary
@ -302,37 +359,42 @@ library. You can read more about them in the pcrebuild man page.

 The "configure" script builds the following files for the basic C library:

-. Makefile is the makefile that builds the library
-. config.h contains build-time configuration options for the library
-. pcre.h is the public PCRE header file
-. pcre-config is a script that shows the settings of "configure" options
-. libpcre.pc is data for the pkg-config command
-. libtool is a script that builds shared and/or static libraries
-. RunTest is a script for running tests on the basic C library
-. RunGrepTest is a script for running tests on the pcregrep command
+. Makefile             the makefile that builds the library
+. config.h             build-time configuration options for the library
+. pcre.h               the public PCRE header file
+. pcre-config          script that shows the building settings such as CFLAGS
+                         that were set for "configure"
+. libpcre.pc         ) data for the pkg-config command
+. libpcre16.pc       )
+. libpcreposix.pc    )
+. libtool              script that builds shared and/or static libraries

 Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
 names config.h.generic and pcre.h.generic. These are provided for those who
 have to built PCRE without using "configure" or CMake. If you use "configure"
 or CMake, the .generic versions are not used.

-If a C++ compiler is found, the following files are also built:
+When building the 8-bit library, if a C++ compiler is found, the following
+files are also built:

-. libpcrecpp.pc is data for the pkg-config command
-. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper
-. pcre_stringpiece.h is the header for the C++ "stringpiece" functions
+. libpcrecpp.pc        data for the pkg-config command
+. pcrecpparg.h         header file for calling PCRE via the C++ wrapper
+. pcre_stringpiece.h   header for the C++ "stringpiece" functions

 The "configure" script also creates config.status, which is an executable
 script that can be run to recreate the configuration, and config.log, which
 contains compiler output from tests that "configure" runs.

-Once "configure" has run, you can run "make". It builds two libraries, called
-libpcre and libpcreposix, a test program called pcretest, and the pcregrep
-command. If a C++ compiler was found on your system, "make" also builds the C++
-wrapper library, which is called libpcrecpp, and some test programs called
-pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
-Building the C++ wrapper can be disabled by adding --disable-cpp to the
-"configure" command.
+Once "configure" has run, you can run "make". This builds either or both of the
+libraries libpcre and libpcre16, and a test program called pcretest. If you
+enabled JIT support with --enable-jit, a test program called pcre_jit_test is
+built as well.
+
+If the 8-bit library is built, libpcreposix and the pcregrep command are also
+built, and if a C++ compiler was found on your system, and you did not disable
+it with --disable-cpp, "make" builds the C++ wrapper library, which is called
+libpcrecpp, as well as some test programs called pcrecpp_unittest,
+pcre_scanner_unittest, and pcre_stringpiece_unittest.

 The command "make check" runs all the appropriate tests. Details of the PCRE
 tests are given below in a separate section of this document.
@ -343,16 +405,19 @@ system. The following are installed (file names are all relative to the

  Commands (bin):
    pcretest
-    pcregrep
+    pcregrep (if 8-bit support is enabled)
    pcre-config

  Libraries (lib):
-    libpcre
-    libpcreposix
-    libpcrecpp (if C++ support is enabled)
+    libpcre16     (if 16-bit support is enabled)
+    libpcre       (if 8-bit support is enabled)
+    libpcreposix  (if 8-bit support is enabled)
+    libpcrecpp    (if 8-bit and C++ support is enabled)

  Configuration information (lib/pkgconfig):
+    libpcre16.pc
    libpcre.pc
+    libpcreposix.pc
    libpcrecpp.pc (if C++ support is enabled)

  Header files (include):
@ -366,6 +431,7 @@ system. The following are installed (file names are all relative to the
  Man pages (share/man/man{1,3}):
    pcregrep.1
    pcretest.1
+    pcre-config.1
    pcre.3
    pcre*.3 (lots more pages, all starting "pcre")

@ -380,17 +446,18 @@ system. The following are installed (file names are all relative to the
    LICENCE
    NEWS
    README
-    pcre.txt       (a concatenation of the man(3) pages)
-    pcretest.txt   the pcretest man page
-    pcregrep.txt   the pcregrep man page
+    pcre.txt         (a concatenation of the man(3) pages)
+    pcretest.txt     the pcretest man page
+    pcregrep.txt     the pcregrep man page
+    pcre-config.txt  the pcre-config man page

 If you want to remove PCRE from your system, you can run "make uninstall".
 This removes all the files that "make install" installed. However, it does not
 remove any directories, because these are often shared with other programs.


-Retrieving configuration information on Unix-like systems
---------------------------------------------------------
+Retrieving configuration information
+------------------------------------

 Running "make install" installs the command pcre-config, which can be used to
 recall information about the PCRE configuration and installation. For example:
@ -415,8 +482,8 @@ The data is held in *.pc files that are installed in a directory called
 <prefix>/lib/pkgconfig.


-Shared libraries on Unix-like systems
-------------------------------------
+Shared libraries
+----------------

 The default distribution builds PCRE as shared libraries and static libraries,
 as long as the operating system supports shared libraries. Shared library
@ -441,8 +508,8 @@ Then run "make" in the usual way. Similarly, you can use --disable-static to
 build only shared libraries.


-Cross-compiling on Unix-like systems
------------------------------------
+Cross-compiling using autotools
+-------------------------------

 You can specify CC and CFLAGS in the normal way to the "configure" command, in
 order to cross-compile PCRE for some other host. However, you should NOT
@ -514,30 +581,49 @@ script creates the .txt and HTML forms of the documentation from the man pages.
 Testing PCRE
 ------------

-To test the basic PCRE library on a Unix system, run the RunTest script that is
-created by the configuring process. There is also a script called RunGrepTest
-that tests the options of the pcregrep command. If the C++ wrapper library is
-built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and
-pcre_stringpiece_unittest are also built.
+To test the basic PCRE library on a Unix-like system, run the RunTest script.
+There is another script called RunGrepTest that tests the options of the
+pcregrep command. If the C++ wrapper library is built, three test programs
+called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
+are also built. When JIT support is enabled, another test program called
+pcre_jit_test is built.

 Both the scripts and all the program tests are run if you obey "make check" or
-"make test". For other systems, see the instructions in NON-UNIX-USE.
+"make test". For other environments, see the instructions in
+NON-AUTOTOOLS-BUILD.

 The RunTest script runs the pcretest test program (which is documented in its
-own man page) on each of the testinput files in the testdata directory in
-turn, and compares the output with the contents of the corresponding testoutput
-files. A file called testtry is used to hold the main output from pcretest
-(testsavedregex is also used as a working file). To run pcretest on just one of
-the test files, give its number as an argument to RunTest, for example:
+own man page) on each of the relevant testinput files in the testdata
+directory, and compares the output with the contents of the corresponding
+testoutput files. Some tests are relevant only when certain build-time options
+were selected. For example, the tests for UTF-8/16 support are run only if
+--enable-utf was used. RunTest outputs a comment when it skips a test.

-  RunTest 2
+Many of the tests that are not skipped are run up to three times. The second
+run forces pcre_study() to be called for all patterns except for a few in some
+tests that are marked "never study" (see the pcretest program for how this is
+done). If JIT support is available, the non-DFA tests are run a third time,
+this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.

-The first test file can also be fed directly into the perltest.pl script to
-check that Perl gives the same results. The only difference you should see is
-in the first few lines, where the Perl version is given instead of the PCRE
-version.
+When both 8-bit and 16-bit support is enabled, the entire set of tests is run
+twice, once for each library. If you want to run just one set of tests, call
+RunTest with either the -8 or -16 option.

-The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(),
+RunTest uses a file called testtry to hold the main output from pcretest.
+Other files whose names begin with "test" are used as working files in some
+tests. To run pcretest on one or more specific test files, give their
+numbers as arguments to RunTest, for example:
+
+  RunTest 2 7 11
+
+You can also call RunTest with the single argument "list" to cause it to output
+a list of tests.
+
+The first test file can be fed directly into the perltest.pl script to check
+that Perl gives the same results. The only difference you should see is in the
+first few lines, where the Perl version is given instead of the PCRE version.
+
+The second set of tests check pcre_fullinfo(), pcre_study(),
 pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
 detection, and run-time flags that are specific to PCRE, as well as the POSIX
 wrapper API. It also uses the debugging flags to check some of the internals of
@ -572,33 +658,32 @@ RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
 Windows versions of test 2. More info on using RunTest.bat is included in the
 document entitled NON-UNIX-USE.]

-The fourth test checks the UTF-8 support. It is not run automatically unless
-PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
-running "configure". This file can be also fed directly to the perltest.pl
-script, provided you are running Perl 5.8 or higher.
+The fourth test checks the UTF-8/16 support, and the fifth test checks error
+handling and internal UTF features of PCRE that are not relevant to Perl. The
+sixth and seventh tests do the same for Unicode character properties support.

-The fifth test checks error handling with UTF-8 encoding, and internal UTF-8
-features of PCRE that are not relevant to Perl.
+The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
+matching function, in non-UTF-8/16 mode, UTF-8/16 mode, and UTF-8/16 mode with
+Unicode property support, respectively.

-The sixth test (which is Perl-5.10 compatible) checks the support for Unicode
-character properties. It it not run automatically unless PCRE is built with
-Unicode property support. To to this you must set --enable-unicode-properties
-when running "configure".
+The eleventh test checks some internal offsets and code size features; it is
+run only when the default "link size" of 2 is set (in other cases the sizes
+change) and when Unicode property support is enabled.

-The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative
-matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode
-property support, respectively. The eighth and ninth tests are not run
-automatically unless PCRE is build with the relevant support.
+The twelfth test is run only when JIT support is available, and the thirteenth
+test is run only when JIT support is not available. They test some JIT-specific
+features such as information output from pcretest about JIT compilation.

-The tenth test checks some internal offsets and code size features; it is run
-only when the default "link size" of 2 is set (in other cases the sizes
-change).
+The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
+the seventeenth, eighteenth, and nineteenth tests are run only in 16-bit mode.
+These are tests that generate different output in the two modes. They are for
+general cases, UTF-8/16 support, and Unicode property support, respectively.

-The eleventh test checks out features that are new in Perl 5.10, and the
-twelfth test checks a number internals and non-Perl features concerned with
-Unicode property support. It it not run automatically unless PCRE is built with
-Unicode property support. To to this you must set --enable-unicode-properties
-when running "configure".
+The twentieth test is run only in 16-bit mode. It tests some specific 16-bit
+features of the DFA matching engine.
+
+The twenty-first and twenty-second tests are run only in 16-bit mode, when the
+link size is set to 2. They test reloading pre-compiled patterns.


 Character tables
@ -658,7 +743,9 @@ will cause PCRE to malfunction.
 File manifest
 -------------

-The distribution should contain the following files:
+The distribution should contain the files listed below. Where a file name is
+given as pcre[16]_xxx it means that there are two files, one with the name
+pcre_xxx and the other with the name pcre16_xxx.

 (A) Source files of the PCRE library functions and their headers:

@ -667,33 +754,40 @@ The distribution should contain the following files:

  pcre_chartables.c.dist  a default set of character tables that assume ASCII
                            coding; used, unless --enable-rebuild-chartables is
-                            specified, by copying to pcre_chartables.c
+                            specified, by copying to pcre[16]_chartables.c

  pcreposix.c             )
-  pcre_compile.c          )
-  pcre_config.c           )
-  pcre_dfa_exec.c         )
-  pcre_exec.c             )
-  pcre_fullinfo.c         )
-  pcre_get.c              ) sources for the functions in the library,
-  pcre_globals.c          )   and some internal functions that they use
-  pcre_info.c             )
-  pcre_maketables.c       )
-  pcre_newline.c          )
+  pcre[16]_byte_order.c   )
+  pcre[16]_compile.c      )
+  pcre[16]_config.c       )
+  pcre[16]_dfa_exec.c     )
+  pcre[16]_exec.c         )
+  pcre[16]_fullinfo.c     )
+  pcre[16]_get.c          ) sources for the functions in the library,
+  pcre[16]_globals.c      )   and some internal functions that they use
+  pcre[16]_jit_compile.c  )
+  pcre[16]_maketables.c   )
+  pcre[16]_newline.c      )
+  pcre[16]_refcount.c     )
+  pcre[16]_string_utils.c )
+  pcre[16]_study.c        )
+  pcre[16]_tables.c       )
+  pcre[16]_ucd.c          )
+  pcre[16]_version.c      )
+  pcre[16]_xclass.c       )
  pcre_ord2utf8.c         )
-  pcre_refcount.c         )
-  pcre_study.c            )
-  pcre_tables.c           )
-  pcre_try_flipped.c      )
-  pcre_ucd.c              )
  pcre_valid_utf8.c       )
-  pcre_version.c          )
-  pcre_xclass.c           )
-  pcre_printint.src       ) debugging function that is #included in pcretest,
+  pcre16_ord2utf16.c      )
+  pcre16_utf16_utils.c    )
+  pcre16_valid_utf16.c    )
+
+  pcre[16]_printint.c     ) debugging function that is used by pcretest,
                          )   and can also be #included in pcre_compile()
+
  pcre.h.in               template for pcre.h when built by "configure"
  pcreposix.h             header for the external POSIX wrapper API
  pcre_internal.h         header for internal use
+  sljit/*                 16 files that make up the JIT compiler
  ucp.h                   header for Unicode property handling

  config.h.in             template for config.h, which is built by "configure"
@ -730,7 +824,8 @@ The distribution should contain the following files:
  Makefile.am             ) the automake input that was used to create
                          )   Makefile.in
  NEWS                    important changes in this release
-  NON-UNIX-USE            notes on building PCRE on non-Unix systems
+  NON-UNIX-USE            the previous name for NON-AUTOTOOLS-BUILD
+  NON-AUTOTOOLS-BUILD     notes on building PCRE without using autotools
  PrepareRelease          script to make preparations for "make dist"
  README                  this file
  RunTest                 a Unix shell script for running tests
@ -751,6 +846,7 @@ The distribution should contain the following files:
  doc/pcretest.txt        plain text documentation of test program
  doc/perltest.txt        plain text documentation of Perl test program
  install-sh              a shell script for installing files
+  libpcre16.pc.in         template for libpcre16.pc for pkg-config
  libpcre.pc.in           template for libpcre.pc for pkg-config
  libpcreposix.pc.in      template for libpcreposix.pc for pkg-config
  libpcrecpp.pc.in        template for libpcrecpp.pc for pkg-config
@ -760,17 +856,20 @@ The distribution should contain the following files:
  mkinstalldirs           script for making install directories
  perltest.pl             Perl test program
  pcre-config.in          source of script which retains PCRE information
+  pcre_jit_test.c         test program for the JIT compiler
  pcrecpp_unittest.cc          )
  pcre_scanner_unittest.cc     ) test programs for the C++ wrapper
  pcre_stringpiece_unittest.cc )
  testdata/testinput*     test data for main library tests
  testdata/testoutput*    expected test results
  testdata/grep*          input and output for pcregrep tests
+  testdata/*              other supporting test files

 (D) Auxiliary files for cmake support

  cmake/COPYING-CMAKE-SCRIPTS
  cmake/FindPackageHandleStandardArgs.cmake
+  cmake/FindEditline.cmake
  cmake/FindReadline.cmake
  CMakeLists.txt
  config-cmake.h.in
@ -796,4 +895,4 @@ The distribution should contain the following files:
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-Last updated: 19 January 2010
+Last updated: 18 June 2012
--- a/ext/pcre/pcrelib/config.h
+++ b/ext/pcre/pcrelib/config.h
@ -282,7 +282,7 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_NAME "PCRE"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE 8.12"
+#define PACKAGE_STRING "PCRE 8.31"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre"
@ -291,7 +291,7 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_URL ""

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "8.12"
+#define PACKAGE_VERSION "8.31"


 /* If you are compiling for a system other than a Unix-like system or
@ -347,7 +347,7 @@ them both to 0; an emulation function will be used. */

 /* Version number of package */
 #ifndef VERSION
-#define VERSION "8.12"
+#define VERSION "8.31"
 #endif

 /* Define to empty if `const' does not conform to ANSI C. */
--- a/ext/pcre/pcrelib/dftables.c
+++ b/ext/pcre/pcrelib/dftables.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -112,7 +112,7 @@ fprintf(f,
  "#endif\n\n"
  "#include \"pcre_internal.h\"\n\n");
 fprintf(f,
-  "const unsigned char _pcre_default_tables[] = {\n\n"
+  "const pcre_uint8 PRIV(default_tables)[] = {\n\n"
  "/* This table is a lower casing table. */\n\n");

 fprintf(f, "  ");
--- a/ext/pcre/pcrelib/doc/pcre.txt
+++ b/ext/pcre/pcrelib/doc/pcre.txt
--- a/ext/pcre/pcrelib/pcre.h
+++ b/ext/pcre/pcrelib/pcre.h
@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.

-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */

 #define PCRE_MAJOR          8
-#define PCRE_MINOR          12
+#define PCRE_MINOR          31
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2011-01-15
+#define PCRE_DATE           2012-07-06

 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@ -98,28 +98,37 @@ extern "C" {
 /* Options. Some are compile-time only, some are run-time only, and some are
 both, so we keep them all distinct. However, almost all the bits in the options
 word are now used. In the long run, we may have to re-use some of the
-compile-time only bits for runtime options, or vice versa. */
+compile-time only bits for runtime options, or vice versa. In the comments
+below, "compile", "exec", and "DFA exec" mean that the option is permitted to
+be set for those functions; "used in" means that an option may be set only for
+compile, but is subsequently referenced in exec and/or DFA exec. Any of the
+compile-time options may be inspected during studying (and therefore JIT
+compiling). */

 #define PCRE_CASELESS           0x00000001  /* Compile */
 #define PCRE_MULTILINE          0x00000002  /* Compile */
 #define PCRE_DOTALL             0x00000004  /* Compile */
 #define PCRE_EXTENDED           0x00000008  /* Compile */
 #define PCRE_ANCHORED           0x00000010  /* Compile, exec, DFA exec */
-#define PCRE_DOLLAR_ENDONLY     0x00000020  /* Compile */
+#define PCRE_DOLLAR_ENDONLY     0x00000020  /* Compile, used in exec, DFA exec */
 #define PCRE_EXTRA              0x00000040  /* Compile */
 #define PCRE_NOTBOL             0x00000080  /* Exec, DFA exec */
 #define PCRE_NOTEOL             0x00000100  /* Exec, DFA exec */
 #define PCRE_UNGREEDY           0x00000200  /* Compile */
 #define PCRE_NOTEMPTY           0x00000400  /* Exec, DFA exec */
-#define PCRE_UTF8               0x00000800  /* Compile */
+/* The next two are also used in exec and DFA exec */
+#define PCRE_UTF8               0x00000800  /* Compile (same as PCRE_UTF16) */
+#define PCRE_UTF16              0x00000800  /* Compile (same as PCRE_UTF8) */
 #define PCRE_NO_AUTO_CAPTURE    0x00001000  /* Compile */
-#define PCRE_NO_UTF8_CHECK      0x00002000  /* Compile, exec, DFA exec */
+/* The next two are also used in exec and DFA exec */
+#define PCRE_NO_UTF8_CHECK      0x00002000  /* Compile (same as PCRE_NO_UTF16_CHECK) */
+#define PCRE_NO_UTF16_CHECK     0x00002000  /* Compile (same as PCRE_NO_UTF8_CHECK) */
 #define PCRE_AUTO_CALLOUT       0x00004000  /* Compile */
 #define PCRE_PARTIAL_SOFT       0x00008000  /* Exec, DFA exec */
 #define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
 #define PCRE_DFA_SHORTEST       0x00010000  /* DFA exec */
 #define PCRE_DFA_RESTART        0x00020000  /* DFA exec */
-#define PCRE_FIRSTLINE          0x00040000  /* Compile */
+#define PCRE_FIRSTLINE          0x00040000  /* Compile, used in exec, DFA exec */
 #define PCRE_DUPNAMES           0x00080000  /* Compile */
 #define PCRE_NEWLINE_CR         0x00100000  /* Compile, exec, DFA exec */
 #define PCRE_NEWLINE_LF         0x00200000  /* Compile, exec, DFA exec */
@ -128,41 +137,82 @@ compile-time only bits for runtime options, or vice versa. */
 #define PCRE_NEWLINE_ANYCRLF    0x00500000  /* Compile, exec, DFA exec */
 #define PCRE_BSR_ANYCRLF        0x00800000  /* Compile, exec, DFA exec */
 #define PCRE_BSR_UNICODE        0x01000000  /* Compile, exec, DFA exec */
-#define PCRE_JAVASCRIPT_COMPAT  0x02000000  /* Compile */
+#define PCRE_JAVASCRIPT_COMPAT  0x02000000  /* Compile, used in exec */
 #define PCRE_NO_START_OPTIMIZE  0x04000000  /* Compile, exec, DFA exec */
 #define PCRE_NO_START_OPTIMISE  0x04000000  /* Synonym */
 #define PCRE_PARTIAL_HARD       0x08000000  /* Exec, DFA exec */
 #define PCRE_NOTEMPTY_ATSTART   0x10000000  /* Exec, DFA exec */
-#define PCRE_UCP                0x20000000  /* Compile */
+#define PCRE_UCP                0x20000000  /* Compile, used in exec, DFA exec */

 /* Exec-time and get/set-time error codes */

-#define PCRE_ERROR_NOMATCH         (-1)
-#define PCRE_ERROR_NULL            (-2)
-#define PCRE_ERROR_BADOPTION       (-3)
-#define PCRE_ERROR_BADMAGIC        (-4)
-#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
-#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
-#define PCRE_ERROR_NOMEMORY        (-6)
-#define PCRE_ERROR_NOSUBSTRING     (-7)
-#define PCRE_ERROR_MATCHLIMIT      (-8)
-#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
-#define PCRE_ERROR_BADUTF8        (-10)
-#define PCRE_ERROR_BADUTF8_OFFSET (-11)
-#define PCRE_ERROR_PARTIAL        (-12)
-#define PCRE_ERROR_BADPARTIAL     (-13)
-#define PCRE_ERROR_INTERNAL       (-14)
-#define PCRE_ERROR_BADCOUNT       (-15)
-#define PCRE_ERROR_DFA_UITEM      (-16)
-#define PCRE_ERROR_DFA_UCOND      (-17)
-#define PCRE_ERROR_DFA_UMLIMIT    (-18)
-#define PCRE_ERROR_DFA_WSSIZE     (-19)
-#define PCRE_ERROR_DFA_RECURSE    (-20)
-#define PCRE_ERROR_RECURSIONLIMIT (-21)
-#define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
-#define PCRE_ERROR_BADNEWLINE     (-23)
-#define PCRE_ERROR_BADOFFSET      (-24)
-#define PCRE_ERROR_SHORTUTF8      (-25)
+#define PCRE_ERROR_NOMATCH          (-1)
+#define PCRE_ERROR_NULL             (-2)
+#define PCRE_ERROR_BADOPTION        (-3)
+#define PCRE_ERROR_BADMAGIC         (-4)
+#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
+#define PCRE_ERROR_UNKNOWN_NODE     (-5)  /* For backward compatibility */
+#define PCRE_ERROR_NOMEMORY         (-6)
+#define PCRE_ERROR_NOSUBSTRING      (-7)
+#define PCRE_ERROR_MATCHLIMIT       (-8)
+#define PCRE_ERROR_CALLOUT          (-9)  /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8         (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16        (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF8_OFFSET  (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16_OFFSET (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_PARTIAL         (-12)
+#define PCRE_ERROR_BADPARTIAL      (-13)
+#define PCRE_ERROR_INTERNAL        (-14)
+#define PCRE_ERROR_BADCOUNT        (-15)
+#define PCRE_ERROR_DFA_UITEM       (-16)
+#define PCRE_ERROR_DFA_UCOND       (-17)
+#define PCRE_ERROR_DFA_UMLIMIT     (-18)
+#define PCRE_ERROR_DFA_WSSIZE      (-19)
+#define PCRE_ERROR_DFA_RECURSE     (-20)
+#define PCRE_ERROR_RECURSIONLIMIT  (-21)
+#define PCRE_ERROR_NULLWSLIMIT     (-22)  /* No longer actually used */
+#define PCRE_ERROR_BADNEWLINE      (-23)
+#define PCRE_ERROR_BADOFFSET       (-24)
+#define PCRE_ERROR_SHORTUTF8       (-25)
+#define PCRE_ERROR_SHORTUTF16      (-25)  /* Same for 8/16 */
+#define PCRE_ERROR_RECURSELOOP     (-26)
+#define PCRE_ERROR_JIT_STACKLIMIT  (-27)
+#define PCRE_ERROR_BADMODE         (-28)
+#define PCRE_ERROR_BADENDIANNESS   (-29)
+#define PCRE_ERROR_DFA_BADRESTART  (-30)
+
+/* Specific error codes for UTF-8 validity checks */
+
+#define PCRE_UTF8_ERR0               0
+#define PCRE_UTF8_ERR1               1
+#define PCRE_UTF8_ERR2               2
+#define PCRE_UTF8_ERR3               3
+#define PCRE_UTF8_ERR4               4
+#define PCRE_UTF8_ERR5               5
+#define PCRE_UTF8_ERR6               6
+#define PCRE_UTF8_ERR7               7
+#define PCRE_UTF8_ERR8               8
+#define PCRE_UTF8_ERR9               9
+#define PCRE_UTF8_ERR10             10
+#define PCRE_UTF8_ERR11             11
+#define PCRE_UTF8_ERR12             12
+#define PCRE_UTF8_ERR13             13
+#define PCRE_UTF8_ERR14             14
+#define PCRE_UTF8_ERR15             15
+#define PCRE_UTF8_ERR16             16
+#define PCRE_UTF8_ERR17             17
+#define PCRE_UTF8_ERR18             18
+#define PCRE_UTF8_ERR19             19
+#define PCRE_UTF8_ERR20             20
+#define PCRE_UTF8_ERR21             21
+
+/* Specific error codes for UTF-16 validity checks */
+
+#define PCRE_UTF16_ERR0              0
+#define PCRE_UTF16_ERR1              1
+#define PCRE_UTF16_ERR2              2
+#define PCRE_UTF16_ERR3              3
+#define PCRE_UTF16_ERR4              4

 /* Request types for pcre_fullinfo() */

@ -183,6 +233,9 @@ compile-time only bits for runtime options, or vice versa. */
 #define PCRE_INFO_JCHANGED          13
 #define PCRE_INFO_HASCRORLF         14
 #define PCRE_INFO_MINLENGTH         15
+#define PCRE_INFO_JIT               16
+#define PCRE_INFO_JITSIZE           17
+#define PCRE_INFO_MAXLOOKBEHIND     18

 /* Request types for pcre_config(). Do not re-arrange, in order to remain
 compatible. */
@ -196,8 +249,18 @@ compatible. */
 #define PCRE_CONFIG_UNICODE_PROPERTIES      6
 #define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 #define PCRE_CONFIG_BSR                     8
+#define PCRE_CONFIG_JIT                     9
+#define PCRE_CONFIG_UTF16                  10
+#define PCRE_CONFIG_JITTARGET              11

-/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
+/* Option bits for pcre_study(). Do not re-arrange or redefine these bits, in
+order to remain compatible. */
+
+#define PCRE_STUDY_JIT_COMPILE                0x0001
+#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE   0x0002
+#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE   0x0004
+
+/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
 these bits, just add new ones on the end, in order to remain compatible. */

 #define PCRE_EXTRA_STUDY_DATA             0x0001
@ -206,12 +269,33 @@ these bits, just add new ones on the end, in order to remain compatible. */
 #define PCRE_EXTRA_TABLES                 0x0008
 #define PCRE_EXTRA_MATCH_LIMIT_RECURSION  0x0010
 #define PCRE_EXTRA_MARK                   0x0020
+#define PCRE_EXTRA_EXECUTABLE_JIT         0x0040

 /* Types */

 struct real_pcre;                 /* declaration; the definition is private  */
 typedef struct real_pcre pcre;

+struct real_pcre16;               /* declaration; the definition is private  */
+typedef struct real_pcre16 pcre16;
+
+struct real_pcre_jit_stack;       /* declaration; the definition is private  */
+typedef struct real_pcre_jit_stack pcre_jit_stack;
+
+struct real_pcre16_jit_stack;     /* declaration; the definition is private  */
+typedef struct real_pcre16_jit_stack pcre16_jit_stack;
+
+/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
+a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since
+pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
+#ifndef PCRE_UCHAR16
+#define PCRE_UCHAR16 unsigned short
+#endif
+
+#ifndef PCRE_SPTR16
+#define PCRE_SPTR16 const PCRE_UCHAR16 *
+#endif
+
 /* When PCRE is compiled as a C++ library, the subject pointer type can be
 replaced with a custom type. For conventional use, the public interface is a
 const char *. */
@ -232,8 +316,22 @@ typedef struct pcre_extra {
  const unsigned char *tables;    /* Pointer to character tables */
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
  unsigned char **mark;           /* For passing back a mark pointer */
+  void *executable_jit;           /* Contains a pointer to a compiled jit code */
 } pcre_extra;

+/* Same structure as above, but with 16 bit char pointers. */
+
+typedef struct pcre16_extra {
+  unsigned long int flags;        /* PCRE_EXTRA_* bits for which fields are set */
+  void *study_data;               /* Opaque data from pcre16_study() */
+  unsigned long int match_limit;  /* Maximum number of calls to match() */
+  void *callout_data;             /* Data passed back in callouts */
+  const unsigned char *tables;    /* Pointer to character tables */
+  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
+  PCRE_UCHAR16 **mark;            /* For passing back a mark pointer */
+  void *executable_jit;           /* Pointer to compiled JIT code */
+} pcre16_extra;
+
 /* The structure for passing out data via the pcre_callout_function. We use a
 structure so that new fields can be added on the end in future versions,
 without changing the API of the function, thereby allowing old clients to work
@ -254,9 +352,33 @@ typedef struct pcre_callout_block {
  /* ------------------- Added for Version 1 -------------------------- */
  int          pattern_position;  /* Offset to next item in the pattern */
  int          next_item_length;  /* Length of next item in the pattern */
+  /* ------------------- Added for Version 2 -------------------------- */
+  const unsigned char *mark;      /* Pointer to current mark or NULL    */
  /* ------------------------------------------------------------------ */
 } pcre_callout_block;

+/* Same structure as above, but with 16 bit char pointers. */
+
+typedef struct pcre16_callout_block {
+  int          version;           /* Identifies version of block */
+  /* ------------------------ Version 0 ------------------------------- */
+  int          callout_number;    /* Number compiled into pattern */
+  int         *offset_vector;     /* The offset vector */
+  PCRE_SPTR16  subject;           /* The subject being matched */
+  int          subject_length;    /* The length of the subject */
+  int          start_match;       /* Offset to start of this match attempt */
+  int          current_position;  /* Where we currently are in the subject */
+  int          capture_top;       /* Max current capture */
+  int          capture_last;      /* Most recently closed capture */
+  void        *callout_data;      /* Data passed in with the call */
+  /* ------------------- Added for Version 1 -------------------------- */
+  int          pattern_position;  /* Offset to next item in the pattern */
+  int          next_item_length;  /* Length of next item in the pattern */
+  /* ------------------- Added for Version 2 -------------------------- */
+  const PCRE_UCHAR16 *mark;       /* Pointer to current mark or NULL    */
+  /* ------------------------------------------------------------------ */
+} pcre16_callout_block;
+
 /* Indirection for store get and free functions. These can be set to
 alternative malloc/free functions if required. Special ones are used in the
 non-recursive case for "frames". There is also an optional callout function
@ -269,47 +391,114 @@ PCRE_EXP_DECL void  (*pcre_free)(void *);
 PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
 PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
+
+PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
+PCRE_EXP_DECL void  (*pcre16_free)(void *);
+PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
+PCRE_EXP_DECL void  (*pcre16_stack_free)(void *);
+PCRE_EXP_DECL int   (*pcre16_callout)(pcre16_callout_block *);
 #else   /* VPCOMPAT */
 PCRE_EXP_DECL void *pcre_malloc(size_t);
 PCRE_EXP_DECL void  pcre_free(void *);
 PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
 PCRE_EXP_DECL void  pcre_stack_free(void *);
 PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
+
+PCRE_EXP_DECL void *pcre16_malloc(size_t);
+PCRE_EXP_DECL void  pcre16_free(void *);
+PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
+PCRE_EXP_DECL void  pcre16_stack_free(void *);
+PCRE_EXP_DECL int   pcre16_callout(pcre16_callout_block *);
 #endif  /* VPCOMPAT */

+/* User defined callback which provides a stack just before the match starts. */
+
+typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
+typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
+
 /* Exported PCRE functions */

 PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
                  const unsigned char *);
+PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
+                  const unsigned char *);
 PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
                  int *, const unsigned char *);
+PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
+                  int *, const unsigned char *);
 PCRE_EXP_DECL int  pcre_config(int, void *);
+PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                  int *, int, const char *, char *, int);
-PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
-                  int);
+PCRE_EXP_DECL int  pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
+PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int,
+                  char *, int);
+PCRE_EXP_DECL int  pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_UCHAR16 *, int);
 PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
                  const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int  pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
+                  PCRE_SPTR16, int, int, int, int *, int , int *, int);
 PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
                   int, int, int, int *, int);
+PCRE_EXP_DECL int  pcre16_exec(const pcre16 *, const pcre16_extra *,
+                   PCRE_SPTR16, int, int, int, int *, int);
 PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
                  void *);
+PCRE_EXP_DECL int  pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
+                  void *);
 PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
                  int *, int, const char *, const char **);
+PCRE_EXP_DECL int  pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int  pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
 PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
                  char **, char **);
+PCRE_EXP_DECL int  pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
+                  PCRE_UCHAR16 **, PCRE_UCHAR16 **);
 PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
                  const char **);
+PCRE_EXP_DECL int  pcre16_get_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
                  const char ***);
-PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
+PCRE_EXP_DECL int  pcre16_get_substring_list(PCRE_SPTR16, int *, int,
+                  PCRE_SPTR16 **);
 PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
+PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
 PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
+PCRE_EXP_DECL int  pcre16_refcount(pcre16 *, int);
 PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
+PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
+PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
+PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
 PCRE_EXP_DECL const char *pcre_version(void);
+PCRE_EXP_DECL const char *pcre16_version(void);
+
+/* Utility functions for byte order swaps. */
+PCRE_EXP_DECL int  pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
+                  const unsigned char *);
+PCRE_EXP_DECL int  pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
+                  const unsigned char *);
+PCRE_EXP_DECL int  pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
+                  PCRE_SPTR16, int, int *, int);
+
+/* JIT compiler related functions. */
+
+PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
+PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
+PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
+PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
+PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
+                  pcre_jit_callback, void *);
+PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
+                  pcre16_jit_callback, void *);

 #ifdef __cplusplus
 }  /* extern "C" */
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
--- a/ext/pcre/pcrelib/pcre_config.c
+++ b/ext/pcre/pcrelib/pcre_config.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -43,6 +43,9 @@ POSSIBILITY OF SUCH DAMAGE.

 #include "config.h"

+/* Keep the original link size. */
+static int real_link_size = LINK_SIZE;
+
 #include "pcre_internal.h"


@ -60,18 +63,41 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_config(int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_config(int what, void *where)
+#endif
 {
 switch (what)
  {
  case PCRE_CONFIG_UTF8:
-#ifdef SUPPORT_UTF8
+#if defined COMPILE_PCRE16
+  *((int *)where) = 0;
+  return PCRE_ERROR_BADOPTION;
+#else
+#if defined SUPPORT_UTF
  *((int *)where) = 1;
 #else
  *((int *)where) = 0;
 #endif
  break;
+#endif
+
+  case PCRE_CONFIG_UTF16:
+#if defined COMPILE_PCRE8
+  *((int *)where) = 0;
+  return PCRE_ERROR_BADOPTION;
+#else
+#if defined SUPPORT_UTF
+  *((int *)where) = 1;
+#else
+  *((int *)where) = 0;
+#endif
+  break;
+#endif

  case PCRE_CONFIG_UNICODE_PROPERTIES:
 #ifdef SUPPORT_UCP
@ -81,6 +107,22 @@ switch (what)
 #endif
  break;

+  case PCRE_CONFIG_JIT:
+#ifdef SUPPORT_JIT
+  *((int *)where) = 1;
+#else
+  *((int *)where) = 0;
+#endif
+  break;
+
+  case PCRE_CONFIG_JITTARGET:
+#ifdef SUPPORT_JIT
+  *((const char **)where) = PRIV(jit_get_target)();
+#else
+  *((const char **)where) = NULL;
+#endif
+  break;
+
  case PCRE_CONFIG_NEWLINE:
  *((int *)where) = NEWLINE;
  break;
@ -94,7 +136,7 @@ switch (what)
  break;

  case PCRE_CONFIG_LINK_SIZE:
-  *((int *)where) = LINK_SIZE;
+  *((int *)where) = real_link_size;
  break;

  case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
--- a/ext/pcre/pcrelib/pcre_fullinfo.c
+++ b/ext/pcre/pcrelib/pcre_fullinfo.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -63,13 +63,17 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
-  void *where)
+pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
+  int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  int what, void *where)
+#endif
 {
-real_pcre internal_re;
-pcre_study_data internal_study;
-const real_pcre *re = (const real_pcre *)argument_re;
+const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
 const pcre_study_data *study = NULL;

 if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
@ -77,12 +81,18 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  study = (const pcre_study_data *)extra_data->study_data;

+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
 if (re->magic_number != MAGIC_NUMBER)
-  {
-  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
-  if (re == NULL) return PCRE_ERROR_BADMAGIC;
-  if (study != NULL) study = &internal_study;
-  }
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+
+/* Check that this pattern was compiled in the correct bit mode */
+
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

 switch (what)
  {
@ -98,6 +108,18 @@ switch (what)
  *((size_t *)where) = (study == NULL)? 0 : study->size;
  break;

+  case PCRE_INFO_JITSIZE:
+#ifdef SUPPORT_JIT
+  *((size_t *)where) =
+      (extra_data != NULL &&
+      (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
+      extra_data->executable_jit != NULL)?
+    PRIV(jit_get_size)(extra_data->executable_jit) : 0;
+#else
+  *((size_t *)where) = 0;
+#endif
+  break;
+
  case PCRE_INFO_CAPTURECOUNT:
  *((int *)where) = re->top_bracket;
  break;
@ -108,7 +130,7 @@ switch (what)

  case PCRE_INFO_FIRSTBYTE:
  *((int *)where) =
-    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
    ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
  break;

@ -116,7 +138,7 @@ switch (what)
  block, not the internal copy (with flipped integer fields). */

  case PCRE_INFO_FIRSTTABLE:
-  *((const uschar **)where) =
+  *((const pcre_uint8 **)where) =
    (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
      ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
  break;
@ -124,12 +146,18 @@ switch (what)
  case PCRE_INFO_MINLENGTH:
  *((int *)where) =
    (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
-      study->minlength : -1;
+      (int)(study->minlength) : -1;
+  break;
+
+  case PCRE_INFO_JIT:
+  *((int *)where) = extra_data != NULL &&
+                    (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
+                    extra_data->executable_jit != NULL;
  break;

  case PCRE_INFO_LASTLITERAL:
  *((int *)where) =
-    ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
+    ((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
  break;

  case PCRE_INFO_NAMEENTRYSIZE:
@ -141,11 +169,11 @@ switch (what)
  break;

  case PCRE_INFO_NAMETABLE:
-  *((const uschar **)where) = (const uschar *)re + re->name_table_offset;
+  *((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
  break;

  case PCRE_INFO_DEFAULT_TABLES:
-  *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
+  *((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
  break;

  /* From release 8.00 this will always return TRUE because NOPARTIAL is
@ -163,6 +191,10 @@ switch (what)
  *((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
  break;

+  case PCRE_INFO_MAXLOOKBEHIND:
+  *((int *)where) = re->max_lookbehind;
+  break;
+
  default: return PCRE_ERROR_BADOPTION;
  }

--- a/ext/pcre/pcrelib/pcre_get.c
+++ b/ext/pcre/pcrelib/pcre_get.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -63,14 +63,20 @@ Returns:      the number of the named parentheses, or a negative number
                (PCRE_ERROR_NOSUBSTRING) if not found
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringnumber(const pcre *code, const char *stringname)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
+#endif
 {
 int rc;
 int entrysize;
 int top, bot;
-uschar *nametable;
+pcre_uchar *nametable;

+#ifdef COMPILE_PCRE8
 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  return rc;
 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
@ -79,14 +85,26 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  return rc;
 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  return rc;
+#endif
+#ifdef COMPILE_PCRE16
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+  return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+  return rc;
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+  return rc;
+#endif

 bot = 0;
 while (top > bot)
  {
  int mid = (top + bot) / 2;
-  uschar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
-  if (c == 0) return (entry[0] << 8) + entry[1];
+  pcre_uchar *entry = nametable + entrysize*mid;
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
+  if (c == 0) return GET2(entry, 0);
  if (c > 0) bot = mid + 1; else top = mid;
  }

@ -112,15 +130,22 @@ Returns:      the length of each entry, or a negative number
                (PCRE_ERROR_NOSUBSTRING) if not found
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  char **firstptr, char **lastptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
+  PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
+#endif
 {
 int rc;
 int entrysize;
 int top, bot;
-uschar *nametable, *lastentry;
+pcre_uchar *nametable, *lastentry;

+#ifdef COMPILE_PCRE8
 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  return rc;
 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
@ -129,30 +154,49 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  return rc;
 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  return rc;
+#endif
+#ifdef COMPILE_PCRE16
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
+  return rc;
+if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
+
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
+  return rc;
+if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
+  return rc;
+#endif

 lastentry = nametable + entrysize * (top - 1);
 bot = 0;
 while (top > bot)
  {
  int mid = (top + bot) / 2;
-  uschar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
+  pcre_uchar *entry = nametable + entrysize*mid;
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
  if (c == 0)
    {
-    uschar *first = entry;
-    uschar *last = entry;
+    pcre_uchar *first = entry;
+    pcre_uchar *last = entry;
    while (first > nametable)
      {
-      if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
      first -= entrysize;
      }
    while (last < lastentry)
      {
-      if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
      last += entrysize;
      }
+#ifdef COMPILE_PCRE8
    *firstptr = (char *)first;
    *lastptr = (char *)last;
+#else
+    *firstptr = (PCRE_UCHAR16 *)first;
+    *lastptr = (PCRE_UCHAR16 *)last;
+#endif
    return entrysize;
    }
  if (c > 0) bot = mid + 1; else top = mid;
@ -180,23 +224,39 @@ Returns:       the number of the first that is set,
               or a negative number on error
 */

+#ifdef COMPILE_PCRE8
 static int
 get_first_set(const pcre *code, const char *stringname, int *ovector)
+#else
+static int
+get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
+#endif
 {
-const real_pcre *re = (const real_pcre *)code;
+const REAL_PCRE *re = (const REAL_PCRE *)code;
 int entrysize;
+pcre_uchar *entry;
+#ifdef COMPILE_PCRE8
 char *first, *last;
-uschar *entry;
+#else
+PCRE_UCHAR16 *first, *last;
+#endif
+
+#ifdef COMPILE_PCRE8
 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
   return pcre_get_stringnumber(code, stringname);
 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+#else
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+  return pcre16_get_stringnumber(code, stringname);
+entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
+#endif
 if (entrysize <= 0) return entrysize;
-for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
+for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
   {
-  int n = (entry[0] << 8) + entry[1];
+  int n = GET2(entry, 0);
   if (ovector[n*2] >= 0) return n;
   }
-return (first[0] << 8) + first[1];
+return GET2((pcre_uchar *)first, 0); /* none set: use "first"; "entry" is now past "last" */
 }


@ -229,9 +289,15 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  int stringnumber, PCRE_UCHAR16 *buffer, int size)
+#endif
 {
 int yield;
 if (stringnumber < 0 || stringnumber >= stringcount)
@ -239,7 +305,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
 stringnumber *= 2;
 yield = ovector[stringnumber+1] - ovector[stringnumber];
 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
-memcpy(buffer, subject + ovector[stringnumber], yield);
+memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
 buffer[yield] = 0;
 return yield;
 }
@ -274,13 +340,25 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
-  int stringcount, const char *stringname, char *buffer, int size)
+pcre_copy_named_substring(const pcre *code, const char *subject,
+  int *ovector, int stringcount, const char *stringname,
+  char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
+  int *ovector, int stringcount, PCRE_SPTR16 stringname,
+  PCRE_UCHAR16 *buffer, int size)
+#endif
 {
 int n = get_first_set(code, stringname, ovector);
 if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#else
+return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#endif
 }


@ -306,29 +384,39 @@ Returns:         if successful: 0
                   PCRE_ERROR_NOMEMORY (-6) failed to get store
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
  const char ***listptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  PCRE_SPTR16 **listptr)
+#endif
 {
 int i;
-int size = sizeof(char *);
+int size = sizeof(pcre_uchar *);
 int double_count = stringcount * 2;
-char **stringlist;
-char *p;
+pcre_uchar **stringlist;
+pcre_uchar *p;

 for (i = 0; i < double_count; i += 2)
-  size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
+  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);

-stringlist = (char **)(pcre_malloc)(size);
+stringlist = (pcre_uchar **)(PUBL(malloc))(size);
 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;

+#ifdef COMPILE_PCRE8
 *listptr = (const char **)stringlist;
-p = (char *)(stringlist + stringcount + 1);
+#else
+*listptr = (PCRE_SPTR16 *)stringlist;
+#endif
+p = (pcre_uchar *)(stringlist + stringcount + 1);

 for (i = 0; i < double_count; i += 2)
  {
  int len = ovector[i+1] - ovector[i];
-  memcpy(p, subject + ovector[i], len);
+  memcpy(p, subject + ovector[i], IN_UCHARS(len));
  *stringlist++ = p;
  p += len;
  *p++ = 0;
@ -345,16 +433,22 @@ return 0;
 *************************************************/

 /* This function exists for the benefit of people calling PCRE from non-C
-programs that can call its functions, but not free() or (pcre_free)() directly.
+programs that can call its functions, but not free() or (PUBL(free))()
+directly.

 Argument:   the result of a previous pcre_get_substring_list()
 Returns:    nothing
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring_list(const char **pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring_list(PCRE_SPTR16 *pointer)
+#endif
 {
-(pcre_free)((void *)pointer);
+(PUBL(free))((void *)pointer);
 }


@ -384,21 +478,31 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) substring not present
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring(const char *subject, int *ovector, int stringcount,
  int stringnumber, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  int stringnumber, PCRE_SPTR16 *stringptr)
+#endif
 {
 int yield;
-char *substring;
+pcre_uchar *substring;
 if (stringnumber < 0 || stringnumber >= stringcount)
  return PCRE_ERROR_NOSUBSTRING;
 stringnumber *= 2;
 yield = ovector[stringnumber+1] - ovector[stringnumber];
-substring = (char *)(pcre_malloc)(yield + 1);
+substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
-memcpy(substring, subject + ovector[stringnumber], yield);
+memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
 substring[yield] = 0;
-*stringptr = substring;
+#ifdef COMPILE_PCRE8
+*stringptr = (const char *)substring;
+#else
+*stringptr = (PCRE_SPTR16)substring;
+#endif
 return yield;
 }

@ -431,13 +535,25 @@ Returns:         if successful:
                   PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
-  int stringcount, const char *stringname, const char **stringptr)
+pcre_get_named_substring(const pcre *code, const char *subject,
+  int *ovector, int stringcount, const char *stringname,
+  const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
+  int *ovector, int stringcount, PCRE_SPTR16 stringname,
+  PCRE_SPTR16 *stringptr)
+#endif
 {
 int n = get_first_set(code, stringname, ovector);
 if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+#else
+return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
+#endif
 }


@ -448,16 +564,22 @@ return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
 *************************************************/

 /* This function exists for the benefit of people calling PCRE from non-C
-programs that can call its functions, but not free() or (pcre_free)() directly.
+programs that can call its functions, but not free() or (PUBL(free))()
+directly.

 Argument:   the result of a previous pcre_get_substring()
 Returns:    nothing
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring(const char *pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring(PCRE_SPTR16 pointer)
+#endif
 {
-(pcre_free)((void *)pointer);
+(PUBL(free))((void *)pointer);
 }

 /* End of pcre_get.c */
--- a/ext/pcre/pcrelib/pcre_globals.c
+++ b/ext/pcre/pcrelib/pcre_globals.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -65,18 +65,18 @@ static void LocalPcreFree(void* aPtr)
  {
  free(aPtr);
  }
-PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = LocalPcreMalloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_free)(void *) = LocalPcreFree;
-PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = LocalPcreMalloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_stack_free)(void *) = LocalPcreFree;
-PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = LocalPcreFree;
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = LocalPcreFree;
+PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;

 #elif !defined VPCOMPAT
-PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_free)(void *) = free;
-PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_stack_free)(void *) = free;
-PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = free;
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = free;
+PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;
 #endif

 /* End of pcre_globals.c */
--- a/ext/pcre/pcrelib/pcre_info.c
+++ b/ext/pcre/pcrelib/pcre_info.c
@ -1,91 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
-                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
-
-----------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the name of the University of Cambridge nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
-*/
-
-
-/* This module contains the external function pcre_info(), which gives some
-information about a compiled pattern. However, use of this function is now
-deprecated, as it has been superseded by pcre_fullinfo(). */
-
-
-#include "config.h"
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-* (Obsolete) Return info about compiled pattern  *
-*************************************************/
-
-/* This is the original "info" function. It picks potentially useful data out
-of the private structure, but its interface was too rigid. It remains for
-backwards compatibility. The public options are passed back in an int - though
-the re->options field has been expanded to a long int, all the public options
-at the low end of it, and so even on 16-bit systems this will still be OK.
-Therefore, I haven't changed the API for pcre_info().
-
-Arguments:
-  argument_re   points to compiled code
-  optptr        where to pass back the options
-  first_byte    where to pass back the first character,
-                or -1 if multiline and all branches start ^,
-                or -2 otherwise
-
-Returns:        number of capturing subpatterns
-                or negative values on error
-*/
-
-PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
-{
-real_pcre internal_re;
-const real_pcre *re = (const real_pcre *)argument_re;
-if (re == NULL) return PCRE_ERROR_NULL;
-if (re->magic_number != MAGIC_NUMBER)
-  {
-  re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
-  if (re == NULL) return PCRE_ERROR_BADMAGIC;
-  }
-if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
-if (first_byte != NULL)
-  *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
-     ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
-return re->top_bracket;
-}
-
-/* End of pcre_info.c */
--- a/ext/pcre/pcrelib/pcre_internal.h
+++ b/ext/pcre/pcrelib/pcre_internal.h
--- a/ext/pcre/pcrelib/pcre_maketables.c
+++ b/ext/pcre/pcrelib/pcre_maketables.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -57,21 +57,26 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
 /* This function builds a set of character tables for use by PCRE and returns
 a pointer to them. They are build using the ctype functions, and consequently
 their contents will depend upon the current locale setting. When compiled as
-part of the library, the store is obtained via pcre_malloc(), but when compiled
-inside dftables, use malloc().
+part of the library, the store is obtained via PUBL(malloc)(), but when
+compiled inside dftables, use malloc().

 Arguments:   none
 Returns:     pointer to the contiguous block of data
 */

+#ifdef COMPILE_PCRE8
 const unsigned char *
 pcre_maketables(void)
+#else
+const unsigned char *
+pcre16_maketables(void)
+#endif
 {
 unsigned char *yield, *p;
 int i;

 #ifndef DFTABLES
-yield = (unsigned char*)(pcre_malloc)(tables_length);
+yield = (unsigned char*)(PUBL(malloc))(tables_length);
 #else
 yield = (unsigned char*)malloc(tables_length);
 #endif
--- a/ext/pcre/pcrelib/pcre_newline.c
+++ b/ext/pcre/pcrelib/pcre_newline.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -65,16 +65,25 @@ Arguments:
  type         the newline type
  endptr       pointer to the end of the string
  lenptr       where to return the length
-  utf8         TRUE if in utf8 mode
+  utf          TRUE if in utf mode

 Returns:       TRUE or FALSE
 */

 BOOL
-_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
+PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
+  BOOL utf)
 {
 int c;
-if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
+(void)utf;
+#ifdef SUPPORT_UTF
+if (utf)
+  {
+  GETCHAR(c, ptr);
+  }
+else
+#endif  /* SUPPORT_UTF */
+  c = *ptr;

 if (type == NLTYPE_ANYCRLF) switch(c)
  {
@ -93,9 +102,15 @@ else switch(c)
  case 0x000c: *lenptr = 1; return TRUE;             /* FF */
  case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
               return TRUE;                          /* CR */
-  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */
+#ifdef COMPILE_PCRE8
+  case 0x0085: *lenptr = utf? 2 : 1; return TRUE;    /* NEL */
  case 0x2028:                                       /* LS */
  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
+#else
+  case 0x0085:                                       /* NEL */
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif /* COMPILE_PCRE8 */
  default: return FALSE;
  }
 }
@ -114,26 +129,27 @@ Arguments:
  type         the newline type
  startptr     pointer to the start of the string
  lenptr       where to return the length
-  utf8         TRUE if in utf8 mode
+  utf          TRUE if in utf mode

 Returns:       TRUE or FALSE
 */

 BOOL
-_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
+PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
+  BOOL utf)
 {
 int c;
+(void)utf;
 ptr--;
-#ifdef SUPPORT_UTF8
-if (utf8)
+#ifdef SUPPORT_UTF
+if (utf)
  {
  BACKCHAR(ptr);
  GETCHAR(c, ptr);
  }
-else c = *ptr;
-#else   /* no UTF-8 support */
-c = *ptr;
-#endif  /* SUPPORT_UTF8 */
+else
+#endif  /* SUPPORT_UTF */
+  c = *ptr;

 if (type == NLTYPE_ANYCRLF) switch(c)
  {
@ -150,9 +166,15 @@ else switch(c)
  case 0x000b:                                      /* VT */
  case 0x000c:                                      /* FF */
  case 0x000d: *lenptr = 1; return TRUE;            /* CR */
-  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */
+#ifdef COMPILE_PCRE8
+  case 0x0085: *lenptr = utf? 2 : 1; return TRUE;   /* NEL */
  case 0x2028:                                      /* LS */
  case 0x2029: *lenptr = 3; return TRUE;            /* PS */
+#else
+  case 0x0085:                                       /* NEL */
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif /* COMPILE_PCRE8 */
  default: return FALSE;
  }
 }
--- a/ext/pcre/pcrelib/pcre_ord2utf8.c
+++ b/ext/pcre/pcrelib/pcre_ord2utf8.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -50,35 +50,45 @@ character value into a UTF8 string. */
 *       Convert character value to UTF-8         *
 *************************************************/

-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
+/* This function takes an integer value in the range 0 - 0x10ffff
+and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.

 Arguments:
  cvalue     the character value
-  buffer     pointer to buffer for result - at least 6 bytes long
+  buffer     pointer to buffer for result - at least 6 pcre_uchars long

 Returns:     number of characters placed in the buffer
 */

 int
-_pcre_ord2utf8(int cvalue, uschar *buffer)
+PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
+
 register int i, j;
-for (i = 0; i < _pcre_utf8_table1_size; i++)
-  if (cvalue <= _pcre_utf8_table1[i]) break;
+
+/* Replace code points that cannot be encoded (UTF-16 surrogates 0xd800-0xdfff
+or values above 0x10ffff) with 0xfffe. Should never happen in practice. */
+if ((cvalue >= 0xd800 && cvalue <= 0xdfff) || cvalue >= 0x110000)
+  cvalue = 0xfffe;
+
+for (i = 0; i < PRIV(utf8_table1_size); i++)
+  if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
 buffer += i;
 for (j = i; j > 0; j--)
 {
 *buffer-- = 0x80 | (cvalue & 0x3f);
 cvalue >>= 6;
 }
-*buffer = _pcre_utf8_table2[i] | cvalue;
+*buffer = PRIV(utf8_table2)[i] | cvalue;
 return i + 1;
+
 #else
+
 (void)(cvalue);  /* Keep compiler happy; this function won't ever be */
-(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer);  /* called when SUPPORT_UTF is not defined. */
 return 0;
+
 #endif
 }

--- a/ext/pcre/pcrelib/pcre_refcount.c
+++ b/ext/pcre/pcrelib/pcre_refcount.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -66,11 +66,18 @@ Returns:        the (possibly updated) count value (a non-negative number), or
                a negative error number
 */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_refcount(pcre *argument_re, int adjust)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_refcount(pcre16 *argument_re, int adjust)
+#endif
 {
-real_pcre *re = (real_pcre *)argument_re;
+REAL_PCRE *re = (REAL_PCRE *)argument_re;
 if (re == NULL) return PCRE_ERROR_NULL;
+if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 re->ref_count = (-adjust > re->ref_count)? 0 :
                (adjust + re->ref_count > 65535)? 65535 :
                re->ref_count + adjust;
--- a/ext/pcre/pcrelib/pcre_study.c
+++ b/ext/pcre/pcrelib/pcre_study.c
--- a/ext/pcre/pcrelib/pcre_tables.c
+++ b/ext/pcre/pcrelib/pcre_tables.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */

+#ifndef PCRE_INCLUDED

 /* This module contains some fixed tables that are used by more than one of the
 PCRE code modules. The tables are also #included by the pcretest program, which
@ -48,11 +49,12 @@ clashes with the library. */

 #include "pcre_internal.h"

+#endif /* PCRE_INCLUDED */

 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
 the definition is next to the definition of the opcodes in pcre_internal.h. */

-const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
+const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };



@ -63,31 +65,38 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
 /* These are the breakpoints for different numbers of bytes in a UTF-8
 character. */

-#ifdef SUPPORT_UTF8
+#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
+  || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)

-const int _pcre_utf8_table1[] =
+/* These tables are also required by pcretest in 16 bit mode. */
+
+const int PRIV(utf8_table1)[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};

-const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
+const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);

 /* These are the indicator bits and the mask for the data bits to set in the
 first byte of a character, indexed by the number of additional bytes. */

-const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
-const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+const int PRIV(utf8_table2)[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

 /* Table of the number of extra bytes, indexed by the first byte masked with
 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */

-const uschar _pcre_utf8_table4[] = {
+const pcre_uint8 PRIV(utf8_table4)[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };

+#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
+
+#ifdef SUPPORT_UTF
+
 /* Table to translate from particular type value to the general value. */

-const int _pcre_ucp_gentype[] = {
+const int PRIV(ucp_gentype)[] = {
  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
@ -98,6 +107,21 @@ const int _pcre_ucp_gentype[] = {
  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
 };

+#ifdef SUPPORT_JIT
+/* This table reverses PRIV(ucp_gentype): for each general type it holds the
+first and last particular type, which saves the cost of a memory load. */
+
+const int PRIV(ucp_typerange)[] = {
+  ucp_Cc, ucp_Cs,
+  ucp_Ll, ucp_Lu,
+  ucp_Mc, ucp_Mn,
+  ucp_Nd, ucp_No,
+  ucp_Pc, ucp_Ps,
+  ucp_Sc, ucp_So,
+  ucp_Zl, ucp_Zs,
+};
+#endif /* SUPPORT_JIT */
+
 /* The pcre_utt[] table below translates Unicode property names into type and
 code values. It is searched by binary chop, so must be in collating sequence of
 name. Originally, the table contained pointers to the name strings in the first
@ -108,7 +132,7 @@ table itself. Maintenance is more error-prone, but frequent changes to this
 data are unlikely.

 July 2008: There is now a script called maint/GenerateUtt.py that can be used
-to generate this data instead of maintaining it entirely by hand.
+to generate this data automatically instead of maintaining it by hand.

 The script was updated in March 2009 to generate a new EBCDIC-compliant
 version. Like all other character and string literals that are compared against
@ -121,8 +145,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
 #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
 #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
+#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
 #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
 #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
+#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
 #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
 #define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
 #define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
@ -131,6 +157,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
 #define STRING_Cc0 STR_C STR_c "\0"
 #define STRING_Cf0 STR_C STR_f "\0"
+#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
 #define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
 #define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
 #define STRING_Cn0 STR_C STR_n "\0"
@ -184,9 +211,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
 #define STRING_M0 STR_M "\0"
 #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
+#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
 #define STRING_Mc0 STR_M STR_c "\0"
 #define STRING_Me0 STR_M STR_e "\0"
 #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
+#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
+#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
 #define STRING_Mn0 STR_M STR_n "\0"
 #define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
 #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
@ -220,11 +251,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
 #define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
 #define STRING_Sc0 STR_S STR_c "\0"
+#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
 #define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
 #define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
 #define STRING_Sk0 STR_S STR_k "\0"
 #define STRING_Sm0 STR_S STR_m "\0"
 #define STRING_So0 STR_S STR_o "\0"
+#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
 #define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
 #define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
 #define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
@ -233,6 +266,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
 #define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
 #define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
+#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
 #define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
 #define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
 #define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
@ -251,15 +285,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Zp0 STR_Z STR_p "\0"
 #define STRING_Zs0 STR_Z STR_s "\0"

-const char _pcre_utt_names[] =
+const char PRIV(utt_names)[] =
  STRING_Any0
  STRING_Arabic0
  STRING_Armenian0
  STRING_Avestan0
  STRING_Balinese0
  STRING_Bamum0
+  STRING_Batak0
  STRING_Bengali0
  STRING_Bopomofo0
+  STRING_Brahmi0
  STRING_Braille0
  STRING_Buginese0
  STRING_Buhid0
@ -268,6 +304,7 @@ const char _pcre_utt_names[] =
  STRING_Carian0
  STRING_Cc0
  STRING_Cf0
+  STRING_Chakma0
  STRING_Cham0
  STRING_Cherokee0
  STRING_Cn0
@ -321,9 +358,13 @@ const char _pcre_utt_names[] =
  STRING_Lydian0
  STRING_M0
  STRING_Malayalam0
+  STRING_Mandaic0
  STRING_Mc0
  STRING_Me0
  STRING_Meetei_Mayek0
+  STRING_Meroitic_Cursive0
+  STRING_Meroitic_Hieroglyphs0
+  STRING_Miao0
  STRING_Mn0
  STRING_Mongolian0
  STRING_Myanmar0
@ -357,11 +398,13 @@ const char _pcre_utt_names[] =
  STRING_Samaritan0
  STRING_Saurashtra0
  STRING_Sc0
+  STRING_Sharada0
  STRING_Shavian0
  STRING_Sinhala0
  STRING_Sk0
  STRING_Sm0
  STRING_So0
+  STRING_Sora_Sompeng0
  STRING_Sundanese0
  STRING_Syloti_Nagri0
  STRING_Syriac0
@ -370,6 +413,7 @@ const char _pcre_utt_names[] =
  STRING_Tai_Le0
  STRING_Tai_Tham0
  STRING_Tai_Viet0
+  STRING_Takri0
  STRING_Tamil0
  STRING_Telugu0
  STRING_Thaana0
@ -388,146 +432,156 @@ const char _pcre_utt_names[] =
  STRING_Zp0
  STRING_Zs0;

-const ucp_type_table _pcre_utt[] = {
+const ucp_type_table PRIV(utt)[] = {
  {   0, PT_ANY, 0 },
  {   4, PT_SC, ucp_Arabic },
  {  11, PT_SC, ucp_Armenian },
  {  20, PT_SC, ucp_Avestan },
  {  28, PT_SC, ucp_Balinese },
  {  37, PT_SC, ucp_Bamum },
-  {  43, PT_SC, ucp_Bengali },
-  {  51, PT_SC, ucp_Bopomofo },
-  {  60, PT_SC, ucp_Braille },
-  {  68, PT_SC, ucp_Buginese },
-  {  77, PT_SC, ucp_Buhid },
-  {  83, PT_GC, ucp_C },
-  {  85, PT_SC, ucp_Canadian_Aboriginal },
-  { 105, PT_SC, ucp_Carian },
-  { 112, PT_PC, ucp_Cc },
-  { 115, PT_PC, ucp_Cf },
-  { 118, PT_SC, ucp_Cham },
-  { 123, PT_SC, ucp_Cherokee },
-  { 132, PT_PC, ucp_Cn },
-  { 135, PT_PC, ucp_Co },
-  { 138, PT_SC, ucp_Common },
-  { 145, PT_SC, ucp_Coptic },
-  { 152, PT_PC, ucp_Cs },
-  { 155, PT_SC, ucp_Cuneiform },
-  { 165, PT_SC, ucp_Cypriot },
-  { 173, PT_SC, ucp_Cyrillic },
-  { 182, PT_SC, ucp_Deseret },
-  { 190, PT_SC, ucp_Devanagari },
-  { 201, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 222, PT_SC, ucp_Ethiopic },
-  { 231, PT_SC, ucp_Georgian },
-  { 240, PT_SC, ucp_Glagolitic },
-  { 251, PT_SC, ucp_Gothic },
-  { 258, PT_SC, ucp_Greek },
-  { 264, PT_SC, ucp_Gujarati },
-  { 273, PT_SC, ucp_Gurmukhi },
-  { 282, PT_SC, ucp_Han },
-  { 286, PT_SC, ucp_Hangul },
-  { 293, PT_SC, ucp_Hanunoo },
-  { 301, PT_SC, ucp_Hebrew },
-  { 308, PT_SC, ucp_Hiragana },
-  { 317, PT_SC, ucp_Imperial_Aramaic },
-  { 334, PT_SC, ucp_Inherited },
-  { 344, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 366, PT_SC, ucp_Inscriptional_Parthian },
-  { 389, PT_SC, ucp_Javanese },
-  { 398, PT_SC, ucp_Kaithi },
-  { 405, PT_SC, ucp_Kannada },
-  { 413, PT_SC, ucp_Katakana },
-  { 422, PT_SC, ucp_Kayah_Li },
-  { 431, PT_SC, ucp_Kharoshthi },
-  { 442, PT_SC, ucp_Khmer },
-  { 448, PT_GC, ucp_L },
-  { 450, PT_LAMP, 0 },
-  { 453, PT_SC, ucp_Lao },
-  { 457, PT_SC, ucp_Latin },
-  { 463, PT_SC, ucp_Lepcha },
-  { 470, PT_SC, ucp_Limbu },
-  { 476, PT_SC, ucp_Linear_B },
-  { 485, PT_SC, ucp_Lisu },
-  { 490, PT_PC, ucp_Ll },
-  { 493, PT_PC, ucp_Lm },
-  { 496, PT_PC, ucp_Lo },
-  { 499, PT_PC, ucp_Lt },
-  { 502, PT_PC, ucp_Lu },
-  { 505, PT_SC, ucp_Lycian },
-  { 512, PT_SC, ucp_Lydian },
-  { 519, PT_GC, ucp_M },
-  { 521, PT_SC, ucp_Malayalam },
-  { 531, PT_PC, ucp_Mc },
-  { 534, PT_PC, ucp_Me },
-  { 537, PT_SC, ucp_Meetei_Mayek },
-  { 550, PT_PC, ucp_Mn },
-  { 553, PT_SC, ucp_Mongolian },
-  { 563, PT_SC, ucp_Myanmar },
-  { 571, PT_GC, ucp_N },
-  { 573, PT_PC, ucp_Nd },
-  { 576, PT_SC, ucp_New_Tai_Lue },
-  { 588, PT_SC, ucp_Nko },
-  { 592, PT_PC, ucp_Nl },
-  { 595, PT_PC, ucp_No },
-  { 598, PT_SC, ucp_Ogham },
-  { 604, PT_SC, ucp_Ol_Chiki },
-  { 613, PT_SC, ucp_Old_Italic },
-  { 624, PT_SC, ucp_Old_Persian },
-  { 636, PT_SC, ucp_Old_South_Arabian },
-  { 654, PT_SC, ucp_Old_Turkic },
-  { 665, PT_SC, ucp_Oriya },
-  { 671, PT_SC, ucp_Osmanya },
-  { 679, PT_GC, ucp_P },
-  { 681, PT_PC, ucp_Pc },
-  { 684, PT_PC, ucp_Pd },
-  { 687, PT_PC, ucp_Pe },
-  { 690, PT_PC, ucp_Pf },
-  { 693, PT_SC, ucp_Phags_Pa },
-  { 702, PT_SC, ucp_Phoenician },
-  { 713, PT_PC, ucp_Pi },
-  { 716, PT_PC, ucp_Po },
-  { 719, PT_PC, ucp_Ps },
-  { 722, PT_SC, ucp_Rejang },
-  { 729, PT_SC, ucp_Runic },
-  { 735, PT_GC, ucp_S },
-  { 737, PT_SC, ucp_Samaritan },
-  { 747, PT_SC, ucp_Saurashtra },
-  { 758, PT_PC, ucp_Sc },
-  { 761, PT_SC, ucp_Shavian },
-  { 769, PT_SC, ucp_Sinhala },
-  { 777, PT_PC, ucp_Sk },
-  { 780, PT_PC, ucp_Sm },
-  { 783, PT_PC, ucp_So },
-  { 786, PT_SC, ucp_Sundanese },
-  { 796, PT_SC, ucp_Syloti_Nagri },
-  { 809, PT_SC, ucp_Syriac },
-  { 816, PT_SC, ucp_Tagalog },
-  { 824, PT_SC, ucp_Tagbanwa },
-  { 833, PT_SC, ucp_Tai_Le },
-  { 840, PT_SC, ucp_Tai_Tham },
-  { 849, PT_SC, ucp_Tai_Viet },
-  { 858, PT_SC, ucp_Tamil },
-  { 864, PT_SC, ucp_Telugu },
-  { 871, PT_SC, ucp_Thaana },
-  { 878, PT_SC, ucp_Thai },
-  { 883, PT_SC, ucp_Tibetan },
-  { 891, PT_SC, ucp_Tifinagh },
-  { 900, PT_SC, ucp_Ugaritic },
-  { 909, PT_SC, ucp_Vai },
-  { 913, PT_ALNUM, 0 },
-  { 917, PT_PXSPACE, 0 },
-  { 921, PT_SPACE, 0 },
-  { 925, PT_WORD, 0 },
-  { 929, PT_SC, ucp_Yi },
-  { 932, PT_GC, ucp_Z },
-  { 934, PT_PC, ucp_Zl },
-  { 937, PT_PC, ucp_Zp },
-  { 940, PT_PC, ucp_Zs }
+  {  43, PT_SC, ucp_Batak },
+  {  49, PT_SC, ucp_Bengali },
+  {  57, PT_SC, ucp_Bopomofo },
+  {  66, PT_SC, ucp_Brahmi },
+  {  73, PT_SC, ucp_Braille },
+  {  81, PT_SC, ucp_Buginese },
+  {  90, PT_SC, ucp_Buhid },
+  {  96, PT_GC, ucp_C },
+  {  98, PT_SC, ucp_Canadian_Aboriginal },
+  { 118, PT_SC, ucp_Carian },
+  { 125, PT_PC, ucp_Cc },
+  { 128, PT_PC, ucp_Cf },
+  { 131, PT_SC, ucp_Chakma },
+  { 138, PT_SC, ucp_Cham },
+  { 143, PT_SC, ucp_Cherokee },
+  { 152, PT_PC, ucp_Cn },
+  { 155, PT_PC, ucp_Co },
+  { 158, PT_SC, ucp_Common },
+  { 165, PT_SC, ucp_Coptic },
+  { 172, PT_PC, ucp_Cs },
+  { 175, PT_SC, ucp_Cuneiform },
+  { 185, PT_SC, ucp_Cypriot },
+  { 193, PT_SC, ucp_Cyrillic },
+  { 202, PT_SC, ucp_Deseret },
+  { 210, PT_SC, ucp_Devanagari },
+  { 221, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 242, PT_SC, ucp_Ethiopic },
+  { 251, PT_SC, ucp_Georgian },
+  { 260, PT_SC, ucp_Glagolitic },
+  { 271, PT_SC, ucp_Gothic },
+  { 278, PT_SC, ucp_Greek },
+  { 284, PT_SC, ucp_Gujarati },
+  { 293, PT_SC, ucp_Gurmukhi },
+  { 302, PT_SC, ucp_Han },
+  { 306, PT_SC, ucp_Hangul },
+  { 313, PT_SC, ucp_Hanunoo },
+  { 321, PT_SC, ucp_Hebrew },
+  { 328, PT_SC, ucp_Hiragana },
+  { 337, PT_SC, ucp_Imperial_Aramaic },
+  { 354, PT_SC, ucp_Inherited },
+  { 364, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 386, PT_SC, ucp_Inscriptional_Parthian },
+  { 409, PT_SC, ucp_Javanese },
+  { 418, PT_SC, ucp_Kaithi },
+  { 425, PT_SC, ucp_Kannada },
+  { 433, PT_SC, ucp_Katakana },
+  { 442, PT_SC, ucp_Kayah_Li },
+  { 451, PT_SC, ucp_Kharoshthi },
+  { 462, PT_SC, ucp_Khmer },
+  { 468, PT_GC, ucp_L },
+  { 470, PT_LAMP, 0 },
+  { 473, PT_SC, ucp_Lao },
+  { 477, PT_SC, ucp_Latin },
+  { 483, PT_SC, ucp_Lepcha },
+  { 490, PT_SC, ucp_Limbu },
+  { 496, PT_SC, ucp_Linear_B },
+  { 505, PT_SC, ucp_Lisu },
+  { 510, PT_PC, ucp_Ll },
+  { 513, PT_PC, ucp_Lm },
+  { 516, PT_PC, ucp_Lo },
+  { 519, PT_PC, ucp_Lt },
+  { 522, PT_PC, ucp_Lu },
+  { 525, PT_SC, ucp_Lycian },
+  { 532, PT_SC, ucp_Lydian },
+  { 539, PT_GC, ucp_M },
+  { 541, PT_SC, ucp_Malayalam },
+  { 551, PT_SC, ucp_Mandaic },
+  { 559, PT_PC, ucp_Mc },
+  { 562, PT_PC, ucp_Me },
+  { 565, PT_SC, ucp_Meetei_Mayek },
+  { 578, PT_SC, ucp_Meroitic_Cursive },
+  { 595, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 616, PT_SC, ucp_Miao },
+  { 621, PT_PC, ucp_Mn },
+  { 624, PT_SC, ucp_Mongolian },
+  { 634, PT_SC, ucp_Myanmar },
+  { 642, PT_GC, ucp_N },
+  { 644, PT_PC, ucp_Nd },
+  { 647, PT_SC, ucp_New_Tai_Lue },
+  { 659, PT_SC, ucp_Nko },
+  { 663, PT_PC, ucp_Nl },
+  { 666, PT_PC, ucp_No },
+  { 669, PT_SC, ucp_Ogham },
+  { 675, PT_SC, ucp_Ol_Chiki },
+  { 684, PT_SC, ucp_Old_Italic },
+  { 695, PT_SC, ucp_Old_Persian },
+  { 707, PT_SC, ucp_Old_South_Arabian },
+  { 725, PT_SC, ucp_Old_Turkic },
+  { 736, PT_SC, ucp_Oriya },
+  { 742, PT_SC, ucp_Osmanya },
+  { 750, PT_GC, ucp_P },
+  { 752, PT_PC, ucp_Pc },
+  { 755, PT_PC, ucp_Pd },
+  { 758, PT_PC, ucp_Pe },
+  { 761, PT_PC, ucp_Pf },
+  { 764, PT_SC, ucp_Phags_Pa },
+  { 773, PT_SC, ucp_Phoenician },
+  { 784, PT_PC, ucp_Pi },
+  { 787, PT_PC, ucp_Po },
+  { 790, PT_PC, ucp_Ps },
+  { 793, PT_SC, ucp_Rejang },
+  { 800, PT_SC, ucp_Runic },
+  { 806, PT_GC, ucp_S },
+  { 808, PT_SC, ucp_Samaritan },
+  { 818, PT_SC, ucp_Saurashtra },
+  { 829, PT_PC, ucp_Sc },
+  { 832, PT_SC, ucp_Sharada },
+  { 840, PT_SC, ucp_Shavian },
+  { 848, PT_SC, ucp_Sinhala },
+  { 856, PT_PC, ucp_Sk },
+  { 859, PT_PC, ucp_Sm },
+  { 862, PT_PC, ucp_So },
+  { 865, PT_SC, ucp_Sora_Sompeng },
+  { 878, PT_SC, ucp_Sundanese },
+  { 888, PT_SC, ucp_Syloti_Nagri },
+  { 901, PT_SC, ucp_Syriac },
+  { 908, PT_SC, ucp_Tagalog },
+  { 916, PT_SC, ucp_Tagbanwa },
+  { 925, PT_SC, ucp_Tai_Le },
+  { 932, PT_SC, ucp_Tai_Tham },
+  { 941, PT_SC, ucp_Tai_Viet },
+  { 950, PT_SC, ucp_Takri },
+  { 956, PT_SC, ucp_Tamil },
+  { 962, PT_SC, ucp_Telugu },
+  { 969, PT_SC, ucp_Thaana },
+  { 976, PT_SC, ucp_Thai },
+  { 981, PT_SC, ucp_Tibetan },
+  { 989, PT_SC, ucp_Tifinagh },
+  { 998, PT_SC, ucp_Ugaritic },
+  { 1007, PT_SC, ucp_Vai },
+  { 1011, PT_ALNUM, 0 },
+  { 1015, PT_PXSPACE, 0 },
+  { 1019, PT_SPACE, 0 },
+  { 1023, PT_WORD, 0 },
+  { 1027, PT_SC, ucp_Yi },
+  { 1030, PT_GC, ucp_Z },
+  { 1032, PT_PC, ucp_Zl },
+  { 1035, PT_PC, ucp_Zp },
+  { 1038, PT_PC, ucp_Zs }
 };

-const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
+const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

-#endif  /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */

 /* End of pcre_tables.c */
--- a/ext/pcre/pcrelib/pcre_try_flipped.c
+++ b/ext/pcre/pcrelib/pcre_try_flipped.c
@ -1,137 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
-                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
-
-----------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the name of the University of Cambridge nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
-*/
-
-
-/* This module contains an internal function that tests a compiled pattern to
-see if it was compiled with the opposite endianness. If so, it uses an
-auxiliary local function to flip the appropriate bytes. */
-
-
-#include "config.h"
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-*         Flip bytes in an integer               *
-*************************************************/
-
-/* This function is called when the magic number in a regex doesn't match, in
-order to flip its bytes to see if we are dealing with a pattern that was
-compiled on a host of different endianness. If so, this function is used to
-flip other byte values.
-
-Arguments:
-  value        the number to flip
-  n            the number of bytes to flip (assumed to be 2 or 4)
-
-Returns:       the flipped value
-*/
-
-static unsigned long int
-byteflip(unsigned long int value, int n)
-{
-if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
-return ((value & 0x000000ff) << 24) |
-       ((value & 0x0000ff00) <<  8) |
-       ((value & 0x00ff0000) >>  8) |
-       ((value & 0xff000000) >> 24);
-}
-
-
-
-/*************************************************
-*       Test for a byte-flipped compiled regex   *
-*************************************************/
-
-/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
-pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
-is, it was compiled on a system of opposite endianness. The function is called
-only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
-we flip all the relevant values into a different data block, and return it.
-
-Arguments:
-  re               points to the regex
-  study            points to study data, or NULL
-  internal_re      points to a new regex block
-  internal_study   points to a new study block
-
-Returns:           the new block if is is indeed a byte-flipped regex
-                   NULL if it is not
-*/
-
-real_pcre *
-_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
-  const pcre_study_data *study, pcre_study_data *internal_study)
-{
-if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
-  return NULL;
-
-*internal_re = *re;           /* To copy other fields */
-internal_re->size = byteflip(re->size, sizeof(re->size));
-internal_re->options = byteflip(re->options, sizeof(re->options));
-internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
-internal_re->top_bracket =
-  (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
-internal_re->top_backref =
-  (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
-internal_re->first_byte =
-  (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
-internal_re->req_byte =
-  (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
-internal_re->name_table_offset =
-  (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
-internal_re->name_entry_size =
-  (pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
-internal_re->name_count =
-  (pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
-
-if (study != NULL)
-  {
-  *internal_study = *study;   /* To copy other fields */
-  internal_study->size = byteflip(study->size, sizeof(study->size));
-  internal_study->flags = byteflip(study->flags, sizeof(study->flags));
-  internal_study->minlength = byteflip(study->minlength,
-    sizeof(study->minlength));
-  }
-
-return internal_re;
-}
-
-/* End of pcre_tryflipped.c */
--- a/ext/pcre/pcrelib/pcre_ucd.c
+++ b/ext/pcre/pcrelib/pcre_ucd.c
--- a/ext/pcre/pcrelib/pcre_valid_utf8.c
+++ b/ext/pcre/pcrelib/pcre_valid_utf8.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -52,127 +52,246 @@ strings. */
 *************************************************/

 /* This function is called (optionally) at the start of compile or match, to
-validate that a supposed UTF-8 string is actually valid. The early check means
+check that a supposed UTF-8 string is actually valid. The early check means
 that subsequent code can assume it is dealing with a valid string. The check
-can be turned off for maximum performance, but the consequences of supplying
-an invalid string are then undefined.
+can be turned off for maximum performance, but the consequences of supplying an
+invalid string are then undefined.

 Originally, this function checked according to RFC 2279, allowing for values in
 the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
 the canonical format. Once somebody had pointed out RFC 3629 to me (it
 obsoletes 2279), additional restrictions were applied. The values are now
 limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
-subrange 0xd000 to 0xdfff is excluded.
+subrange 0xd800 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
+characters is still checked.
+
+From release 8.13 more information about the details of the error is passed
+back in the returned value:
+
+PCRE_UTF8_ERR0   No error
+PCRE_UTF8_ERR1   Missing 1 byte at the end of the string
+PCRE_UTF8_ERR2   Missing 2 bytes at the end of the string
+PCRE_UTF8_ERR3   Missing 3 bytes at the end of the string
+PCRE_UTF8_ERR4   Missing 4 bytes at the end of the string
+PCRE_UTF8_ERR5   Missing 5 bytes at the end of the string
+PCRE_UTF8_ERR6   2nd-byte's two top bits are not 0x80
+PCRE_UTF8_ERR7   3rd-byte's two top bits are not 0x80
+PCRE_UTF8_ERR8   4th-byte's two top bits are not 0x80
+PCRE_UTF8_ERR9   5th-byte's two top bits are not 0x80
+PCRE_UTF8_ERR10  6th-byte's two top bits are not 0x80
+PCRE_UTF8_ERR11  5-byte character is not permitted by RFC 3629
+PCRE_UTF8_ERR12  6-byte character is not permitted by RFC 3629
+PCRE_UTF8_ERR13  4-byte character with value > 0x10ffff is not permitted
+PCRE_UTF8_ERR14  3-byte character with value 0xd800-0xdfff is not permitted
+PCRE_UTF8_ERR15  Overlong 2-byte sequence
+PCRE_UTF8_ERR16  Overlong 3-byte sequence
+PCRE_UTF8_ERR17  Overlong 4-byte sequence
+PCRE_UTF8_ERR18  Overlong 5-byte sequence (won't ever occur)
+PCRE_UTF8_ERR19  Overlong 6-byte sequence (won't ever occur)
+PCRE_UTF8_ERR20  Isolated 0x80 byte (not within UTF-8 character)
+PCRE_UTF8_ERR21  Byte with the illegal value 0xfe or 0xff

 Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated
+  erroroffset  pointer to an error position offset variable

-Returns:       < 0    if the string is a valid UTF-8 string
-               >= 0   otherwise; the value is the offset of the bad byte
-
-Bad bytes can be:
-
-  . An isolated byte whose most significant bits are 0x80, because this
-    can only correctly appear within a UTF-8 character;
-
-  . A byte whose most significant bits are 0xc0, but whose other bits indicate
-    that there are more than 3 additional bytes (i.e. an RFC 2279 starting
-    byte, which is no longer valid under RFC 3629);
-
-  .
-
-The returned offset may also be equal to the length of the string; this means
-that one or more bytes is missing from the final UTF-8 character.
+Returns:       = 0    if the string is a valid UTF-8 string
+               > 0    otherwise, setting the offset of the bad character
 */

 int
-_pcre_valid_utf8(USPTR string, int length)
+PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF8
-register USPTR p;
+#ifdef SUPPORT_UTF
+register PCRE_PUCHAR p;

 if (length < 0)
  {
  for (p = string; *p != 0; p++);
-  length = p - string;
+  length = (int)(p - string);
  }

 for (p = string; length-- > 0; p++)
  {
-  register int ab;
-  register int c = *p;
-  if (c < 128) continue;
-  if (c < 0xc0) return p - string;
-  ab = _pcre_utf8_table4[c & 0x3f];     /* Number of additional bytes */
-  if (ab > 3) return p - string;        /* Too many for RFC 3629 */
-  if (length < ab) return p + 1 + length - string;   /* Missing bytes */
-  length -= ab;
+  register int ab, c, d;
+
+  c = *p;
+  if (c < 128) continue;                /* ASCII character */
+
+  if (c < 0xc0)                         /* Isolated 10xx xxxx byte */
+    {
+    *erroroffset = (int)(p - string);
+    return PCRE_UTF8_ERR20;
+    }
+
+  if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */
+    {
+    *erroroffset = (int)(p - string);
+    return PCRE_UTF8_ERR21;
+    }
+
+  ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes */
+  if (length < ab)
+    {
+    *erroroffset = (int)(p - string);          /* Missing bytes */
+    return ab - length;                 /* Codes ERR1 to ERR5 */
+    }
+  length -= ab;                         /* Length remaining */

  /* Check top bits in the second byte */
-  if ((*(++p) & 0xc0) != 0x80) return p - string;

-  /* Check for overlong sequences for each different length, and for the
-  excluded range 0xd000 to 0xdfff.  */
+  if (((d = *(++p)) & 0xc0) != 0x80)
+    {
+    *erroroffset = (int)(p - string) - 1;
+    return PCRE_UTF8_ERR6;
+    }
+
+  /* For each length, check that the remaining bytes start with the 0x80 bit
+  set and not the 0x40 bit. Then check for an overlong sequence, and for the
+  excluded range 0xd800 to 0xdfff. */

  switch (ab)
    {
-    /* Check for xx00 000x (overlong sequence) */
+    /* 2-byte character. No further bytes to check for 0x80. Check first byte
+    for xx00 000x (overlong sequence). */

-    case 1:
-    if ((c & 0x3e) == 0) return p - string;
-    continue;   /* We know there aren't any more bytes to check */
+    case 1: if ((c & 0x3e) == 0)
+      {
+      *erroroffset = (int)(p - string) - 1;
+      return PCRE_UTF8_ERR15;
+      }
+    break;

-    /* Check for 1110 0000, xx0x xxxx (overlong sequence) or
-                 1110 1101, 1010 xxxx (0xd000 - 0xdfff) */
+    /* 3-byte character. Check third byte for 0x80. Then check first 2 bytes
+      for 1110 0000, xx0x xxxx (overlong sequence) or
+          1110 1101, 1010 xxxx (0xd800 - 0xdfff) */

    case 2:
-    if ((c == 0xe0 && (*p & 0x20) == 0) ||
-        (c == 0xed && *p >= 0xa0))
-      return p - string;
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR7;
+      }
+    if (c == 0xe0 && (d & 0x20) == 0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR16;
+      }
+    if (c == 0xed && d >= 0xa0)
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR14;
+      }
    break;

-    /* Check for 1111 0000, xx00 xxxx (overlong sequence) or
-       greater than 0x0010ffff (f4 8f bf bf) */
+    /* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
+       bytes for 1111 0000, xx00 xxxx (overlong sequence), then check for a
+       character greater than 0x0010ffff (f4 8f bf bf) */

    case 3:
-    if ((c == 0xf0 && (*p & 0x30) == 0) ||
-        (c > 0xf4 ) ||
-        (c == 0xf4 && *p > 0x8f))
-      return p - string;
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE_UTF8_ERR8;
+      }
+    if (c == 0xf0 && (d & 0x30) == 0)
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE_UTF8_ERR17;
+      }
+    if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE_UTF8_ERR13;
+      }
    break;

-#if 0
-    /* These cases can no longer occur, as we restrict to a maximum of four
-    bytes nowadays. Leave the code here in case we ever want to add an option
-    for longer sequences. */
+    /* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
+    rejected by the length test below. However, we do the appropriate tests
+    here so that overlong sequences get diagnosed, and also in case there is
+    ever an option for handling these larger code points. */
+
+    /* 5-byte character. Check 3rd, 4th, and 5th bytes for 0x80. Then check for
+    1111 1000, xx00 0xxx */

-    /* Check for 1111 1000, xx00 0xxx */
    case 4:
-    if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE_UTF8_ERR9;
+      }
+    if (c == 0xf8 && (d & 0x38) == 0)
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE_UTF8_ERR18;
+      }
    break;

-    /* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
+    /* 6-byte character. Check 3rd-6th bytes for 0x80. Then check for
+    1111 1100, xx00 00xx. */
+
    case 5:
-    if (c == 0xfe || c == 0xff ||
-       (c == 0xfc && (*p & 0x3c) == 0)) return p - string;
+    if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
+      {
+      *erroroffset = (int)(p - string) - 2;
+      return PCRE_UTF8_ERR7;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
+      {
+      *erroroffset = (int)(p - string) - 3;
+      return PCRE_UTF8_ERR8;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
+      {
+      *erroroffset = (int)(p - string) - 4;
+      return PCRE_UTF8_ERR9;
+      }
+    if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE_UTF8_ERR10;
+      }
+    if (c == 0xfc && (d & 0x3c) == 0)
+      {
+      *erroroffset = (int)(p - string) - 5;
+      return PCRE_UTF8_ERR19;
+      }
    break;
-#endif
-
    }

-  /* Check for valid bytes after the 2nd, if any; all must start 10 */
-  while (--ab > 0)
+  /* Character is valid under RFC 2279, but 5-byte and 6-byte characters are
+  excluded by RFC 3629. The pointer p is currently at the last byte of the
+  character. */
+
+  if (ab > 3)
    {
-    if ((*(++p) & 0xc0) != 0x80) return p - string;
+    *erroroffset = (int)(p - string) - ab;
+    return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
    }
  }
-#else
+
+#else  /* SUPPORT_UTF */
 (void)(string);  /* Keep picky compilers happy */
 (void)(length);
 #endif

-return -1;
+return PCRE_UTF8_ERR0;   /* This indicates success */
 }

 /* End of pcre_valid_utf8.c */
--- a/ext/pcre/pcrelib/pcre_version.c
+++ b/ext/pcre/pcrelib/pcre_version.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -77,8 +77,13 @@ I could find no way of detecting that a macro is defined as an empty string at
 pre-processor time. This hack uses a standard trick for avoiding calling
 the STRING macro with an empty argument when doing the test. */

+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
 pcre_version(void)
+#else
+PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
+pcre16_version(void)
+#endif
 {
 return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
  XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
--- a/ext/pcre/pcrelib/pcre_xclass.c
+++ b/ext/pcre/pcrelib/pcre_xclass.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -62,39 +62,63 @@ Returns:      TRUE if character matches, else FALSE
 */

 BOOL
-_pcre_xclass(int c, const uschar *data)
+PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
 {
 int t;
 BOOL negated = (*data & XCL_NOT) != 0;

+(void)utf;
+#ifdef COMPILE_PCRE8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
 /* Character values < 256 are matched against a bitmap, if one is present. If
 not, we still carry on, because there may be ranges that start below 256 in the
 additional data. */

 if (c < 256)
  {
-  if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
-    return !negated;   /* char found */
+  if ((*data & XCL_MAP) != 0 &&
+    (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
+    return !negated; /* char found */
  }

 /* First skip the bit map if present. Then match against the list of Unicode
 properties or large chars or ranges that end with a large char. We won't ever
 encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */

-if ((*data++ & XCL_MAP) != 0) data += 32;
+if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);

 while ((t = *data++) != XCL_END)
  {
  int x, y;
  if (t == XCL_SINGLE)
    {
-    GETCHARINC(x, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      x = *data++;
    if (c == x) return !negated;
    }
  else if (t == XCL_RANGE)
    {
-    GETCHARINC(x, data);
-    GETCHARINC(y, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      {
+      x = *data++;
+      y = *data++;
+      }
    if (c >= x && c <= y) return !negated;
    }

@ -115,7 +139,7 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_GC:
-      if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
+      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
        return !negated;
      break;

@ -128,28 +152,28 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_ALNUM:
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-           _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
        return !negated;
      break;

      case PT_SPACE:    /* Perl space */
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
             == (t == XCL_PROP))
        return !negated;
      break;

      case PT_PXSPACE:  /* POSIX space */
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
           c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
        return !negated;
      break;

      case PT_WORD:
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-           _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
             == (t == XCL_PROP))
        return !negated;
      break;
--- a/ext/pcre/pcrelib/pcredemo.c
+++ b/ext/pcre/pcrelib/pcredemo.c
@ -248,7 +248,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
 * more than one byte.                                                    *
 *                                                                        *
 * However, there is a complication concerned with newlines. When the     *
-* newline convention is such that CRLF is a valid newline, we want must  *
+* newline convention is such that CRLF is a valid newline, we must       *
 * advance by two characters rather than one. The newline convention can  *
 * be set in the regex by (*CR), etc.; if not, we must find the default.  *
 *************************************************************************/
--- a/ext/pcre/pcrelib/pcreposix.c
+++ b/ext/pcre/pcrelib/pcreposix.c
@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@ -150,6 +150,16 @@ static const int eint[] = {
  REG_BADPAT,  /* (*MARK) must have an argument */
  REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UCP support */
  REG_BADPAT,  /* \c must be followed by an ASCII character */
+  REG_BADPAT,  /* \k is not followed by a braced, angle-bracketed, or quoted name */
+  /* 70 */
+  REG_BADPAT,  /* internal error: unknown opcode in find_fixedlength() */
+  REG_BADPAT,  /* \N is not supported in a class */
+  REG_BADPAT,  /* too many forward references */
+  REG_BADPAT,  /* disallowed UTF-8/16 code point (>= 0xd800 && <= 0xdfff) */
+  REG_BADPAT,  /* invalid UTF-16 string (should not occur) */
+  /* 75 */
+  REG_BADPAT,  /* overlong MARK name */
+  REG_BADPAT   /* character value in \u.... sequence is too large */
 };

 /* Table of texts corresponding to POSIX error codes */
@ -220,7 +230,7 @@ return length + addlength;
 PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
 regfree(regex_t *preg)
 {
-(pcre_free)(preg->re_pcre);
+(PUBL(free))(preg->re_pcre);
 }


@ -265,11 +275,12 @@ should not happen, but we all make mistakes), return REG_BADPAT. */

 if (preg->re_pcre == NULL)
  {
-  return (errorcode < sizeof(eint)/sizeof(const int))?
+  return (errorcode < (int)(sizeof(eint)/sizeof(const int)))?
    eint[errorcode] : REG_BADPAT;
  }

-preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
+(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
+  &(preg->re_nsub));
 return 0;
 }

@ -395,6 +406,7 @@ switch(rc)
  case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
  case PCRE_ERROR_BADUTF8: return REG_INVARG;
  case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+  case PCRE_ERROR_BADMODE: return REG_INVARG;
  default: return REG_ASSERT;
  }
 }
--- a/ext/pcre/pcrelib/pcreposix.h
+++ b/ext/pcre/pcrelib/pcreposix.h
@ -9,7 +9,7 @@
 Compatible Regular Expression library. It defines the things POSIX says should
 be there. I hope.

-            Copyright (c) 1997-2009 University of Cambridge
+            Copyright (c) 1997-2012 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/testdata/grepinput
+++ b/ext/pcre/pcrelib/testdata/grepinput
@ -602,6 +602,8 @@ ABOVE theatre
 AB.VE
 AB.VE the turtle

+010203040506
+
 PUT NEW DATA ABOVE THIS LINE.
 =============================

--- a/ext/pcre/pcrelib/testdata/grepinput8
+++ b/ext/pcre/pcrelib/testdata/grepinput8
@ -1,6 +1,5 @@
 X one
-X twoX threeX four
-X five
+X twoX threeX four
X five
 X six
 X sevenX eight X nine X ten

--- a/ext/pcre/pcrelib/testdata/grepoutput
+++ b/ext/pcre/pcrelib/testdata/grepoutput
@ -10,7 +10,7 @@ RC=0
 7:PATTERN at the start of a line.
 8:In the middle of a line, PATTERN appears.
 10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
 RC=0
 ---------------------------- Test 4 ------------------------------
 4
@ -19,7 +19,7 @@ RC=0
 ./testdata/grepinput:7:PATTERN at the start of a line.
 ./testdata/grepinput:8:In the middle of a line, PATTERN appears.
 ./testdata/grepinput:10:This pattern is in lower case.
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput:610:Check up on PATTERN near the end.
 ./testdata/grepinputx:3:Here is the pattern again.
 ./testdata/grepinputx:5:Pattern
 ./testdata/grepinputx:42:This line contains pattern not on a line by itself.
@ -28,7 +28,7 @@ RC=0
 7:PATTERN at the start of a line.
 8:In the middle of a line, PATTERN appears.
 10:This pattern is in lower case.
-608:Check up on PATTERN near the end.
+610:Check up on PATTERN near the end.
 3:Here is the pattern again.
 5:Pattern
 42:This line contains pattern not on a line by itself.
@ -323,10 +323,10 @@ RC=0
 ./testdata/grepinput-9-
 ./testdata/grepinput:10:This pattern is in lower case.
 --
-./testdata/grepinput-605-PUT NEW DATA ABOVE THIS LINE.
-./testdata/grepinput-606-=============================
-./testdata/grepinput-607-
-./testdata/grepinput:608:Check up on PATTERN near the end.
+./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE.
+./testdata/grepinput-608-=============================
+./testdata/grepinput-609-
+./testdata/grepinput:610:Check up on PATTERN near the end.
 --
 ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
 ./testdata/grepinputx-2-
@ -348,8 +348,8 @@ RC=0
 ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
 ./testdata/grepinput-13-
 --
-./testdata/grepinput:608:Check up on PATTERN near the end.
-./testdata/grepinput-609-This is the last line of this file.
+./testdata/grepinput:610:Check up on PATTERN near the end.
+./testdata/grepinput-611-This is the last line of this file.
 --
 ./testdata/grepinputx:3:Here is the pattern again.
 ./testdata/grepinputx-4-
@ -380,6 +380,7 @@ RC=0
 ---------------------------- Test 37 -----------------------------
 aaaaa0
 aaaaa2
+010203040506
 RC=0
 ======== STDERR ========
 pcregrep: pcre_exec() gave error -8 while matching this text:
@ -390,7 +391,7 @@ pcregrep: pcre_exec() gave error -8 while matching this text:

 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
 pcregrep: Check your regex for nested unlimited loops.
 ---------------------------- Test 38 ------------------------------
 This line contains a binary zero here >< for testing.
@ -514,7 +515,7 @@ This is a file of miscellaneous text that is used as test data for checking
 that the pcregrep command is working correctly. The file must be more than 24K
 long so that it needs more than a single read

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
 pcregrep: Check your regex for nested unlimited loops.
 RC=1
 ---------------------------- Test 63 -----------------------------
@ -524,7 +525,7 @@ This is a file of miscellaneous text that is used as test data for checking
 that the pcregrep command is working correctly. The file must be more than 24K
 long so that it needs more than a single read

-pcregrep: Error -8 or -21 means that a resource limit was exceeded.
+pcregrep: Error -8, -21 or -27 means that a resource limit was exceeded.
 pcregrep: Check your regex for nested unlimited loops.
 RC=1
 ---------------------------- Test 64 ------------------------------
@ -593,3 +594,77 @@ RC=0
 [00m[1;31mtriple:	t6_txt	s2_tag	s_txt	p_tag	p_txt	o_tag	o_txt

 [00mRC=0
+---------------------------- Test 71 -----------------------------
+01
+RC=0
+---------------------------- Test 72 -----------------------------
+[1;31m01[00m0203040506
+RC=0
+---------------------------- Test 73 -----------------------------
+[1;31m01[00m
+RC=0
+---------------------------- Test 74 -----------------------------
+01
+02
+RC=0
+---------------------------- Test 75 -----------------------------
+[1;31m01[00m[1;31m02[00m03040506
+RC=0
+---------------------------- Test 76 -----------------------------
+[1;31m01[00m
+[1;31m02[00m
+RC=0
+---------------------------- Test 77 -----------------------------
+01
+03
+RC=0
+---------------------------- Test 78 -----------------------------
+[1;31m01[00m02[1;31m03[00m040506
+RC=0
+---------------------------- Test 79 -----------------------------
+[1;31m01[00m
+[1;31m03[00m
+RC=0
+---------------------------- Test 80 -----------------------------
+01
+RC=0
+---------------------------- Test 81 -----------------------------
+[1;31m01[00m0203040506
+RC=0
+---------------------------- Test 82 -----------------------------
+[1;31m01[00m
+RC=0
+---------------------------- Test 83 -----------------------------
+pcregrep: line 4 of file ./testdata/grepinput3 is too long for the internal buffer
+pcregrep: check the --buffer-size option
+RC=2
+---------------------------- Test 84 -----------------------------
+testdata/grepinputv:fox jumps
+testdata/grepinputx:complete pair
+testdata/grepinputx:That was a complete pair
+testdata/grepinputx:complete pair
+RC=0
+---------------------------- Test 85 -----------------------------
+./testdata/grepinput3:Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+RC=0
+---------------------------- Test 86 -----------------------------
+Binary file ./testdata/grepbinary matches
+RC=0
+---------------------------- Test 87 -----------------------------
+RC=1
+---------------------------- Test 88 -----------------------------
+Binary file ./testdata/grepbinary matches
+RC=0
+---------------------------- Test 89 -----------------------------
+RC=1
+---------------------------- Test 90 -----------------------------
+RC=1
+---------------------------- Test 91 -----------------------------
+The quick brown fx jumps over the lazy dog.
+RC=0
+---------------------------- Test 92 -----------------------------
+The quick brown fx jumps over the lazy dog.
+RC=0
+---------------------------- Test 93 -----------------------------
+The quick brown fx jumps over the lazy dog.
+RC=0
--- a/ext/pcre/pcrelib/testdata/grepoutput8
+++ b/ext/pcre/pcrelib/testdata/grepoutput8
@ -1,11 +1,12 @@
 ---------------------------- Test U1 ------------------------------
 1:X one
-2:X two3:X three4:X four
-5:X five
+2:X two3:X three4:X four
5:X five
 6:X six
 7:X seven8:X eight 9:X nine 10:X ten
+RC=0
 ---------------------------- Test U2 ------------------------------
 12-Before 111
 13-Before 222 14-Before 33315:Match
 16-After 111
 17-After 222 18-After 333
+RC=0
--- a/ext/pcre/pcrelib/testdata/grepoutputN
+++ b/ext/pcre/pcrelib/testdata/grepoutputN
@ -1,22 +1,16 @@
 ---------------------------- Test N1 ------------------------------
-1:abc
-2:def
---------------------------- Test N2 ------------------------------
-1:abc
-def
+1:abc
2:def
---------------------------- Test N2 ------------------------------
+1:abc
def
 2:ghi
 jkl---------------------------- Test N3 ------------------------------
-2:def
-3:
+2:def
3:
 ghi
 jkl---------------------------- Test N4 ------------------------------
 2:ghi
 jkl---------------------------- Test N5 ------------------------------
-1:abc
-2:def
+1:abc
2:def
 3:ghi
 4:jkl---------------------------- Test N6 ------------------------------
-1:abc
-2:def
+1:abc
2:def
 3:ghi
 4:jkl
--- a/ext/pcre/pcrelib/testdata/testinput1
+++ b/ext/pcre/pcrelib/testdata/testinput1
--- a/ext/pcre/pcrelib/testdata/testinput10
+++ b/ext/pcre/pcrelib/testdata/testinput10
--- a/ext/pcre/pcrelib/testdata/testinput2
+++ b/ext/pcre/pcrelib/testdata/testinput2
--- a/ext/pcre/pcrelib/testdata/testinput4
+++ b/ext/pcre/pcrelib/testdata/testinput4
@ -1,5 +1,6 @@
-/-- This set of tests if for UTF-8 support, excluding Unicode properties. It is
-    compatible with all versions of Perl 5. --/
+/-- This set of tests is for UTF support, excluding Unicode properties. It is
+    compatible with all versions of Perl >= 5.10 and both the 8-bit and 16-bit
+    PCRE libraries. --/
   
 /a.b/8
    acb
@ -126,31 +127,6 @@
    *** Failers
    XYZ 

-/X(\C{3})/8
-    X\x{1234}
-
-/X(\C{4})/8
-    X\x{1234}YZ
-    
-/X\C*/8
-    XYZabcdce
-    
-/X\C*?/8
-    XYZabcde
-    
-/X\C{3,5}/8
-    Xabcdefg   
-    X\x{1234} 
-    X\x{1234}YZ
-    X\x{1234}\x{512}  
-    X\x{1234}\x{512}YZ
-
-/X\C{3,5}?/8
-    Xabcdefg   
-    X\x{1234} 
-    X\x{1234}YZ
-    X\x{1234}\x{512}  
-
 /[^a]+/8g
    bcd
    \x{100}aY\x{256}Z 
@ -456,17 +432,6 @@
    \x{150}X
    \x{200}X   

-/a\Cb/
-    aXb
-    a\nb
-  
-/a\Cb/8
-    aXb
-    a\nb
-    
-/a\C\Cb/8 
-    a\x{100}b 
-
 /[z-\x{100}]/8i
    z
    Z 
@ -644,4 +609,16 @@
 /A*/g8
    AAB\x{123}BAA

+/(abc)\1/8i
+   abc
+
+/(abc)\1/8
+   abc
+
+/a(*:a\x{1234}b)/8K
+    abc
+
+/a(*:a£b)/8K 
+    abc
+
 /-- End of testinput4 --/
--- a/ext/pcre/pcrelib/testdata/testinput5
+++ b/ext/pcre/pcrelib/testdata/testinput5
@ -1,72 +1,36 @@
-/-- This set of tests checks the API, internals, and non-Perl stuff for UTF-8
-    support, excluding Unicode properties. --/
+/-- This set of tests checks the API, internals, and non-Perl stuff for UTF
+    support, excluding Unicode properties. However, tests that give different
+    results in 8-bit and 16-bit modes are excluded (see tests 16 and 17). --/

-/\x{100}/8DZ
-
-/\x{1000}/8DZ
-
-/\x{10000}/8DZ
-
-/\x{100000}/8DZ
-
-/\x{1000000}/8DZ
-
-/\x{4000000}/8DZ
-
-/\x{7fffFFFF}/8DZ
-
-/[\x{ff}]/8DZ
-
-/[\x{100}]/8DZ
+/\x{110000}/8DZ

 /\x{ffffffff}/8

 /\x{100000000}/8

+/\x{d800}/8
+
+/\x{dfff}/8
+
+/\x{d7ff}/8
+
+/\x{e000}/8
+
 /^\x{100}a\x{1234}/8
    \x{100}a\x{1234}bcd

-/\x80/8DZ
-
-/\xff/8DZ
-
 /\x{0041}\x{2262}\x{0391}\x{002e}/DZ8
    \x{0041}\x{2262}\x{0391}\x{002e}
    
-/\x{D55c}\x{ad6d}\x{C5B4}/DZ8 
-    \x{D55c}\x{ad6d}\x{C5B4} 
-
-/\x{65e5}\x{672c}\x{8a9e}/DZ8
-    \x{65e5}\x{672c}\x{8a9e}
-
-/\x{80}/DZ8
-
-/\x{084}/DZ8
-
-/\x{104}/DZ8
-
-/\x{861}/DZ8
-
-/\x{212ab}/DZ8
-
 /.{3,5}X/DZ8
    \x{212ab}\x{212ab}\x{212ab}\x{861}X

-
 /.{3,5}?/DZ8
    \x{212ab}\x{212ab}\x{212ab}\x{861}

 /(?<=\C)X/8
    Should produce an error diagnostic
    
-/-- This one is here not because it's different to Perl, but because the way
-the captured single-byte is displayed. (In Perl it becomes a character, and you
-can't tell the difference.) --/
-    
-/X(\C)(.*)/8
-    X\x{1234}
-    X\nabc 
-    
 /^[ab]/8DZ
    bar
    *** Failers
@ -81,26 +45,6 @@ can't tell the difference.) --/
    *** Failers 
    aaa
  
-/[^ab\xC0-\xF0]/8SDZ
-    \x{f1}
-    \x{bf}
-    \x{100}
-    \x{1000}   
-    *** Failers
-    \x{c0} 
-    \x{f0} 
-
-/Ä€{3,4}/8SDZ
-  \x{100}\x{100}\x{100}\x{100\x{100}
-
-/(\x{100}+|x)/8SDZ
-
-/(\x{100}*a|x)/8SDZ
-
-/(\x{100}{0,2}a|x)/8SDZ
-
-/(\x{100}{1,2}a|x)/8SDZ
-
 /\x{100}*(\d+|"(?1)")/8
    1234
    "1234" 
@ -111,33 +55,17 @@ can't tell the difference.) --/
    *** Failers 
    \x{100}\x{100}abcd

-/\x{100}/8DZ
-
 /\x{100}*/8DZ

 /a\x{100}*/8DZ

 /ab\x{100}*/8DZ

-/a\x{100}\x{101}*/8DZ
-
-/a\x{100}\x{101}+/8DZ
-
 /\x{100}*A/8DZ
    A

 /\x{100}*\d(?R)/8DZ

-/[^\x{c4}]/DZ
-
-/[^\x{c4}]/8DZ
-
-/[\x{100}]/8DZ
-    \x{100}
-    Z\x{100}
-    \x{100}Z
-    *** Failers 
-
 /[Z\x{100}]/8DZ
    Z\x{100}
    \x{100}
@ -162,13 +90,8 @@ can't tell the difference.) --/
 /[\xFF]/DZ
    >\xff<

-/[\xff]/DZ8
-    >\x{ff}<
-
 /[^\xFF]/DZ

-/[^\xff]/8DZ
-
 /[Ä-Ü]/8
    Ö # Matches without Study
    \x{d6}
@ -185,45 +108,6 @@ can't tell the difference.) --/
    Ö <-- Same with Study
    \x{d6} 

-/[Ã]/8
-
-/Ã/8
-
-/ÃÃÃxxx/8
-
-/ÃÃÃxxx/8?DZ
-
-/abc/8
-    Ã]
-    Ã
-    ÃÃÃ
-    ÃÃÃ\?
-
-/anything/8
-    \xc0\x80
-    \xc1\x8f 
-    \xe0\x9f\x80
-    \xf0\x8f\x80\x80 
-    \xf8\x87\x80\x80\x80  
-    \xfc\x83\x80\x80\x80\x80
-    \xfe\x80\x80\x80\x80\x80  
-    \xff\x80\x80\x80\x80\x80  
-    \xc3\x8f
-    \xe0\xaf\x80
-    \xe1\x80\x80
-    \xf0\x9f\x80\x80 
-    \xf1\x8f\x80\x80 
-    \xf8\x88\x80\x80\x80  
-    \xf9\x87\x80\x80\x80  
-    \xfc\x84\x80\x80\x80\x80
-    \xfd\x83\x80\x80\x80\x80
-    \?\xf8\x88\x80\x80\x80  
-    \?\xf9\x87\x80\x80\x80  
-    \?\xfc\x84\x80\x80\x80\x80
-    \?\xfd\x83\x80\x80\x80\x80
-
-/\x{100}abc(xyz(?1))/8DZ
-
 /[^\x{100}]abc(xyz(?1))/8DZ

 /[ab\x{100}]abc(xyz(?1))/8DZ
@ -243,17 +127,8 @@ can't tell the difference.) --/
 /\w/8
    \x{100}X   

-/a\x{1234}b/P8
-    a\x{1234}b
-
 /^\ሴ/8DZ

-/\777/I
-
-/\777/8I
-  \x{1ff}
-  \777 
-  
 /\x{100}*\d/8DZ

 /\x{100}*\s/8DZ
@ -266,12 +141,6 @@ can't tell the difference.) --/

 /\x{100}*\W/8DZ

-/\x{100}+\x{200}/8DZ
-
-/\x{100}+X/8DZ
-
-/X+\x{200}/8DZ
-
 /()()()()()()()()()()
 ()()()()()()()()()()
 ()()()()()()()()()()
@ -283,8 +152,6 @@ can't tell the difference.) --/

 /^[\QĀ\E-\QŐ\E]/BZ8

-/^[\QÄ€\E-\QÅ<51>\E/BZ8
-
 /^abc./mgx8<any>
    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK

@ -379,23 +246,6 @@ can't tell the difference.) --/
 /.*$/8<any>
    \x{1ec5} 
    
-/-- This tests the stricter UTF-8 check according to RFC 3629. --/ 
-    
-/X/8
-    \x{0}\x{d7ff}\x{e000}\x{10ffff}
-    \x{d800}
-    \x{d800}\?
-    \x{da00}
-    \x{da00}\?
-    \x{dfff}
-    \x{dfff}\?
-    \x{110000}    
-    \x{110000}\?    
-    \x{2000000} 
-    \x{2000000}\? 
-    \x{7fffffff} 
-    \x{7fffffff}\? 
-
 /a\Rb/I8<bsr_anycrlf>
    a\rb
    a\nb
@ -454,16 +304,10 @@ can't tell the difference.) --/

 /(\x{de})\1/
    \x{de}\x{de}
-    \x{123} 

 /X/8f<any> 
    A\x{1ec5}ABCXYZ

-/(*UTF8)\x{1234}/
-  abcd\x{1234}pqr
-
-/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I
-
 /Xa{2,4}b/8
    X\P
    Xa\P
@ -745,53 +589,184 @@ can't tell the difference.) --/
 /X\W{3}X/8
    \PX

-/\h/SI
-
-/\h/SI8
-    ABC\x{09}
-    ABC\x{20}
-    ABC\x{a0}
-    ABC\x{1680}
-    ABC\x{180e}
-    ABC\x{2000}
-    ABC\x{202f} 
-    ABC\x{205f} 
-    ABC\x{3000} 
-
-/\v/SI
-
-/\v/SI8
-    ABC\x{0a}
-    ABC\x{0b}
-    ABC\x{0c}
-    ABC\x{0d}
-    ABC\x{85}
-    ABC\x{2028}
-
-/\R/SI
-
-/\R/SI8
-
-/\h*A/SI8
-    CDBABC
-    
-/\v+A/SI8
-
-/\s?xxx\s/8SI
-
 /\sxxx\s/8T1
    AB\x{85}xxx\x{a0}XYZ
    AB\x{a0}xxx\x{85}XYZ

-/\sxxx\s/I8ST1
-    AB\x{85}xxx\x{a0}XYZ
-    AB\x{a0}xxx\x{85}XYZ
-
 /\S \S/8T1
    \x{a2} \x{84} 

-/\S \S/I8ST1
-    \x{a2} \x{84} 
-    A Z 
+'A#хц'8x<any>BZ
+
+'A#хц
+  PQ'8x<any>BZ
+  
+/a+#хaa
+  z#XX?/8x<any>BZ 
+
+/a+#хaa
+  z#х?/8x<any>BZ 
+
+/\g{A}xxx#bXX(?'A'123)
(?'A'456)/8x<any>BZ
+
+/\g{A}xxx#bх(?'A'123)
(?'A'456)/8x<any>BZ
+
+/^\cģ/8
+
+/(\R*)(.)/s8
+    \r\n
+    \r\r\n\n\r 
+    \r\r\n\n\r\n 
+
+/(\R)*(.)/s8
+    \r\n
+    \r\r\n\n\r 
+    \r\r\n\n\r\n 
+
+/[^\x{1234}]+/iS8I   
+
+/[^\x{1234}]+?/iS8I   
+
+/[^\x{1234}]++/iS8I   
+
+/[^\x{1234}]{2}/iS8I
+
+//<bsr_anycrlf><bsr_unicode>
+
+/f.*/
+    \P\Pfor
+
+/f.*/s
+    \P\Pfor
+
+/f.*/8
+    \P\Pfor
+
+/f.*/8s
+    \P\Pfor
+    
+/\x{d7ff}\x{e000}/8
+
+/\x{d800}/8
+
+/\x{dfff}/8 
+
+/\h+/8
+    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
+    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
+
+/[\h\x{e000}]+/8BZ
+    \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
+    \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
+
+/\H+/8
+    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
+    \x{2000}\x{200a}\x{1fff}\x{200b}
+    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
+    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
+
+/[\H\x{d7ff}]+/8BZ
+    \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
+    \x{2000}\x{200a}\x{1fff}\x{200b}
+    \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
+    \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
+
+/\v+/8
+    \x{2027}\x{2030}\x{2028}\x{2029}
+    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
+
+/[\v\x{e000}]+/8BZ
+    \x{2027}\x{2030}\x{2028}\x{2029}
+    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
+
+/\V+/8
+    \x{2028}\x{2029}\x{2027}\x{2030}
+    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
+
+/[\V\x{d7ff}]+/8BZ
+    \x{2028}\x{2029}\x{2027}\x{2030}
+    \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
+
+/\R+/8<bsr_unicode>
+    \x{2027}\x{2030}\x{2028}\x{2029}
+    \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
+
+/(..)\1/8
+    ab\P
+    aba\P
+    abab\P
+
+/(..)\1/8i
+    ab\P
+    abA\P
+    aBAb\P
+
+/(..)\1{2,}/8
+    ab\P
+    aba\P
+    abab\P
+    ababa\P
+    ababab\P
+    ababab\P\P
+    abababa\P
+    abababa\P\P
+
+/(..)\1{2,}/8i
+    ab\P
+    aBa\P
+    aBAb\P
+    AbaBA\P
+    abABAb\P
+    aBAbaB\P\P
+    abABabA\P
+    abaBABa\P\P
+
+/(..)\1{2,}?x/8i
+    ab\P
+    abA\P
+    aBAb\P
+    abaBA\P
+    abAbaB\P
+    abaBabA\P
+    abAbABaBx\P
+
+/./8<CRLF>
+    \r\P
+    \r\P\P 
+  
+/.{2,3}/8<CRLF>
+    \r\P 
+    \r\P\P
+    \r\r\P
+    \r\r\P\P
+    \r\r\r\P
+    \r\r\r\P\P     
+
+/.{2,3}?/8<CRLF>
+    \r\P 
+    \r\P\P
+    \r\r\P
+    \r\r\P\P
+    \r\r\r\P
+    \r\r\r\P\P     
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZ
+
+/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/8BZi
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZ
+
+/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/8BZi
+
+/(?<=\x{1234}\x{1234})\bxy/I8
+
+/(?<!^)ETA/8
+    ETA
+
+/\u0100/<JS>8BZ
+
+/[\u0100-\u0200]/<JS>8BZ
+
+/\ud800/<JS>8

 /-- End of testinput5 --/
--- a/ext/pcre/pcrelib/testdata/testinput6
+++ b/ext/pcre/pcrelib/testdata/testinput6
@ -655,6 +655,7 @@
    A\x80

 /^[\p{Arabic}]/8
+    \x{604}
    \x{60e} 
    \x{656} 
    \x{657} 
@ -670,7 +671,6 @@
    \x{6ef}
    \x{6fa}  
    ** Failers
-    \x{600}
    \x{650}
    \x{651}  
    \x{652}  
@ -688,7 +688,6 @@
    \x{61f}  
    \x{964}
    \x{965}  
-    \x{970}  

 /^\p{Inherited}/8
    \x{64b}
@ -802,4 +801,18 @@
    ** Failers 
    a\xFCb   

+/ⱥ/8i
+    ⱥ
+    Ⱥx 
+    Ⱥ 
+
+/[ⱥ]/8i
+    ⱥ
+    Ⱥx 
+    Ⱥ 
+
+/Ⱥ/8i
+    Ⱥ
+    ⱥ
+
 /-- End of testinput6 --/
--- a/ext/pcre/pcrelib/testdata/testinput7
+++ b/ext/pcre/pcrelib/testdata/testinput7
--- a/ext/pcre/pcrelib/testdata/testinput8
+++ b/ext/pcre/pcrelib/testdata/testinput8
--- a/ext/pcre/pcrelib/testdata/testinput9
+++ b/ext/pcre/pcrelib/testdata/testinput9
--- a/ext/pcre/pcrelib/testdata/testoutput1
+++ b/ext/pcre/pcrelib/testdata/testoutput1
--- a/ext/pcre/pcrelib/testdata/testoutput10
+++ b/ext/pcre/pcrelib/testdata/testoutput10
--- a/ext/pcre/pcrelib/testdata/testoutput2
+++ b/ext/pcre/pcrelib/testdata/testoutput2
--- a/ext/pcre/pcrelib/testdata/testoutput4
+++ b/ext/pcre/pcrelib/testdata/testoutput4
@ -1,5 +1,6 @@
-/-- This set of tests if for UTF-8 support, excluding Unicode properties. It is
-    compatible with all versions of Perl 5. --/
+/-- This set of tests is for UTF support, excluding Unicode properties. It is
+    compatible with all versions of Perl >= 5.10 and both the 8-bit and 16-bit
+    PCRE libraries. --/
   
 /a.b/8
    acb
@ -255,46 +256,6 @@ No match
    XYZ 
 No match

-/X(\C{3})/8
-    X\x{1234}
- 0: X\x{1234}
- 1: \x{1234}
-
-/X(\C{4})/8
-    X\x{1234}YZ
- 0: X\x{1234}Y
- 1: \x{1234}Y
-    
-/X\C*/8
-    XYZabcdce
- 0: XYZabcdce
-    
-/X\C*?/8
-    XYZabcde
- 0: X
-    
-/X\C{3,5}/8
-    Xabcdefg   
- 0: Xabcde
-    X\x{1234} 
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}YZ
-    X\x{1234}\x{512}  
- 0: X\x{1234}\x{512}
-    X\x{1234}\x{512}YZ
- 0: X\x{1234}\x{512}
-
-/X\C{3,5}?/8
-    Xabcdefg   
- 0: Xabc
-    X\x{1234} 
- 0: X\x{1234}
-    X\x{1234}YZ
- 0: X\x{1234}
-    X\x{1234}\x{512}  
- 0: X\x{1234}
-
 /[^a]+/8g
    bcd
 0: bcd
@ -791,22 +752,6 @@ No match
    \x{200}X   
 No match

-/a\Cb/
-    aXb
- 0: aXb
-    a\nb
- 0: a\x0ab
-  
-/a\Cb/8
-    aXb
- 0: aXb
-    a\nb
- 0: a\x{0a}b
-    
-/a\C\Cb/8 
-    a\x{100}b 
- 0: a\x{100}b
-
 /[z-\x{100}]/8i
    z
 0: z
@ -1128,4 +1073,22 @@ No match
 0: AA
 0: 

+/(abc)\1/8i
+   abc
+No match
+
+/(abc)\1/8
+   abc
+No match
+
+/a(*:a\x{1234}b)/8K
+    abc
+ 0: a
+MK: a\x{1234}b
+
+/a(*:a£b)/8K 
+    abc
+ 0: a
+MK: a\x{a3}b
+
 /-- End of testinput4 --/
--- a/ext/pcre/pcrelib/testdata/testoutput5
+++ b/ext/pcre/pcrelib/testdata/testoutput5
--- a/ext/pcre/pcrelib/testdata/testoutput6
+++ b/ext/pcre/pcrelib/testdata/testoutput6
@ -1114,6 +1114,8 @@ No match
 0: A\x80

 /^[\p{Arabic}]/8
+    \x{604}
+ 0: \x{604}
    \x{60e} 
 0: \x{60e}
    \x{656} 
@ -1143,8 +1145,6 @@ No match
    \x{6fa}  
 0: \x{6fa}
    ** Failers
-No match
-    \x{600}
 No match
    \x{650}
 No match
@ -1176,8 +1176,6 @@ No match
 0: \x{964}
    \x{965}  
 0: \x{965}
-    \x{970}  
- 0: \x{970}

 /^\p{Inherited}/8
    \x{64b}
@ -1353,4 +1351,26 @@ No match
    a\xFCb   
 No match

+/ⱥ/8i
+    ⱥ
+ 0: \x{2c65}
+    Ⱥx 
+ 0: \x{23a}
+    Ⱥ 
+ 0: \x{23a}
+
+/[ⱥ]/8i
+    ⱥ
+ 0: \x{2c65}
+    Ⱥx 
+ 0: \x{23a}
+    Ⱥ 
+ 0: \x{23a}
+
+/Ⱥ/8i
+    Ⱥ
+ 0: \x{23a}
+    ⱥ
+ 0: \x{2c65}
+
 /-- End of testinput6 --/
--- a/ext/pcre/pcrelib/testdata/testoutput7
+++ b/ext/pcre/pcrelib/testdata/testoutput7
--- a/ext/pcre/pcrelib/testdata/testoutput8
+++ b/ext/pcre/pcrelib/testdata/testoutput8
--- a/ext/pcre/pcrelib/testdata/testoutput9
+++ b/ext/pcre/pcrelib/testdata/testoutput9
--- a/ext/pcre/pcrelib/ucp.h
+++ b/ext/pcre/pcrelib/ucp.h
@ -153,7 +153,19 @@ enum {
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
-  ucp_Tai_Viet
+  ucp_Tai_Viet,
+  /* New for Unicode 6.0.0: */
+  ucp_Batak,
+  ucp_Brahmi,
+  ucp_Mandaic,
+  /* New for Unicode 6.1.0: */
+  ucp_Chakma,
+  ucp_Meroitic_Cursive,
+  ucp_Meroitic_Hieroglyphs,
+  ucp_Miao,
+  ucp_Sharada,
+  ucp_Sora_Sompeng,
+  ucp_Takri
 };

 #endif