diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4
index d316bddf3a7..6f2f0c534ed 100644
--- a/ext/mbstring/config.m4
+++ b/ext/mbstring/config.m4
@@ -96,6 +96,12 @@ int main() { return foo(10, "", 3.14); }
oniguruma/reggnu.c
oniguruma/regparse.c
oniguruma/regenc.c
+ oniguruma/regext.c
+ oniguruma/regsyntax.c
+ oniguruma/regtrav.c
+ oniguruma/regversion.c
+ oniguruma/st.c
+ oniguruma/enc/unicode.c
oniguruma/enc/ascii.c
oniguruma/enc/utf8.c
oniguruma/enc/euc_jp.c
@@ -120,6 +126,10 @@ int main() { return foo(10, "", 3.14); }
oniguruma/enc/koi8.c
oniguruma/enc/koi8_r.c
oniguruma/enc/big5.c
+ oniguruma/enc/utf16_be.c
+ oniguruma/enc/utf16_le.c
+ oniguruma/enc/utf32_be.c
+ oniguruma/enc/utf32_le.c
])
fi
])
@@ -164,6 +174,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_iso8859_13.c
libmbfl/filters/mbfilter_iso8859_14.c
libmbfl/filters/mbfilter_iso8859_15.c
+ libmbfl/filters/mbfilter_iso8859_16.c
libmbfl/filters/mbfilter_iso8859_2.c
libmbfl/filters/mbfilter_iso8859_3.c
libmbfl/filters/mbfilter_iso8859_4.c
@@ -261,3 +272,5 @@ if test "$PHP_MBSTRING" != "no"; then
PHP_MBSTRING_SETUP_LIBMBFL
PHP_MBSTRING_EXTENSION
fi
+
+# vim600: sts=2 sw=2 et
diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32
index 8484b462317..665f4b4ba03 100644
--- a/ext/mbstring/config.w32
+++ b/ext/mbstring/config.w32
@@ -24,13 +24,14 @@ if (PHP_MBSTRING == "yes") {
mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \
mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \
mbfilter_iso8859_1.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c \
- mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_2.c \
- mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c \
- mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c \
- mbfilter_iso8859_9.c mbfilter_jis.c mbfilter_koi8r.c mbfilter_qprint.c \
- mbfilter_sjis.c mbfilter_ucs2.c mbfilter_ucs4.c mbfilter_uhc.c \
- mbfilter_utf16.c mbfilter_utf32.c mbfilter_utf7.c mbfilter_utf7imap.c \
- mbfilter_utf8.c mbfilter_uuencode.c", "mbstring");
+ mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c \
+ mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
+ mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
+ mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
+ mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
+ mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
+ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
+ mbfilter_uuencode.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
@@ -51,13 +52,15 @@ if (PHP_MBSTRING == "yes") {
AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h');
AC_DEFINE('HAVE_MBREGEX', 1);
ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \
- regenc.c regexec.c reggnu.c regparse.c regposerr.c", "mbstring");
+ regenc.c regexec.c reggnu.c regparse.c regposerr.c \
+ regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring");
ADD_SOURCES("ext/mbstring/oniguruma/enc", "ascii.c big5.c \
euc_jp.c euc_kr.c euc_tw.c iso8859_1.c iso8859_2.c \
iso8859_3.c iso8859_4.c iso8859_5.c iso8859_6.c \
iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \
iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \
- koi8.c koi8_r.c sjis.c utf8.c", "mbstring");
+ koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \
+ utf32_be.c utf32_le.c", "mbstring");
ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring");
}
}
diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am
index d3e5cfd257e..aab009b9d5d 100644
--- a/ext/mbstring/libmbfl/filters/Makefile.am
+++ b/ext/mbstring/libmbfl/filters/Makefile.am
@@ -1,5 +1,104 @@
-EXTRA_DIST=Makefile.bcc32
+EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_uhc.h
+libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_uhc.h
+
+mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
+
+mbfilter_iso8859_3.c: unicode_table_iso8859_3.h
+
+mbfilter_iso8859_4.c: unicode_table_iso8859_4.h
+
+mbfilter_iso8859_5.c: unicode_table_iso8859_5.h
+
+mbfilter_iso8859_6.c: unicode_table_iso8859_6.h
+
+mbfilter_iso8859_7.c: unicode_table_iso8859_7.h
+
+mbfilter_iso8859_8.c: unicode_table_iso8859_8.h
+
+mbfilter_iso8859_9.c: unicode_table_iso8859_9.h
+
+mbfilter_iso8859_10.c: unicode_table_iso8859_10.h
+
+mbfilter_iso8859_11.c: unicode_table_iso8859_11.h
+
+mbfilter_iso8859_13.c: unicode_table_iso8859_13.h
+
+mbfilter_iso8859_14.c: unicode_table_iso8859_13.h
+
+mbfilter_iso8859_15.c: unicode_table_iso8859_15.h
+
+mbfilter_iso8859_16.c: unicode_table_iso8859_16.h
+
+8859-1.TXT 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT 8859-6.TXT \
+8859-7.TXT 8859-8.TXT 8859-9.TXT 8859-10.TXT 8859-11.TXT 8859-13.TXT \
+8859-14.TXT 8859-15.TXT 8859-16.TXT:
+ $(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/$@
+
+unicode_table_iso8859_1.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_1_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_1_H -f mk_sb_tbl.awk 8859-1.TXT > $@
+
+unicode_table_iso8859_2.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_2_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_2_H -f mk_sb_tbl.awk 8859-2.TXT > $@
+
+unicode_table_iso8859_3.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_3_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_3_H -f mk_sb_tbl.awk 8859-3.TXT > $@
+
+unicode_table_iso8859_4.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_4_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_4_H -f mk_sb_tbl.awk 8859-4.TXT > $@
+
+unicode_table_iso8859_5.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_5_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_5_H -f mk_sb_tbl.awk 8859-5.TXT > $@
+
+unicode_table_iso8859_6.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_6_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_6_H -f mk_sb_tbl.awk 8859-6.TXT > $@
+
+unicode_table_iso8859_7.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_7_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_7_H -f mk_sb_tbl.awk 8859-7.TXT > $@
+
+unicode_table_iso8859_8.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_8_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_8_H -f mk_sb_tbl.awk 8859-8.TXT > $@
+
+unicode_table_iso8859_9.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_9_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_9_H -f mk_sb_tbl.awk 8859-9.TXT > $@
+
+unicode_table_iso8859_10.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_10_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_10_H -f mk_sb_tbl.awk 8859-10.TXT > $@
+
+unicode_table_iso8859_11.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_11_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_11_H -f mk_sb_tbl.awk 8859-11.TXT > $@
+
+unicode_table_iso8859_13.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_13_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_13_H -f mk_sb_tbl.awk 8859-13.TXT > $@
+
+unicode_table_iso8859_14.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_14_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_14_H -f mk_sb_tbl.awk 8859-14.TXT > $@
+
+unicode_table_iso8859_15.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_15_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLE_ISO8859_15_H -f mk_sb_tbl.awk 8859-15.TXT > $@
+
+unicode_table_iso8859_16.h: mk_sb_tbl.awk
+ $(AWK) -v TABLE_NAME=iso8859_16_ucs_table \
+ -v IFNDEF_NAME=UNICODE_TABLEISO8859_16_H -f mk_sb_tbl.awk 8859-16.TXT > $@
+
+unidata: 8859-1.TXT 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT 8859-6.TXT \
+8859-7.TXT 8859-8.TXT 8859-9.TXT 8859-10.TXT 8859-11.TXT 8859-13.TXT \
+8859-14.TXT 8859-15.TXT 8859-16.TXT
+
+.PHONY: unidata
diff --git a/ext/mbstring/libmbfl/filters/Makefile.bcc32 b/ext/mbstring/libmbfl/filters/Makefile.bcc32
index 4219ed9061d..32bd161093c 100644
--- a/ext/mbstring/libmbfl/filters/Makefile.bcc32
+++ b/ext/mbstring/libmbfl/filters/Makefile.bcc32
@@ -1,6 +1,6 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I../mbfl
-OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj
+OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj
all: $(OBJS)
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c
new file mode 100755
index 00000000000..8f12feb47e8
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.c
@@ -0,0 +1,136 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this files was separated from mbfilter.c
+ * by moriyoshi koizumi on 4 dec 2002.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_iso8859_16.h"
+#include "unicode_table_iso8859_16.h"
+
+static const char *mbfl_encoding_8859_16_aliases[] = {"ISO_8859-16", NULL};
+
+const mbfl_encoding mbfl_encoding_8859_16 = {
+ mbfl_no_encoding_8859_16,
+ "ISO-8859-16",
+ "ISO-8859-16",
+ (const char *(*)[])&mbfl_encoding_8859_16_aliases,
+ NULL,
+ MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_8859_16 = {
+ mbfl_no_encoding_8859_16,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_true
+};
+
+const struct mbfl_convert_vtbl vtbl_8859_16_wchar = {
+ mbfl_no_encoding_8859_16,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_8859_16_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_8859_16 = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_8859_16,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_8859_16,
+ mbfl_filt_conv_common_flush
+};
+
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * ISO-8859-16 => wchar
+ */
+int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter)
+{
+ int s;
+
+ if (c >= 0 && c < 0xa0) {
+ s = c;
+ } else if (c >= 0xa0 && c < 0x100) {
+ s = iso8859_16_ucs_table[c - 0xa0];
+ if (s <= 0) {
+ s = c;
+ s &= MBFL_WCSPLANE_MASK;
+ s |= MBFL_WCSPLANE_8859_16;
+ }
+ } else {
+ s = c;
+ s &= MBFL_WCSGROUP_MASK;
+ s |= MBFL_WCSGROUP_THROUGH;
+ }
+
+ CK((*filter->output_function)(s, filter->data));
+
+ return c;
+}
+
+/*
+ * wchar => ISO-8859-16
+ */
+int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter)
+{
+ int s, n;
+
+ if (c >= 0 && c < 0xa0) {
+ s = c;
+ } else {
+ s = -1;
+ n = 95;
+ while (n >= 0) {
+ if (c == iso8859_16_ucs_table[n]) {
+ s = 0xa0 + n;
+ break;
+ }
+ n--;
+ }
+ if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_16) {
+ s = c & MBFL_WCSPLANE_MASK;
+ }
+ }
+
+ if (s >= 0) {
+ CK((*filter->output_function)(s, filter->data));
+ } else {
+ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+ CK(mbfl_filt_conv_illegal_output(c, filter));
+ }
+ }
+
+ return c;
+}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h
new file mode 100755
index 00000000000..a5e2d2fbef6
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_16.h
@@ -0,0 +1,23 @@
+/*
+ * COPYRIGHT NOTICE
+ *
+ * This file is a portion of "streamable kanji code filter and converter"
+ * library, which is distributed under GNU Lesser General Public License
+ * version 2.1.
+ *
+ */
+
+#ifndef MBFL_MBFILTER_ISO8859_16_H
+#define MBFL_MBFILTER_ISO8859_16_H
+
+#include "mbfilter.h"
+
+extern const mbfl_encoding mbfl_encoding_8859_16;
+extern const struct mbfl_identify_vtbl vtbl_identify_8859_16;
+extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16;
+
+int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_ISO8859_16_H */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c
index 9b18a1cb2b9..188d088ed0a 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_qprint.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_qprint.c
@@ -155,11 +155,30 @@ int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
*/
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
- int n;
+ int n, m;
+
+ static int hex2code_map[] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+ };
switch (filter->status) {
case 1:
- if ((c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x46)) { /* 0 - 9 or A - F */
+ if (hex2code_map[c & 0xff] >= 0) {
filter->cache = c;
filter->status = 2;
} else if (c == 0x0d) { /* soft line feed */
@@ -173,21 +192,13 @@ int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
}
break;
case 2:
- n = filter->cache;
- if (n >= 0x30 && n <= 0x39) { /* '0' - '9' */
- n -= 48; /* 48 = '0' */
- } else {
- n -= 55; /* 55 = 'A' - 10 */
- }
- n <<= 4;
- if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
- n += (c - 48);
- } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
- n += (c - 55);
- } else {
+ m = hex2code_map[c & 0xff];
+ if (m < 0) {
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
CK((*filter->output_function)(filter->cache, filter->data));
n = c;
+ } else {
+ n = hex2code_map[filter->cache] << 4 | m;
}
CK((*filter->output_function)(n, filter->data));
filter->status = 0;
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_10.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_10.h
index 0de0d17eb47..48b1301b729 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_10.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_10.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_10_H
-#define UNICODE_TABLE_ISO8859_10_H
-
-static const unsigned short iso8859_10_ucs_table[] = {
- 0x00A0,0x0104,0x0112,0x0122,0x0124,0x0128,0x0136,0x00A7,
- 0x013B,0x0110,0x0160,0x0166,0x017D,0x00AD,0x016A,0x014A,
- 0x00B0,0x0105,0x0113,0x0123,0x012B,0x0129,0x0137,0x00B7,
- 0x013C,0x0111,0x0161,0x0167,0x017E,0x2015,0x016B,0x014B,
- 0x0100,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x012E,
- 0x010C,0x00C9,0x0118,0x00CB,0x0116,0x00CD,0x00CE,0x00CF,
- 0x00D0,0x0145,0x014C,0x00D3,0x00D4,0x00D5,0x00D6,0x0168,
- 0x00D8,0x0172,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
- 0x0101,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x012F,
- 0x010D,0x00E9,0x0119,0x00EB,0x0117,0x00ED,0x00EE,0x00EF,
- 0x00F0,0x0146,0x014D,0x00F3,0x00F4,0x00F5,0x00F6,0x0169,
- 0x00F8,0x0173,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x0138
+static const unsigned int iso8859_10_ucs_table[] = {
+ 0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
+ 0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
+ 0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
+ 0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
+ 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
+ 0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
+ 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+ 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
+ 0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
+ 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138
};
-
#endif /* UNICODE_TABLE_ISO8859_10_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_13.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_13.h
index ee69310bdb7..e7991f8e284 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_13.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_13.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_13_H
-#define UNICODE_TABLE_ISO8859_13_H
-
-static const unsigned short iso8859_13_ucs_table[] = {
- 0x00A0,0x201D,0x00A2,0x00A3,0x00A4,0x201E,0x00A6,0x00A7,
- 0x00D8,0x00A9,0x0156,0x00AB,0x00AC,0x00AD,0x00AE,0x00C6,
- 0x00B0,0x00B1,0x00B2,0x00B3,0x201C,0x00B5,0x00B6,0x00B7,
- 0x00F8,0x00B9,0x0157,0x00BB,0x00BC,0x00BD,0x00BE,0x00E6,
- 0x0104,0x012E,0x0100,0x0106,0x00C4,0x00C5,0x0118,0x0112,
- 0x010C,0x00C9,0x0179,0x0116,0x0122,0x0136,0x012A,0x013B,
- 0x0160,0x0143,0x0145,0x00D3,0x014C,0x00D5,0x00D6,0x00D7,
- 0x0172,0x0141,0x015A,0x016A,0x00DC,0x017B,0x017D,0x00DF,
- 0x0105,0x012F,0x0101,0x0107,0x00E4,0x00E5,0x0119,0x0113,
- 0x010D,0x00E9,0x017A,0x0117,0x0123,0x0137,0x012B,0x013C,
- 0x0161,0x0144,0x0146,0x00F3,0x014D,0x00F5,0x00F6,0x00F7,
- 0x0173,0x0142,0x015B,0x016B,0x00FC,0x017C,0x017E,0x2019
+static const unsigned int iso8859_13_ucs_table[] = {
+ 0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
+ 0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
+ 0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
+ 0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
+ 0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
+ 0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
+ 0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
+ 0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
+ 0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
+ 0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
+ 0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019
};
-
#endif /* UNICODE_TABLE_ISO8859_13_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_14.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_14.h
index f0633963ff2..8c0bfe133ae 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_14.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_14.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_14_H
-#define UNICODE_TABLE_ISO8859_14_H
-
-static const unsigned short iso8859_14_ucs_table[] = {
- 0x00A0,0x1E02,0x1E03,0x00A3,0x010A,0x010B,0x1E0A,0x00A7,
- 0x1E80,0x00A9,0x1E82,0x1E0B,0x1EF2,0x00AD,0x00AE,0x0178,
- 0x1E1E,0x1E1F,0x0120,0x0121,0x1E40,0x1E41,0x00B6,0x1E56,
- 0x1E81,0x1E57,0x1E83,0x1E60,0x1EF3,0x1E84,0x1E85,0x1E61,
- 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
- 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
- 0x0174,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x1E6A,
- 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x0176,0x00DF,
- 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
- 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
- 0x0175,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x1E6B,
- 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x0177,0x00FF
+static const unsigned int iso8859_14_ucs_table[] = {
+ 0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
+ 0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
+ 0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
+ 0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+ 0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff
};
-
#endif /* UNICODE_TABLE_ISO8859_14_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_15.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_15.h
index ab2f1a82b63..82743da2b5e 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_15.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_15.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_15_H
-#define UNICODE_TABLE_ISO8859_15_H
-
-static const unsigned short iso8859_15_ucs_table[] = {
- 0x00A0,0x00A1,0x00A2,0x00A3,0x20AC,0x00A5,0x0160,0x00A7,
- 0x0161,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
- 0x00B0,0x00B1,0x00B2,0x00B3,0x017D,0x00B5,0x00B6,0x00B7,
- 0x017E,0x00B9,0x00BA,0x00BB,0x0152,0x0153,0x0178,0x00BF,
- 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
- 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
- 0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
- 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
- 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
- 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
- 0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
- 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
+static const unsigned int iso8859_15_ucs_table[] = {
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
+ 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
+ 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
};
-
#endif /* UNICODE_TABLE_ISO8859_15_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_16.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_16.h
new file mode 100644
index 00000000000..256865fd616
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_16.h
@@ -0,0 +1,17 @@
+/* This file is automatically generated. Do not edit! */
+#ifndef UNICODE_TABLEISO8859_16_H
+static const unsigned int iso8859_16_ucs_table[] = {
+ 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
+ 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
+ 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
+ 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
+ 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+ 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
+ 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
+ 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff
+};
+#endif /* UNICODE_TABLEISO8859_16_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_2.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_2.h
index 148fdbfff52..ab1ca6eea83 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_2.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_2.h
@@ -1,45 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_2_H
-#define UNICODE_TABLE_ISO8859_2_H
-
-static const unsigned short iso8859_2_ucs_table[] = {
- 0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
- 0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
- 0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
- 0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
- 0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
- 0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
- 0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
- 0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
- 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
- 0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
- 0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
- 0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
+static const unsigned int iso8859_2_ucs_table[] = {
+ 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
+ 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
+ 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
+ 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
+ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
+ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
+ 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
+ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
+ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
+ 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
};
-
-
#endif /* UNICODE_TABLE_ISO8859_2_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_3.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_3.h
index 40aa5d635a1..f0501787e3d 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_3.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_3.h
@@ -1,43 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_3_H
-#define UNICODE_TABLE_ISO8859_3_H
-
-static const unsigned short iso8859_3_ucs_table[] = {
- 0x00A0,0x0126,0x02D8,0x00A3,0x00A4,0x0000,0x0124,0x00A7,
- 0x00A8,0x0130,0x015E,0x011E,0x0134,0x00AD,0x0000,0x017B,
- 0x00B0,0x0127,0x00B2,0x00B3,0x00B4,0x00B5,0x0125,0x00B7,
- 0x00B8,0x0131,0x015F,0x011F,0x0135,0x00BD,0x0000,0x017C,
- 0x00C0,0x00C1,0x00C2,0x0000,0x00C4,0x010A,0x0108,0x00C7,
- 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
- 0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x0120,0x00D6,0x00D7,
- 0x011C,0x00D9,0x00DA,0x00DB,0x00DC,0x016C,0x015C,0x00DF,
- 0x00E0,0x00E1,0x00E2,0x0000,0x00E4,0x010B,0x0109,0x00E7,
- 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
- 0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x0121,0x00F6,0x00F7,
- 0x011D,0x00F9,0x00FA,0x00FB,0x00FC,0x016D,0x015D,0x02D9
+static const unsigned int iso8859_3_ucs_table[] = {
+ 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
+ 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
+ 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
+ 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
+ 0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+ 0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
+ 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
+ 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9
};
-
#endif /* UNICODE_TABLE_ISO8859_3_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_4.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_4.h
index 4aaa276c00d..01f90fb3e9d 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_4.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_4.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_4_H
-#define UNICODE_TABLE_ISO8859_4_H
-
-static const unsigned short iso8859_4_ucs_table[] = {
- 0x00A0,0x0104,0x0138,0x0156,0x00A4,0x0128,0x013B,0x00A7,
- 0x00A8,0x0160,0x0112,0x0122,0x0166,0x00AD,0x017D,0x00AF,
- 0x00B0,0x0105,0x02DB,0x0157,0x00B4,0x0129,0x013C,0x02C7,
- 0x00B8,0x0161,0x0113,0x0123,0x0167,0x014A,0x017E,0x014B,
- 0x0100,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x012E,
- 0x010C,0x00C9,0x0118,0x00CB,0x0116,0x00CD,0x00CE,0x012A,
- 0x0110,0x0145,0x014C,0x0136,0x00D4,0x00D5,0x00D6,0x00D7,
- 0x00D8,0x0172,0x00DA,0x00DB,0x00DC,0x0168,0x016A,0x00DF,
- 0x0101,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x012F,
- 0x010D,0x00E9,0x0119,0x00EB,0x0117,0x00ED,0x00EE,0x012B,
- 0x0111,0x0146,0x014D,0x0137,0x00F4,0x00F5,0x00F6,0x00F7,
- 0x00F8,0x0173,0x00FA,0x00FB,0x00FC,0x0169,0x016B,0x02D9
+static const unsigned int iso8859_4_ucs_table[] = {
+ 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
+ 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
+ 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
+ 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
+ 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
+ 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
+ 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+ 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
+ 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
+ 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
+ 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+ 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9
};
-
#endif /* UNICODE_TABLE_ISO8859_4_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_5.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_5.h
index 58fc0e2adaa..70c9f38c5e8 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_5.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_5.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_5_H
-#define UNICODE_TABLE_ISO8859_5_H
-
-static const unsigned short iso8859_5_ucs_table[] = {
- 0x00A0,0x0401,0x0402,0x0403,0x0404,0x0405,0x0406,0x0407,
- 0x0408,0x0409,0x040A,0x040B,0x040C,0x00AD,0x040E,0x040F,
- 0x0410,0x0411,0x0412,0x0413,0x0414,0x0415,0x0416,0x0417,
- 0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,0x041F,
- 0x0420,0x0421,0x0422,0x0423,0x0424,0x0425,0x0426,0x0427,
- 0x0428,0x0429,0x042A,0x042B,0x042C,0x042D,0x042E,0x042F,
- 0x0430,0x0431,0x0432,0x0433,0x0434,0x0435,0x0436,0x0437,
- 0x0438,0x0439,0x043A,0x043B,0x043C,0x043D,0x043E,0x043F,
- 0x0440,0x0441,0x0442,0x0443,0x0444,0x0445,0x0446,0x0447,
- 0x0448,0x0449,0x044A,0x044B,0x044C,0x044D,0x044E,0x044F,
- 0x2116,0x0451,0x0452,0x0453,0x0454,0x0455,0x0456,0x0457,
- 0x0458,0x0459,0x045A,0x045B,0x045C,0x00A7,0x045E,0x045F
+static const unsigned int iso8859_5_ucs_table[] = {
+ 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
+ 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+ 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+ 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
+ 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
+ 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
+ 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
+ 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f
};
-
#endif /* UNICODE_TABLE_ISO8859_5_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_6.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_6.h
index 9dba3ad895a..c9d66243370 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_6.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_6.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_6_H
-#define UNICODE_TABLE_ISO8859_6_H
-
-static const unsigned short iso8859_6_ucs_table[] = {
- 0x00A0,0x0000,0x0000,0x0000,0x00A4,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x060C,0x00AD,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x061B,0x0000,0x0000,0x0000,0x061F,
- 0x0000,0x0621,0x0622,0x0623,0x0624,0x0625,0x0626,0x0627,
- 0x0628,0x0629,0x062A,0x062B,0x062C,0x062D,0x062E,0x062F,
- 0x0630,0x0631,0x0632,0x0633,0x0634,0x0635,0x0636,0x0637,
- 0x0638,0x0639,0x063A,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0640,0x0641,0x0642,0x0643,0x0644,0x0645,0x0646,0x0647,
- 0x0648,0x0649,0x064A,0x064B,0x064C,0x064D,0x064E,0x064F,
- 0x0650,0x0651,0x0652,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x060C,0x00AD,0x0000,0x0000
+static const unsigned int iso8859_6_ucs_table[] = {
+ 0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
+ 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
+ 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
+ 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
+ 0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
+ 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
+ 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
-
#endif /* UNICODE_TABLE_ISO8859_6_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_7.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_7.h
index 0dfde9bb84e..4961c30f928 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_7.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_7.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_7_H
-#define UNICODE_TABLE_ISO8859_7_H
-
-static const unsigned short iso8859_7_ucs_table[] = {
- 0x00A0,0x2018,0x2019,0x00A3,0x0000,0x0000,0x00A6,0x00A7,
- 0x00A8,0x00A9,0x0000,0x00AB,0x00AC,0x00AD,0x0000,0x2015,
- 0x00B0,0x00B1,0x00B2,0x00B3,0x0384,0x0385,0x0386,0x00B7,
- 0x0388,0x0389,0x038A,0x00BB,0x038C,0x00BD,0x038E,0x038F,
- 0x0390,0x0391,0x0392,0x0393,0x0394,0x0395,0x0396,0x0397,
- 0x0398,0x0399,0x039A,0x039B,0x039C,0x039D,0x039E,0x039F,
- 0x03A0,0x03A1,0x0000,0x03A3,0x03A4,0x03A5,0x03A6,0x03A7,
- 0x03A8,0x03A9,0x03AA,0x03AB,0x03AC,0x03AD,0x03AE,0x03AF,
- 0x03B0,0x03B1,0x03B2,0x03B3,0x03B4,0x03B5,0x03B6,0x03B7,
- 0x03B8,0x03B9,0x03BA,0x03BB,0x03BC,0x03BD,0x03BE,0x03BF,
- 0x03C0,0x03C1,0x03C2,0x03C3,0x03C4,0x03C5,0x03C6,0x03C7,
- 0x03C8,0x03C9,0x03CA,0x03CB,0x03CC,0x03CD,0x03CE,0x0000
+static const unsigned int iso8859_7_ucs_table[] = {
+ 0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7,
+ 0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
+ 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
+ 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+ 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
+ 0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
+ 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
+ 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
+ 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
+ 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
+ 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000
};
-
#endif /* UNICODE_TABLE_ISO8859_7_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_8.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_8.h
index 756004f913e..3a52badb36b 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_8.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_8.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_8_H
-#define UNICODE_TABLE_ISO8859_8_H
-
-static const unsigned short iso8859_8_ucs_table[] = {
- 0x00A0,0x0000,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
- 0x00A8,0x00A9,0x00D7,0x00AB,0x00AC,0x00AD,0x00AE,0x203E,
- 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
- 0x00B8,0x00B9,0x00F7,0x00BB,0x00BC,0x00BD,0x00BE,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2017,
- 0x05D0,0x05D1,0x05D2,0x05D3,0x05D4,0x05D5,0x05D6,0x05D7,
- 0x05D8,0x05D9,0x05DA,0x05DB,0x05DC,0x05DD,0x05DE,0x05DF,
- 0x05E0,0x05E1,0x05E2,0x05E3,0x05E4,0x05E5,0x05E6,0x05E7,
- 0x05E8,0x05E9,0x05EA,0x0000,0x0000,0x0000,0x0000,0x0000
+static const unsigned int iso8859_8_ucs_table[] = {
+ 0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+ 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+ 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
+ 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
+ 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
+ 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
+ 0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000
};
-
#endif /* UNICODE_TABLE_ISO8859_8_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_9.h b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_9.h
index f39e83f45ee..eaac8c6723d 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_iso8859_9.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_iso8859_9.h
@@ -1,44 +1,17 @@
-/*
- * "streamable kanji code filter and converter"
- * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
- *
- * LICENSE NOTICES
- *
- * This file is part of "streamable kanji code filter and converter",
- * which is distributed under the terms of GNU Lesser General Public
- * License (version 2) as published by the Free Software Foundation.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with "streamable kanji code filter and converter";
- * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
- * Suite 330, Boston, MA 02111-1307 USA
- *
- * The authors of this file: PHP3 internationalization team
- * You can contact the primary author 金本 茂 .
- *
- */
-
+/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_9_H
-#define UNICODE_TABLE_ISO8859_9_H
-
-static const unsigned short iso8859_9_ucs_table[] = {
- 0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
- 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
- 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
- 0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
- 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
- 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
- 0x011E,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
- 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0130,0x015E,0x00DF,
- 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
- 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
- 0x011F,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
- 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x0131,0x015F,0x00FF
+static const unsigned int iso8859_9_ucs_table[] = {
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+ 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+ 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff
};
-
#endif /* UNICODE_TABLE_ISO8859_9_H */
diff --git a/ext/mbstring/libmbfl/libmbfl.dsp b/ext/mbstring/libmbfl/libmbfl.dsp
index bf3fe065571..e041df02c53 100644
--- a/ext/mbstring/libmbfl/libmbfl.dsp
+++ b/ext/mbstring/libmbfl/libmbfl.dsp
@@ -199,6 +199,10 @@ SOURCE=.\filters\mbfilter_iso8859_15.c
# End Source File
# Begin Source File
+SOURCE=.\filters\mbfilter_iso8859_16.c
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\mbfilter_iso8859_2.c
# End Source File
# Begin Source File
@@ -500,6 +504,10 @@ SOURCE=.\filters\mbfilter_iso8859_15.h
# End Source File
# Begin Source File
+SOURCE=.\filters\mbfilter_iso8859_16.h
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\mbfilter_iso8859_2.h
# End Source File
# Begin Source File
@@ -708,6 +716,10 @@ SOURCE=.\filters\unicode_table_iso8859_15.h
# End Source File
# Begin Source File
+SOURCE=.\filters\unicode_table_iso8859_16.h
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\unicode_table_iso8859_2.h
# End Source File
# Begin Source File
diff --git a/ext/mbstring/libmbfl/libmbfl.sln b/ext/mbstring/libmbfl/libmbfl.sln
new file mode 100755
index 00000000000..f49f0c0d868
--- /dev/null
+++ b/ext/mbstring/libmbfl/libmbfl.sln
@@ -0,0 +1,21 @@
+Microsoft Visual Studio Solution File, Format Version 7.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
+EndProject
+Global
+ GlobalSection(SolutionConfiguration) = preSolution
+ ConfigName.0 = Debug
+ ConfigName.1 = Release
+ EndGlobalSection
+ GlobalSection(ProjectDependencies) = postSolution
+ EndGlobalSection
+ GlobalSection(ProjectConfiguration) = postSolution
+ {B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ EndGlobalSection
+ GlobalSection(ExtensibilityAddIns) = postSolution
+ EndGlobalSection
+EndGlobal
diff --git a/ext/mbstring/libmbfl/libmbfl.vcproj b/ext/mbstring/libmbfl/libmbfl.vcproj
new file mode 100755
index 00000000000..29e0af0a270
--- /dev/null
+++ b/ext/mbstring/libmbfl/libmbfl.vcproj
@@ -0,0 +1,650 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
index 01e4787a962..ea2ca5632d5 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@@ -1345,7 +1345,6 @@ mbfl_strcut(
}
-#include
/*
* strwidth
*/
@@ -1435,6 +1434,7 @@ collector_strimwidth(int c, void* data)
default:
if (pc->outchar >= pc->from) {
pc->outwidth += (is_fullwidth(c) ? 2: 1);
+
if (pc->outwidth > pc->width) {
if (pc->status == 0) {
pc->endpos = pc->device.pos;
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
index d907512ec20..d20b3ceea96 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
@@ -74,6 +74,7 @@
#define MBFL_WCSPLANE_CP1251 0x70f70000
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_KOI8R 0x70f90000
+#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
index df279336ce2..21fb6319db7 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
@@ -80,6 +80,7 @@
#include "filters/mbfilter_iso8859_13.h"
#include "filters/mbfilter_iso8859_14.h"
#include "filters/mbfilter_iso8859_15.h"
+#include "filters/mbfilter_iso8859_16.h"
#include "filters/mbfilter_base64.h"
#include "filters/mbfilter_qprint.h"
#include "filters/mbfilter_uuencode.h"
@@ -163,6 +164,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_8859_13,
&mbfl_encoding_8859_14,
&mbfl_encoding_8859_15,
+ &mbfl_encoding_8859_16,
&mbfl_encoding_euc_cn,
&mbfl_encoding_cp936,
&mbfl_encoding_hz,
@@ -295,7 +297,3 @@ mbfl_is_support_encoding(const char *name)
return 1;
}
}
-
-
-
-
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
index c99af89a020..c9b51dd3608 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
@@ -97,6 +97,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866,
mbfl_no_encoding_koi8r,
+ mbfl_no_encoding_8859_16,
mbfl_no_encoding_charset_max
};
diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING
index 7913cbf23f8..ed3fa53b253 100644
--- a/ext/mbstring/oniguruma/COPYING
+++ b/ext/mbstring/oniguruma/COPYING
@@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby.
It follows the BSD license in the case of the one except for it.
/*-
- * Copyright (c) 2002 K.Kosako
+ * Copyright (c) 2002-2004 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
index a7a817dd2de..65ef03f50fa 100644
--- a/ext/mbstring/oniguruma/HISTORY
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -1,18 +1,479 @@
History
+2005/02/19: Version 3.7.0
+
+2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
+2005/02/19: [new] (thanks Minero Aoki)
+ add onig_region_set().
+2005/02/19: [API] change onig_region_init() to extern.
+2005/02/19: [dist] remove reggnu.c from MANIFEST-RUBY.
+ remove reggnu.c from make 19.
+2005/02/19: [dist] update doc/API and doc/API.ja.
+2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
+2005/02/19: [impl] (thanks Alexey Zakhlestin)
+ change UChar* to const UChar* in oniguruma.h,
+ regenc.h and regparse.h.
+2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
+ onigposix.h and st.h.
+2005/02/12: [test] success in ruby 1.9.0 (2005-02-11) [i386-cygwin].
+2005/02/12: [bug] (thanks nobu) [ruby-dev:25676]
+ type_cclass_hash() fix overrun.
+2005/02/09: [test] success in ruby 1.9.0 (2005-02-09) [i686-linux].
+2005/02/09: [spec] add RE_OPTION_FIND_NOT_EMPTY etc.. to oniggnu.h.
+2005/02/09: [dist] remove hash.c.patch.
+2005/02/07: [impl] remove re_mbctab, mbctab_ascii etc...
+ (USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY)
+
+2005/02/04: Version 3.6.0
+
+2005/02/04: [test] success in ruby 1.9.0 (2005-02-04) [i686-linux].
+2005/02/01: [bug] add key_free() call to st_free_table().
+2005/02/01: [new] add onig_get_default_ambig_flag() and
+ onig_set_default_ambig_flag().
+2005/02/01: [dist] update MANIFEST-RUBY.
+2005/01/31: [test] success in ruby 1.9.0 (2005-01-29) [i686-linux].
+2005/01/31: [spec] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND
+ from ONIGENC_AMBIGUOUS_MATCH_DEFAULT.
+2005/01/31: [dist] update Makefile.in (make 19).
+2005/01/29: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.5.4 was merged to Ruby 1.9.0.
+2005/01/28: [impl] (thanks UK-taniyama)
+ add extern "C" { } directive to oniguruma.h, oniggnu.h
+ and onigposix.h for C++.
+2005/01/25: [impl] remove nested function call for xxx_code_to_mbclen().
+ (euc_kr.c, euc_tw.c, big5.c)
+
+2005/01/19: Version 3.5.4
+
+2005/01/19: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/19: [bug] (thanks Isao Sonobe)
+ callback function argument name_end of onig_foreach_name()
+ was wrong.
+ name key of name table should be null terminated for
+ character encoding length.
+ add strdup_with_null(), rename onig_strdup() to k_strdup().
+ use e->name_len in i_names().
+2005/01/17: [impl] (thanks UK-taniyama)
+ add HAVE_SYS_TYPES_H to config.h.in.
+
+2005/01/13: Version 3.5.3
+
+2005/01/13: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/13: [bug] ignore case match bug.
+ ex. /s+/iu.match("SSSSS") ==> [4..5]
+ fix OP_EXACT1_IC, OP_EXACTN_IC process.
+2005/01/13: [bug] (thanks Isao Sonobe)
+ ignore case match bug.
+ ex. /is/iu.match("ss") fail.
+ fix str_lower_case_match() etc.
+
+2005/01/05: Version 3.5.2
+
+2005/01/05: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
+2005/01/05: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
+2005/01/05: [bug] (thanks Isao Sonobe)
+ ignore case match bug.
+ ex. /s+/iu.match("sssss") ==> [4..5]
+ fix OP_EXACT1_IC, OP_EXACTN_IC process.
+2005/01/05: [bug] (thanks Isao Sonobe)
+ group name table should be renumbered.
+ add onig_renumber_name_table().
+2004/12/24: [dist] remove file onigcmpt200.h.
+
+2004/12/17: Version 3.5.1
+
+2004/12/17: [dist] add INSTALL-RUBY to archive.
+2004/12/16: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
+2004/12/16: [dist] update hash.c.patch.
+2004/12/15: [bug] (thanks matz)
+ char > 127 should be casted to unsigned char. (utf8.c)
+2004/12/13: [impl] add HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES definition
+ to oniguruma.h in the case __cplusplus.
+2004/12/06: [dist] update doc/RE and doc/RE.ja.
+2004/12/03: [impl] (thanks nobu)
+ st.h fix prototype for C++.
+
+2004/12/03: Version 3.5.0
+
+2004/12/02: [test] success in ruby 1.9.0 (2004-12-02) [i686-linux].
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-mswin32].
+2004/12/01: [dist] add make targets 19 and 19up to win32/Makefile.
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-cygwin].
+2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i686-linux].
+2004/12/01: [impl] double cast for escape warning in Cygwin.
+ (HashDataType* )((void* )(&e)) in regparse.c
+2004/12/01: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
+2004/12/01: [tune] change implementation of clear_opt_map_info().
+ (which was 10-16% cost in gprof result for my test program)
+2004/12/01: [dist] remove regex.c from distribution files.
+2004/11/30: [memo] remove targets 16 and 18 from Makefile.in.
+2004/11/30: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
+2004/11/30: [inst] add "cp -p st.[ch] st.[ch].ruby_orig" to "make 19".
+2004/11/30: [tune] map_position_value() return 20 if code is 0
+ and minimum enclen > 1.
+2004/11/30: [test] success in ruby 1.9.0 (2004-11-29) [i686-linux].
+2004/11/30: [impl] minor changes for multi-thread in regexec.c and regcomp.c.
+2004/11/30: [impl] change THREAD_PASS_LIMIT_COUNT value from 10 to 8.
+2004/11/30: [impl] add THREAD_ATOMIC_XXX to FreeNodeList access in regparse.c
+2004/11/29: [impl] add USE_MULTI_THREAD_SYSTEM.
+2004/11/29: [memo] add hash.c.patch to CVS.
+2004/11/29: [dist] change mail address to 'sndgk393 AT ...'
+2004/11/29: [dist] add -s option (silent mode) to test.rb.
+2004/11/29: [tune] change THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS value
+ from 20 to 8.
+2004/11/29: [inst] add make target "19up".
+2004/11/29: [dist] change Oniguruma Home Page URL.
+2004/11/29: [impl] remove onig_is_in_code_range_array().
+2004/11/29: [dist] fix doc/RE and RE.ja (character types).
+2004/11/26: [dist] fix win32/Makefile.
+2004/11/26: [dist] fix doc/RE and RE.ja (multibyte character types).
+2004/11/26: [impl] add onig_free_shared_cclass_table().
+2004/11/26: [impl] move definition USE_UNICODE_FULL_RANGE_CTYPE to regenc.h.
+2004/11/26: [impl] add opcode OP_CCLASS_NODE.
+2004/11/26: [impl] move definition of CClassNode to regint.h.
+2004/11/26: [impl] add type PointerType in regint.h.
+2004/11/25: [impl] remove ONIGENC_CTYPE_MOD_NOT.
+2004/11/25: [impl] rename onig_node_new_cclass_by_codepoint_range to
+ node_new_cclass_by_codepoint_range.
+2004/11/25: [impl] remove get_type_cc_node method from OnigEncodingType.
+2004/11/25: [impl] move implementation of shared char-class from enc/*.c
+ to regparse.c.
+2004/11/25: [dist] add hash.c.patch for Ruby 1.9 hash.c change.
+2004/11/22: [impl] change utf8_get_type_node().
+2004/11/22: [impl] add ONIGENC_CTYPE_MOD_NOT.
+2004/11/22: [bug] (thanks MIYAMUKO Katsuyuki)
+ ruby make test fail in HP-UX B.11.23 ia64.
+ should use tok->u.code instead of tok->u.c in
+ the case of TK_CODE_POINT.
+2004/11/19: [bug] (thanks Yoshida Masato)
+ invalid multibyte code causes segmentation fault.
+ ex. /[\xFF-\xFF]/u
+2004/11/19: [bug] (thanks Yoshida Masato)
+ illegal check in char-class range in UTF-8.
+ ex. s = "[\xC2\xA0-\xC3\xBE]"
+ p(Regexp.new(s, nil, "u") =~ "\xC3\xBE")
+2004/11/18: [impl] add onig_node_new_cclass_by_codepoint_range().
+2004/11/18: [impl] remove OnigCodePointRange type. (use OnigCodePoint[].)
+2004/11/17: [bug] (thanks nobu)
+ abort in "a".gsub(/a\Z/, "")
+ fix ONIGENC_STEP_BACK() argument in onig_search().
+2004/11/16: [impl] add key2 member to st_table_entry in st.[ch].
+ change API of st for non-null terminated string key.
+2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType.
+2004/11/15: [impl] add st.h and st.c from Ruby 1.9.
+ use st-hash always.
+2004/11/12: [impl] change menber 'not' of CClassNode to 'flags'.
+ add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE.
+2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c.
+2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c.
+2004/11/11: [bug] fix enc/utf8.c.
+ size 0 array initializer was compile error in VC++.
+2004/11/09: [inst] (thanks Hiroki YAGITA)
+ change installed file mode to 0644.
+2004/11/09: [bug] (thanks UK-taniyama)
+ wrong definitions GET_RELADDR_INC(), GET_ABSADDR_INC()
+ etc... (NOT PLATFORM_UNALIGNED_WORD_ACCESS)
+2004/11/09: [impl] type cast in regexec() for remove compile time warning.
+ (WIN32, regposix.c)
+2004/11/08: [spec] fix Unicode character types.
+ 0x00ad (soft hyphen) should be [:cntrl:] and [:space:] type.
+ [0x0009..0x000d], 0x0085 should be [:print:] type.
+ 0x00ad should not be [:punct:] type.
+2004/11/08: [inst] fix Makefile.in. (for make ctest/ptest/testcu)
+2004/11/06: [impl] (thanks Kazuo Saito)
+ too many alternatives pattern causes core dump.
+ change implementation of onig_node_free().
+2004/11/05: [spec] rename ONIGERR_END_PATTERN_AT_BACKSLASH to
+ ONIGERR_END_PATTERN_AT_ESCAPE.
+2004/11/05: [impl] (thanks matz)
+ escape compile time warnings for x86-64 Linux.
+ StackIndex type int -> long
+2004/11/05: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.4.0 was merged to Ruby 1.9.0.
+
+2004/10/30: Version 3.4.0
+
+2004/10/30: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
+2004/10/30: [new] add hexadecimal digit char type. (\h, \H)
+ syntax: ONIG_SYN_OP2_ESC_H_XDIGIT
+2004/10/30: [bug] (thanks Guy Decoux)
+ reluctant infinite repeat bug.
+ ex. /^[a-z]{2,}?$/.match("aaa") fail.
+ fix OP_REPEAT_INC_NG process in match_at().
+
+2004/10/18: Version 3.3.1
+
+2004/10/18: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
+2004/10/18: [impl] (thanks Imai Yasumasa)
+ enclose #include by #ifndef __BORLANDC__.
+2004/10/18: [bug] (thanks Imai Yasumasa)
+ memory acess violation in select_opt_exact_info().
+2004/09/25: [dist] fix doc/API and doc/API.ja.
+2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for
+ the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ is not defined.
+
+2004/09/17: Version 3.3.0
+
+2004/09/17: [dist] add COPYING to program source files.
+2004/09/17: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/09/17: [bug] (thanks Isao Sonobe)
+ memory access violations in xxx_mbc_enc_len(),
+ and xxx_mbc_to_normalize() and
+ xxx_left_adjust_char_head().
+ add string range check in match_at() and onig_search().
+2004/09/08: [dist] change mail address format.(kosako AT sofnec ...)
+
+2004/09/04: Version 3.2.9
+
+2004/09/04: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/09/04: [bug] (thanks Bob Kerstetter and Richard Koch)
+ search fail in ignore case mode.
+ fix str_lower_case_match().
+2004/09/04: [inst] (thanks Isao Sonobe)
+ clear sample directory in 'make clean'.
+2004/09/04: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
+ meanings in XXXXX_mbc_to_normalize() and
+ XXXXX_is_mbc_ambiguous().
+2004/08/28: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
+ meanings in iso_8859_XX_mbc_to_normalize() and
+ iso_8859_XX_is_mbc_ambiguous().
+
+2004/08/24: Version 3.2.8
+
+2004/08/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/08/24: [spec] add ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY.
+ /a{n}?/ == /(?:a{n})?/
+2004/08/24: [dist] fix doc/RE and doc/RE.ja.
+2004/08/24: [bug] (thanks starfish)
+ memory leak in set_optimize_exact_info().
+
+2004/08/21: Version 3.2.7
+
+2004/08/21: [test] success in ruby 1.8.2 (2004-07-28) [i686-linux].
+ (1.8.2 preview2)
+2004/08/21: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/08/21: [bug] (thanks Isao Sonobe) (thanks kage)
+ memory access violation in bm_search_notrev().
+ (forgotten to merge from 2.X)
+
+2004/07/24: Version 3.2.6
+
+2004/07/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
+2004/07/24: [test] success in ruby 1.8.2 (2004-07-16) [i686-linux].
+2004/07/24: [bug] fix warnings for regexec.c. (gcc 2.91.66)
+2004/07/24: [memo] change version control system from Subversion
+ to CVS 1.11.17.
+2004/07/20: [bug] (thanks Isao Sonobe)
+ illegal result in negative character class in ignore case
+ mode. fix pair-ambig-codes process in parse_exp().
+ ex. /[^a]/i.match("A")
+2004/07/20: [bug] (thanks Isao Sonobe)
+ undefined bytecode error happens in UTF-16BE etc..
+ compile_length_cclass_node() was not consistent with
+ compile_cclass_node().
+
+2004/07/01: Version 3.2.5
+
+2004/07/01: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/07/01: [new] add onig_get_syntax_{op,op2,behavior,options}.
+2004/07/01: [bug] (thanks Isao Sonobe)
+ invalid result in onig_capture_tree_traverse().
+ fix make_capture_history_tree().
+
+2004/06/29: Version 3.2.4
+
+2004/06/29: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/29: [new] (thanks Isao Sonobe)
+ add onig_number_of_captures().
+
+2004/06/25: Version 3.2.3
+
+2004/06/25: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/25: [bug] (thanks Isao Sonobe)
+ invalid result in onig_capture_tree_traverse().
+ fix make_capture_history_tree().
+
+2004/06/24: Version 3.2.2
+
+2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/06/24: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
+2004/06/24: [new] (thanks Isao Sonobe)
+ add onig_number_of_capture_histories().
+2004/06/24: [bug] (thanks Isao Sonobe)
+ invalid char position match in UTF-16 and UTF-32.
+ add onigenc_always_false_is_allowed_reverse_match().
+
+2004/06/17: Version 3.2.1
+
+2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/06/17: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/06/17: [impl] should not use OP_REPEAT for (...)? even if target size
+ is long.
+2004/06/17: [bug] (thanks nobu) [ruby-dev:23703]
+ should use STACK_AT() instead of stkp in OP_REPEAT_INC.
+ add IN_VAR_REPEAT flag in setup_tree().
+2004/06/16: [impl] change select_opt_exact_info() to use ByteValTable[].
+2004/06/16: [impl] change map_position_value() table values.
+2004/06/14: [impl] (thanks John Carter)
+ RelAddrType, AbsAddrType and LengthType change
+ from short int to int type for the very long string match.
+2004/06/14: [bug] (thanks Greg A. Woods)
+ fix nmatch argument of regexec() is smaller than
+ reg->num_mem + 1 case. (POSIX API)
+2004/06/14: [spec] (thanks Greg A. Woods)
+ set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API)
+
+2004/06/10: Version 3.2.0
+
+2004/06/10: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/06/10: [test] success in ruby 1.9.0 (2004-05-27) [i386-mswin32].
+2004/06/10: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/06/10: [dist] add README.ja.
+2004/06/10: [new] add onig_copy_encoding().
+2004/06/10: [API] add encoding argument to onig_set_meta_char().
+ add meta_char_table member to OnigEncodingType.
+2004/06/08: [dist] add doc/API.ja.
+2004/06/07: [API] add num_of_elements member to OnigCompileInfo.
+2004/05/29: [memo] (thanks Kazuo Saito)
+ Oniguruma 3.1.0 was merged to Ruby 1.9.0.
+2004/05/26: [impl] rename NST_SIMPLE_REPEAT to NST_STOP_BT_SIMPLE_REPEAT.
+2004/05/26: [impl] doesn't need to check that target's simple repeat-ness
+ for EFFECT_MEMORY type node in setup_tree().
+
+2004/05/25: Version 3.1.0
+
+2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/05/25: [test] success in ruby 1.9.0 (2004-05-23) [i686-linux].
+2004/05/25: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
+2004/05/25: [bug] (thanks Masahiro Sakai) [ruby-dev:23560]
+ ruby -ruri -ve 'URI::ABS_URI =~
+ "http://example.org/Andr\xC3\xA9"'
+ nested STK_REPEAT type stack can't backtrack repeat_stk[].
+ add OP_REPEAT_INC_SG and OP_REPEAT_INC_NG_SG.
+2004/05/25: [new] support UTF-32LE. (ONIG_ENCODING_UTF32_LE)
+2004/05/25: [new] support UTF-32BE. (ONIG_ENCODING_UTF32_BE)
+2004/05/24: [impl] divide enc/utf16.c to utf16_be.c and utf16_le.c.
+2004/05/24: [impl] add enc/unicode.c.
+2004/05/24: [API] change calling sequences of onig_new_deluxe() and
+ onig_recompile_deluxe().
+ define OnigCompileInfo type.
+2004/05/21: [impl] perform ensure process for rb_trap_exec() in match_at().
+ add onig_exec_trap() and CHECK_INTERRUPT_IN_MATCH_AT.
+2004/05/21: [impl] add regex status check to onig_match().
+2004/05/21: [new] add onig_get_capture_tree() and
+ onig_capture_tree_traverse().
+2004/05/20: [spec] (thanks Isao Sonobe)
+ capture history return capture data tree.
+ (see sample/listcap.c)
+2004/05/19: [bug] (thanks Simon Strandgaard)
+ Control-C does not work in matching process on Ruby.
+ add calling of CHECK_INTERRUPT into match_at().
+ ex. /<(?:[^">]+|"[^"]*")+>/.match('')
+2004/05/19: [bug] (thanks Simon Strandgaard)
+ define virtual codepoint values for invalid encoding
+ byte 0xfe and 0xff in UTF-8.
+ ex. /\w+/u.match("%a\xffb\xfec%") ==> "a"
+2004/05/19: [spec] (thanks Simon Strandgaard)
+ too big backref number should be treated as a sequence of
+ an octal char and number digits.
+ ex. /b\3777\c/.match("b\3777\c")
+2004/05/17: [spec] rename encoding names "UTF-16 BE" and "UTF-16 LE"
+ to "UTF-16BE" and "UTF-16LE".
+2004/05/17: [impl] move ismbchar() and mbclen() from oniguruma.h to oniggnu.h.
+2004/05/17: [impl] rename onigenc_single_byte_is_allowed_reverse_match() to
+ onigenc_always_true_is_allowed_reverse_match().
+
+2004/05/14: Version 3.0.0
+
+2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
+2004/05/14: [test] success in ruby 1.9.0 (2004-05-14) [i686-linux].
+2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+ (* need to edit parse.y:
+ register int c; ---> int c; in yylex())
+2004/05/14: [impl] add regext.c.
+2004/05/14: [spec] KOI8 is not included in library archive by default setup.
+2004/05/14: [impl] implementation changes are completed for all encoding files.
+2004/05/12: [impl] add divide_ambig_string_node().
+ ambiguous string is divided and normalized before
+ optimization and compilation process.
+2004/05/11: [dist] remove INSTALL-RUBY from distribution.
+2004/04/28: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.8 was merged to Ruby 1.9.0.
+2004/04/26: [spec] change value DEFAULT_MATCH_STACK_LIMIT_SIZE = 0 : unlimited
+2004/04/26: [new] add onig_get_match_stack_limit_size() and
+ onig_set_match_stack_limit_size().
+2004/04/26: [bug] add error check to re.c.181.patch and re.c.168.patch.
+2004/04/23: [impl] remove ctype_support_level from OnigEncodingType.
+2004/04/22: [spec] allow the range from single byte char to multibyte char in
+ character class for implementation reason.
+ ex. /[a-\xbb\xcc]/ in EUC-JP encoding.
+2004/04/21: [impl] remove max_enc_len_by_first_byte() from OnigEncodingType.
+2004/04/20: [new] add onig_copyright().
+2004/04/20: [impl] add regversion.c.
+2004/04/15: [new] add onig_get_ambig_flag().
+2004/04/14: [bug] (thanks Isao Sonobe)
+ undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST
+ is setted.
+ should finish matching process if find-condition
+ is fail at OP_END in match_at().
+2004/04/12: [impl] add ambig_flag to regex_t.
+2004/04/09: [impl] move onig_set_meta_char() to regsyntax.c.
+2004/04/09: [bug] (thanks HIROSE Masaaki) fix onig_version().
+2004/04/08: [impl] add regsyntax.c.
+2004/04/07: [new] support UTF-16 LE. (ONIG_ENCODING_UTF16_LE)
+2004/04/05: [impl] add ONIGENC_CTYPE_NEWLINE.
+2004/04/05: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.6 was merged to Ruby 1.9.0.
+2004/04/02: [memo] Version 2.2.6 was released.
+2004/03/26: [new] support UTF-16 BE. (ONIG_ENCODING_UTF16_BE)
+2004/03/25: [spec] support non 8-bit encodings.
+2004/03/16: [memo] 2.X branch for 8-bit encodings only.
+
+2004/03/16: Version 2.2.5
+
+2004/03/16: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
+2004/03/16: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
+2004/03/16: [impl] add property name to error message of
+ ONIGERR_INVALID_CHAR_PROPERTY_NAME.
+2004/03/16: [spec] allow prefix 'Is' for \p{...} in ONIG_SYNTAX_PERL.
+ add syntax op. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS.
+2004/03/15: [dist] add sample/syntax.c.
+2004/03/15: [spec] support NOT op. in char property. \p{^...}, \P{^...}.
+ add syntax op. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT.
+2004/03/15: [spec] rename ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY to
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY.
+2004/03/10: [impl] move ONIGERR_XXX from regenc.h to oniguruma.h,
+ rename ONIGERR_XXX to ONIGENCERR_XXX in regenc.h.
+2004/03/08: [impl] (thanks eban)
+ replace defined(__CYGWIN__) to defined(__GNUC__).
+2004/03/08: [bug] (thanks eban) [ruby-dev:23172]
+ need to separate initialization for bcc32.
+2004/03/06: [memo] (thanks Kazuo Saito)
+ Oniguruma 2.2.4 was merged to Ruby 1.9.0.
+2004/03/05: [API] change second argument type of onig_set_meta_char()
+ from unsigned int to OnigCodePoint.
+2004/03/05: [dist] (thanks Kazuo Saito)
+ add MANIFEST-RUBY.
+
2004/03/04: Version 2.2.4
2004/03/04: [impl] (thanks Moriyoshi Koizumi)
- fix many warning in Win32 VC++ with /W3 option.
+ fix many warnings in Win32 VC++ with /W3 option.
2004/03/02: Version 2.2.3
2004/03/02: [bug] (thanks Isao Sonobe)
return invalid capture region value if capture history
is used. (OP_MEMORY_END_PUSH_REC bug)
- ex. /\\g(?@
\\(\\g\\)){0}(?(?:\\g
)*|){0}/
+ ex. /\g
(?@
\(\g\)){0}(?(?:\g
)*|){0}/
.match("((())())")
-2004/03/02: [impl] add :nodoc: to onig_stat_print() for RDoc.
+2004/03/02: [impl] (thanks Kazuo Saito)
+ add :nodoc: to onig_stat_print() for RDoc.
2004/03/02: [impl] don't use ONIG_SOURCE_IS_WRAPPED.
2004/02/27: Version 2.2.2
@@ -91,14 +552,14 @@ History
(0x80 - 0xff is not ASCII)
2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2)
2004/01/23: [dist] add enc/isotable.c.
-2004/01/22; [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
+2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and
ALIGNMENT_RIGHT() was wrong.
type casting should be unsigned int, not int.
2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__)
to unaligned word access condition. (AMD64 ?)
2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c.
-2004/01/21; [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
+2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/01/20: [dist] change Makefile.in.
2004/01/20: [spec] add \p{...}, \P{...} in char class.
@@ -883,5 +1344,16 @@ History
[test: test]
[memo: memo]
--
+
+svn mkdir http://localhost/repos/branches -m ""
+svn mkdir http://localhost/repos/branches/oniguruma -m ""
+svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only"
+
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
+
+
+cvs history -T
+
+
+cvs rtag "VERSION_X_X_X" oniguruma
diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README
index 3880423f03b..dc4fb3b64b8 100644
--- a/ext/mbstring/oniguruma/README
+++ b/ext/mbstring/oniguruma/README
@@ -1,7 +1,8 @@
-README 2004/02/25
+README 2005/02/04
-Oniguruma ---- (C) K.Kosako
+Oniguruma ---- (C) K.Kosako
+http://www.geocities.jp/kosako3/oniguruma/
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
@@ -11,45 +12,38 @@ for every regular expression object can be specified.
Supported character encodings:
- ASCII, UTF-8,
+ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
- Shift_JIS, Big5, KOI8, KOI8-R,
+ Shift_JIS, Big5, KOI8-R, KOI8 (*),
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-
-There are two ways of using of it in this program.
-
- * Built-in regular expression engine of Ruby
- * C library (supported APIs: GNU regex, POSIX, Oniguruma native)
-
+* KOI8 is not included in library archive by default setup.
+ (need to edit Makefile if you want to use it.)
------------------------------------------------------------
Install
-(A) Install into Ruby
-
- See INSTALL-RUBY.
-
- (character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
-
-
-(B) Install C library
-
- (B-1) Unix and Cygwin platform
+ Case 1: Unix and Cygwin platform
1. ./configure
2. make
3. make install
- (* uninstall: make uninstall)
+ library file: libonig.a
- * test (ASCII/EUC-JP)
- 4. make ctest
+ test (ASCII/EUC-JP)
+
+ make ctest
+
+ uninstall
+
+ make uninstall
- (B-2) Win32 platform (VC++)
+
+ Case 2: Win32 platform (VC++)
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
@@ -77,6 +71,16 @@ Regular Expressions
See doc/RE (or doc/RE.ja for Japanese).
+Usage
+
+ Include oniguruma.h in your program. (native API)
+ See doc/API for native API.
+
+ If you want to use static link library(onig_s.lib) in Win32,
+ add option -DONIG_EXTERN=extern to C compiler.
+
+
+
Sample Programs
sample/simple.c example of the minimum (native API)
@@ -86,14 +90,12 @@ Sample Programs
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
+ sample/syntax.c Perl and Java syntax test.
Source Files
oniguruma.h Oniguruma API header file. (public)
- oniggnu.h GNU regex API header file. (public)
- onigcmpt200.h Oniguruma API backward compatibility header file. (public)
- (for 2.0.0 or more older version)
regenc.h character encodings framework header file.
regint.h internal definitions
@@ -101,17 +103,31 @@ Source Files
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
- regex.c source files wrapper for Ruby
+ regext.c extended API functions. (deluxe version API)
regexec.c search and match functions
regparse.c parsing functions.
+ regsyntax.c pattern syntax functions and built-in syntax definitions.
+ regtrav.c capture history tree data traverse functions.
+ regversion.c version info function.
+ st.h hash table functions header file
+ st.c hash table functions
+
+ oniggnu.h GNU regex API header file. (public)
reggnu.c GNU regex API functions
onigposix.h POSIX API header file. (public)
regposerr.c POSIX error message function.
- regposix.c POSIX functions.
+ regposix.c POSIX API functions.
enc/mktable.c character type table generator.
enc/ascii.c ASCII encoding.
+ enc/euc_jp.c EUC-JP encoding.
+ enc/euc_tw.c EUC-TW encoding.
+ enc/euc_kr.c EUC-KR, EUC-CN encoding.
+ enc/sjis.c Shift_JIS encoding.
+ enc/big5.c Big5 encoding.
+ enc/koi8.c KOI8 encoding.
+ enc/koi8_r.c KOI8-R encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
@@ -128,18 +144,19 @@ Source Files
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
enc/iso8859_16.c ISO-8859-16 encoding.
(Latin-10 or South-Eastern European with Euro)
- enc/utf8.c UTF-8 encoding.
- enc/euc_jp.c EUC-JP encoding.
- enc/euc_tw.c EUC-TW encoding.
- enc/euc_kr.c EUC-KR, EUC-CN encoding.
- enc/sjis.c Shift_JIS encoding.
- enc/koi8.c KOI8 encoding.
- enc/koi8_r.c KOI8-R encoding.
- enc/big5.c Big5 encoding.
+ enc/utf8.c UTF-8 encoding.
+ enc/utf16_be.c UTF-16BE encoding.
+ enc/utf16_le.c UTF-16LE encoding.
+ enc/utf32_be.c UTF-32BE encoding.
+ enc/utf32_le.c UTF-32LE encoding.
+ enc/unicode.c Unicode information data.
+
+ win32/Makefile Makefile for Win32 (VC++)
+ win32/config.h config.h for Win32
-API differences with Japanized GNU regex(version 0.12) of Ruby
+API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
+ re_compile_fastmap() is removed.
+ re_recompile_pattern() is added.
@@ -148,18 +165,17 @@ API differences with Japanized GNU regex(version 0.12) of Ruby
ToDo
- 1 support 16-bit encodings. (UTF-16)
- 2 different encoding pattern with target.
- (ex. ASCII/UTF-16, UTF-16 BE and UTF-16 LE)
- 3 add enc/name.c (onigenc_get_enc_by_name(name))
-
- ? transmission stopper. (return ONIG_STOP from match_at())
- ? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
- ? better acess to hash table (st.c).
- non null-terminated key version st_lookup().
- ? grep-like tool 'onigrep'.
- ? return parse tree of regexp pattern to application.
- ?? /a{n}?/ should be interpreted as /(?:a{n})?/.
- ?? \h hexadecimal digit char ([0-9a-fA-F]), \H not \h.
+ ? ignore case in full code point range of Unicode.
+ ? Unicode Property.
+ ? ambig-flag Katakana <-> Hiragana.
+ ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
+ ? add ONIG_SYNTAX_ASIS.
+ ?? \X (== \PM\pM*)
+ ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
+ ?? variable line separator.
+ ?? transmission stopper. (return ONIG_STOP from match_at())
and I'm thankful to Akinori MUSHA.
+
+
+Mail Address: K.Kosako
diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja
new file mode 100644
index 00000000000..44553abfefd
--- /dev/null
+++ b/ext/mbstring/oniguruma/README.ja
@@ -0,0 +1,177 @@
+README.ja 2005/02/04
+
+µ´¼Ö ---- (C) K.Kosako
+
+http://www.geocities.jp/kosako3/oniguruma/
+http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
+http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
+
+µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£
+¤³¤Î¥é¥¤¥Ö¥é¥ê¤ÎÆÃĹ¤Ï¡¢¤½¤ì¤¾¤ì¤ÎÀµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤´¤È¤Ë
+ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ò»ØÄê¤Ç¤¤ë¤³¤È¤Ç¤¢¤ë¡£
+
+¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤ëʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°:
+
+ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
+ EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+ Shift_JIS, Big5, KOI8-R, KOI8 (*),
+ ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+
+* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
+ (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
+------------------------------------------------------------
+
+¥¤¥ó¥¹¥È¡¼¥ë
+
+ ¥±¡¼¥¹£±: Unix¤ÈCygwin´Ä¶
+
+ 1. ./configure
+ 2. make
+ 3. make install
+
+ ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a
+
+ Æ°ºî¥Æ¥¹¥È (ASCII/EUC-JP)
+
+ make ctest
+
+ ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
+
+ make uninstall
+
+
+
+ ¥±¡¼¥¹£²: Win32(VC++)´Ä¶
+
+ 1. copy win32\Makefile Makefile
+ 2. copy win32\config.h config.h
+ 3. nmake
+
+ onig_s.lib: static link library
+ onig.dll: dynamic link library
+
+ * Æ°ºî¥Æ¥¹¥È (ASCII/Shift_JIS)
+ 4. copy win32\testc.c testc.c
+ 5. nmake ctest
+
+
+¥é¥¤¥»¥ó¥¹
+
+ ¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤¬Ruby¤È°ì½ï¤Ë»ÈÍѤޤ¿¤ÏÇÛÉÛ¤µ¤ì¤ë¾ì¹ç¤Ë¤Ï¡¢
+ Ruby¤Î¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
+ ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¡¢BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
+
+
+Àµµ¬É½¸½
+
+ doc/RE.ja¤ò»²¾È
+
+
+»ÈÍÑÊýË¡
+
+ »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£
+ Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+
+ Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
+ ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£
+
+
+»ÈÍÑÎã¥×¥í¥°¥é¥à
+
+ sample/simple.c ºÇ¾®Îã (native API)
+ sample/names.c ̾Á°ÉÕ¤¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã
+ sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã
+ sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
+ sample/posix.c POSIX API»ÈÍÑÎã
+ sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
+ sample/syntax.c Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È
+
+
+¥½¡¼¥¹¥Õ¥¡¥¤¥ë
+
+ oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«)
+
+ regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ
+ regint.h ÆâÉôÀë¸À
+ regparse.h regparse.c¤Èregcomp.c¤Î¤¿¤á¤ÎÆâÉôÀë¸À
+ regcomp.c ¥³¥ó¥Ñ¥¤¥ë¡¢ºÇŬ²½´Ø¿ô
+ regenc.c ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤß
+ regerror.c ¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô
+ regext.c ³ÈÄ¥API´Ø¿ô
+ regexec.c ¸¡º÷¡¢¾È¹ç´Ø¿ô
+ regparse.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥ó²òÀÏ´Ø¿ô
+ regsyntax.c Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡ´Ø¿ô¡¢Áȹþ¤ßʸˡÄêµÁ
+ regtrav.c Êá³ÍÍúÎòÌÚ½ä²ó´Ø¿ô
+ regversion.c ÈǾðÊó´Ø¿ô
+ st.h ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ôÀë¸À
+ st.c ¥Ï¥Ã¥·¥å¥Æ¡¼¥Ö¥ë´Ø¿ô
+
+ oniggnu.h GNU regex API¥Ø¥Ã¥À (¸ø³«)
+ reggnu.c GNU regex API´Ø¿ô
+
+ onigposix.h POSIX API¥Ø¥Ã¥À (¸ø³«)
+ regposerr.c POSIX API¥¨¥é¡¼¥á¥Ã¥»¡¼¥¸´Ø¿ô
+ regposix.c POSIX API´Ø¿ô
+
+ enc/mktable.c ʸ»ú¥¿¥¤¥×¥Æ¡¼¥Ö¥ëÀ¸À®¥×¥í¥°¥é¥à
+ enc/ascii.c ASCII ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_jp.c EUC-JP ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_tw.c EUC-TW ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/iso8859_1.c ISO-8859-1 (Latin-1)
+ enc/iso8859_2.c ISO-8859-2 (Latin-2)
+ enc/iso8859_3.c ISO-8859-3 (Latin-3)
+ enc/iso8859_4.c ISO-8859-4 (Latin-4)
+ enc/iso8859_5.c ISO-8859-5 (Cyrillic)
+ enc/iso8859_6.c ISO-8859-6 (Arabic)
+ enc/iso8859_7.c ISO-8859-7 (Greek)
+ enc/iso8859_8.c ISO-8859-8 (Hebrew)
+ enc/iso8859_9.c ISO-8859-9 (Latin-5 ¤Þ¤¿¤Ï Turkish)
+ enc/iso8859_10.c ISO-8859-10 (Latin-6 ¤Þ¤¿¤Ï Nordic)
+ enc/iso8859_11.c ISO-8859-11 (Thai)
+ enc/iso8859_13.c ISO-8859-13 (Latin-7 ¤Þ¤¿¤Ï Baltic Rim)
+ enc/iso8859_14.c ISO-8859-14 (Latin-8 ¤Þ¤¿¤Ï Celtic)
+ enc/iso8859_15.c ISO-8859-15 (Latin-9 ¤Þ¤¿¤Ï West European with Euro)
+ enc/iso8859_16.c ISO-8859-16
+ (Latin-10 ¤Þ¤¿¤Ï South-Eastern European with Euro)
+ enc/utf8.c UTF-8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf16_be.c UTF-16BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf16_le.c UTF-16LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf32_be.c UTF-32BE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/utf32_le.c UTF-32LE ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/unicode.c Unicode¾ðÊó
+
+ win32/Makefile Win32ÍÑ Makefile (for VC++)
+ win32/config.h Win32ÍÑ config.h
+
+
+
+Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
+
+ + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£
+ + re_recompile_pattern() ¤¬Äɲ䵤줿¡£
+ + re_alloc_pattern() ¤¬Äɲ䵤줿¡£
+
+
+»Ä·ï
+
+ ? UnicodeÁ´¥³¡¼¥É¥Ý¥¤¥ó¥ÈÎΰè¤Ç¤ÎÂçʸ»ú¾®Ê¸»ú¾È¹ç
+ ? Unicode¥×¥í¥Ñ¥Æ¥£
+ ? ambig-flag Katakana <-> Hiragana
+ ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z)
+ ? ONIG_SYNTAX_ASISÄɲÃ
+ ?? \X (== \PM\pM*)
+ ?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ
+ ?? ²þ¹Ôʸ»ú(ʸ»úÎó)¤òÊѹ¹¤Ç¤¤ë
+ ?? ¸¡º÷°ÌÃÖ°ÜÆ°Ää»ß±é»»»Ò (match_at()¤«¤éONIG_STOP¤òÊÖ¤¹)
+
+and I'm thankful to Akinori MUSHA.
+
+
+Mail Address: K.Kosako
diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in
index 1a59a45dc07..5ca2056fb39 100644
--- a/ext/mbstring/oniguruma/config.h.in
+++ b/ext/mbstring/oniguruma/config.h.in
@@ -49,6 +49,9 @@
/* Define if you have the header file. */
#undef HAVE_STRINGS_H
+/* Define if you have the header file. */
+#undef HAVE_SYS_TYPES_H
+
/* Define if you have the header file. */
#undef HAVE_SYS_TIME_H
diff --git a/ext/mbstring/oniguruma/enc/ascii.c b/ext/mbstring/oniguruma/enc/ascii.c
index 44cc78f77c3..64be21d7fff 100644
--- a/ext/mbstring/oniguruma/enc/ascii.c
+++ b/ext/mbstring/oniguruma/enc/ascii.c
@@ -1,14 +1,36 @@
/**********************************************************************
-
ascii.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
static int
-ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
@@ -17,38 +39,29 @@ ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingASCII = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"US-ASCII", /* name */
1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_to_lower,
- onigenc_ascii_mbc_is_case_ambig,
- ascii_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ ascii_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
index 8aad7f53547..763872e963a 100644
--- a/ext/mbstring/oniguruma/enc/big5.c
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -1,14 +1,61 @@
/**********************************************************************
-
big5.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_BIG5[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+big5_mbc_enc_len(const UChar* p)
+{
+ return EncLen_BIG5[*p];
+}
+
static OnigCodePoint
-big5_mbc_to_code(UChar* p, UChar* end)
+big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
@@ -20,15 +67,23 @@ big5_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-big5_mbc_to_lower(UChar* p, UChar* lower)
+big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
{
- return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_BIG5, p, lower);
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
+ pp, end, lower);
}
static int
-big5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- return onigenc_mb2_code_is_ctype(ONIG_ENCODING_BIG5, code, ctype);
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
+}
+
+static int
+big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
}
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
@@ -50,16 +105,16 @@ static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
-#define BIG5_ISMB_FIRST(byte) (OnigEncodingBIG5.len_table[byte] > 1)
+#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1)
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]
static UChar*
-big5_left_adjust_char_head(UChar* start, UChar* s)
+big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
if (BIG5_ISMB_TRAIL(*p)) {
@@ -70,53 +125,44 @@ big5_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
- len = enc_len(ONIG_ENCODING_BIG5, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_BIG5, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-big5_is_allowed_reverse_match(UChar* s, UChar* end)
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
}
OnigEncodingType OnigEncodingBIG5 = {
+ big5_mbc_enc_len,
+ "Big5", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "Big5", /* name */
- 2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
- big5_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- big5_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ big5_mbc_to_normalize,
+ big5_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ big5_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
- big5_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ big5_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c
index 848016ba5a0..5f13e33eb4c 100644
--- a/ext/mbstring/oniguruma/enc/euc_jp.c
+++ b/ext/mbstring/oniguruma/enc/euc_jp.c
@@ -1,23 +1,69 @@
/**********************************************************************
-
euc_jp.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
+static int EncLen_EUCJP[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+eucjp_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCJP[*p];
+}
+
static OnigCodePoint
-eucjp_mbc_to_code(UChar* p, UChar* end)
+eucjp_mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
- c = *p++;
- len = enc_len(ONIG_ENCODING_EUC_JP, c);
- n = c;
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ n = (OnigCodePoint )*p++;
if (len == 1) return n;
for (i = 1; i < len; i++) {
@@ -31,11 +77,13 @@ eucjp_mbc_to_code(UChar* p, UChar* end)
static int
eucjp_code_to_mbclen(OnigCodePoint code)
{
- if ((code & 0xff0000) != 0) return 3;
+ if (ONIGENC_IS_CODE_ASCII(code)) return 1;
+ else if ((code & 0xff0000) != 0) return 3;
else if ((code & 0xff00) != 0) return 2;
- else return 1;
+ else return 0;
}
+#if 0
static int
eucjp_code_to_mbc_first(OnigCodePoint code)
{
@@ -43,27 +91,16 @@ eucjp_code_to_mbc_first(OnigCodePoint code)
if ((code & 0xff0000) != 0) {
first = (code >> 16) & 0xff;
- /*
- if (enc_len(ONIG_ENCODING_EUC_JP, first) != 3)
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
- */
}
else if ((code & 0xff00) != 0) {
first = (code >> 8) & 0xff;
- /*
- if (enc_len(ONIG_ENCODING_EUC_JP, first) != 2)
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
- */
}
else {
- /*
- if (enc_len(ONIG_ENCODING_EUC_JP, code) != 1)
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
- */
return (int )code;
}
return first;
}
+#endif
static int
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
@@ -75,44 +112,57 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != (p - buf))
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
+ return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
static int
-eucjp_mbc_to_lower(UChar* p, UChar* lower)
+eucjp_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
int len;
+ const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp)++;
return 1;
}
else {
- len = enc_len(ONIG_ENCODING_EUC_JP, *p);
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
if (lower != p) {
- /* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
+ (*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static int
-eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end);
+}
+
+static int
+eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- int first = eucjp_code_to_mbc_first(code);
- return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
- }
+ else
+ return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@@ -125,28 +175,28 @@ eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
static UChar*
-eucjp_left_adjust_char_head(UChar* start, UChar* s)
+eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
{
- /* Assumed in this encoding,
- mb-trail bytes don't mix with single bytes.
+ /* In this encoding
+ mb-trail bytes doesn't mix with single bytes.
*/
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
while (!eucjp_islead(*p) && p > start) p--;
- len = enc_len(ONIG_ENCODING_EUC_JP, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_EUC_JP, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
+eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
if (c <= 0x7e || c == 0x8e || c == 0x8f)
return TRUE;
else
@@ -154,38 +204,29 @@ eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
}
OnigEncodingType OnigEncodingEUC_JP = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
- },
+ eucjp_mbc_enc_len,
"EUC-JP", /* name */
- 3, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 3, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
eucjp_mbc_to_code,
eucjp_code_to_mbclen,
eucjp_code_to_mbc,
- eucjp_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- eucjp_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ eucjp_mbc_to_normalize,
+ eucjp_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ eucjp_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
eucjp_left_adjust_char_head,
- eucjp_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ eucjp_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c
index 1a0fc996dcd..c1e83b7e660 100644
--- a/ext/mbstring/oniguruma/enc/euc_kr.c
+++ b/ext/mbstring/oniguruma/enc/euc_kr.c
@@ -1,14 +1,61 @@
/**********************************************************************
-
euc_kr.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_EUCKR[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+euckr_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCKR[*p];
+}
+
static OnigCodePoint
-euckr_mbc_to_code(UChar* p, UChar* end)
+euckr_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
}
@@ -20,117 +67,107 @@ euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-euckr_mbc_to_lower(UChar* p, UChar* lower)
+euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
{
- return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_EUC_KR, p, lower);
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag,
+ pp, end, lower);
}
static int
-euckr_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- return onigenc_mb2_code_is_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
+}
+
+static int
+euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
}
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
static UChar*
-euckr_left_adjust_char_head(UChar* start, UChar* s)
+euckr_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
while (!euckr_islead(*p) && p > start) p--;
- len = enc_len(ONIG_ENCODING_EUC_KR, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_EUC_KR, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-euckr_is_allowed_reverse_match(UChar* s, UChar* end)
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
OnigEncodingType OnigEncodingEUC_KR = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
- },
+ euckr_mbc_enc_len,
"EUC-KR", /* name */
- 2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
- euckr_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- euckr_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
- euckr_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ euckr_is_allowed_reverse_match
};
/* Same with OnigEncodingEUC_KR except the name */
OnigEncodingType OnigEncodingEUC_CN = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
- },
+ euckr_mbc_enc_len,
"EUC-CN", /* name */
- 2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
- euckr_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- euckr_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ euckr_mbc_to_normalize,
+ euckr_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
- euckr_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ euckr_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c
index b39a9a5b7ac..4e5851a4515 100644
--- a/ext/mbstring/oniguruma/enc/euc_tw.c
+++ b/ext/mbstring/oniguruma/enc/euc_tw.c
@@ -1,14 +1,61 @@
/**********************************************************************
-
euc_tw.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_EUCTW[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
+};
+
+static int
+euctw_mbc_enc_len(const UChar* p)
+{
+ return EncLen_EUCTW[*p];
+}
+
static OnigCodePoint
-euctw_mbc_to_code(UChar* p, UChar* end)
+euctw_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
}
@@ -20,79 +67,78 @@ euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-euctw_mbc_to_lower(UChar* p, UChar* lower)
+euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
{
- return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_EUC_TW, p, lower);
+ return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
+ pp, end, lower);
}
static int
-euctw_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- return onigenc_mb4_code_is_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end);
+}
+
+static int
+euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
}
#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff)
static UChar*
-euctw_left_adjust_char_head(UChar* start, UChar* s)
+euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
while (!euctw_islead(*p) && p > start) p--;
- len = enc_len(ONIG_ENCODING_EUC_TW, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_EUC_TW, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-euctw_is_allowed_reverse_match(UChar* s, UChar* end)
+euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
OnigEncodingType OnigEncodingEUC_TW = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
- },
+ euctw_mbc_enc_len,
"EUC-TW", /* name */
- 4, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 4, /* max enc length */
+ 1, /* min enc length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
euctw_mbc_to_code,
onigenc_mb4_code_to_mbclen,
euctw_code_to_mbc,
- euctw_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- euctw_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ euctw_mbc_to_normalize,
+ euctw_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ euctw_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
- euctw_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ euctw_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c
index 662f0e2c079..53ad52ee13d 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_1.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_1.c
@@ -1,112 +1,145 @@
/**********************************************************************
-
iso8859_1.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
-#define ENC_ISO_8859_1_TO_LOWER_CASE(c) EncISO_8859_1_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_1_ToLowerCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
-};
-
static unsigned short EncISO_8859_1_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
- 0x0050, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x00d0,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_1_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_1_mbc_is_case_ambig(UChar* p)
+iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_1_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
+ }
}
- return (v != 0 ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_1_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_1_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
@@ -115,38 +148,31 @@ iso_8859_1_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_1 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-1", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_1_mbc_to_lower,
- iso_8859_1_mbc_is_case_ambig,
- iso_8859_1_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_1_mbc_to_normalize,
+ iso_8859_1_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_1_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c
index ac493037144..a9331cebf35 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_10.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_10.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_10.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
@@ -47,69 +69,114 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_10_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
- 0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_10_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_10_mbc_is_case_ambig(UChar* p)
+iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_10_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
- }
- else if (v != 0) {
- return TRUE;
+ }
}
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_10_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf is lower case letter, but can't convert. */
+ if (*p == 0xdf)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
return FALSE;
}
static int
-iso_8859_10_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
@@ -117,39 +184,144 @@ iso_8859_10_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xb1 },
+ { 0xa2, 0xb2 },
+ { 0xa3, 0xb3 },
+ { 0xa4, 0xb4 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa8, 0xb8 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xb1, 0xa1 },
+ { 0xb2, 0xa2 },
+ { 0xb3, 0xa3 },
+ { 0xb4, 0xa4 },
+ { 0xb5, 0xa5 },
+ { 0xb6, 0xa6 },
+ { 0xb8, 0xa8 },
+ { 0xb9, 0xa9 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xab },
+ { 0xbc, 0xac },
+ { 0xbe, 0xae },
+ { 0xbf, 0xaf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_10 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-10", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_10_mbc_to_lower,
- iso_8859_10_mbc_is_case_ambig,
- iso_8859_10_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_10_mbc_to_normalize,
+ iso_8859_10_is_mbc_ambiguous,
+ iso_8859_10_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_10_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c
index ebe81d325fc..bb1098807ac 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_11.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_11.c
@@ -1,52 +1,74 @@
/**********************************************************************
-
iso8859_11.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
((EncISO_8859_11_CtypeTable[code] & ctype) != 0)
static unsigned short EncISO_8859_11_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
-iso_8859_11_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
@@ -55,38 +77,29 @@ iso_8859_11_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_11 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-11", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "ISO-8859-11", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_to_lower,
- onigenc_ascii_mbc_is_case_ambig,
- iso_8859_11_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_11_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c
index 8de7251d2f0..827ca508e8b 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_13.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_13.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_13.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
@@ -47,69 +69,114 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_13_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x00d0, 0x0050, 0x0050,
- 0x0a51, 0x0050, 0x0a51, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0a51,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x00d0, 0x0871, 0x0050, 0x00d0,
- 0x0871, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0
};
static int
-iso_8859_13_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_13_mbc_is_case_ambig(UChar* p)
+iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_13_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf || *p == 0xb5)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
- }
- else if (v != 0) {
- return TRUE;
+ }
}
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_13_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf, 0xb5 are lower case letter, but can't convert. */
+ if (*p == 0xdf || *p == 0xb5)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
return FALSE;
}
static int
-iso_8859_13_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
@@ -117,39 +184,114 @@ iso_8859_13_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_13 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-13", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "ISO-8859-13", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_13_mbc_to_lower,
- iso_8859_13_mbc_is_case_ambig,
- iso_8859_13_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_13_mbc_to_normalize,
+ iso_8859_13_is_mbc_ambiguous,
+ iso_8859_13_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_13_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c
index 333deeba320..4fe5ab29d1e 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_14.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_14.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_14.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
@@ -47,69 +69,114 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_14_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0871, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x0050,
- 0x0a51, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x00d0, 0x0050, 0x0a51,
- 0x0a51, 0x0871, 0x0a51, 0x0871, 0x0a51, 0x0871, 0x0050, 0x0a51,
- 0x0871, 0x0871, 0x0871, 0x0a51, 0x0871, 0x0a51, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0,
+ 0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2,
+ 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_14_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_14_mbc_is_case_ambig(UChar* p)
+iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_14_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
- }
- else if (v != 0) {
- return TRUE;
+ }
}
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_14_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf is lower case letter, but can't convert. */
+ if (*p == 0xdf)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
return FALSE;
}
static int
-iso_8859_14_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
@@ -117,39 +184,144 @@ iso_8859_14_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xa2 },
+ { 0xa2, 0xa1 },
+ { 0xa4, 0xa5 },
+ { 0xa5, 0xa4 },
+ { 0xa6, 0xab },
+ { 0xa8, 0xb8 },
+ { 0xaa, 0xba },
+ { 0xab, 0xa6 },
+ { 0xac, 0xbc },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xb1 },
+ { 0xb1, 0xb0 },
+ { 0xb2, 0xb3 },
+ { 0xb3, 0xb2 },
+ { 0xb4, 0xb5 },
+ { 0xb5, 0xb4 },
+ { 0xb7, 0xb9 },
+ { 0xb8, 0xa8 },
+ { 0xb9, 0xb7 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xbf },
+ { 0xbc, 0xac },
+ { 0xbd, 0xbe },
+ { 0xbe, 0xbd },
+ { 0xbf, 0xbb },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde },
+ { 0xff, 0xaf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_14 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-14", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "ISO-8859-14", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_14_mbc_to_lower,
- iso_8859_14_mbc_is_case_ambig,
- iso_8859_14_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_14_mbc_to_normalize,
+ iso_8859_14_is_mbc_ambiguous,
+ iso_8859_14_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_14_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c
index 49cb266058b..1a8bd7b4c5b 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_15.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_15.c
@@ -1,15 +1,33 @@
/**********************************************************************
-
iso8859_15.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
-#include "regenc.h"
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
-/* reference
- http://en.wikipedia.org/wiki/ISO_8859-15
-*/
+#include "regenc.h"
#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
@@ -51,65 +69,114 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_15_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0a51, 0x0050,
- 0x0871, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x0a51, 0x0871, 0x0050, 0x00d0,
- 0x0871, 0x0850, 0x0871, 0x00d0, 0x0a51, 0x0871, 0x0a51, 0x00d0,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0,
+ 0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_15_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_15_mbc_is_case_ambig(UChar* p)
+iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_15_CtypeTable[*p]
- & (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
+ }
}
- return (v != 0 ? TRUE : FALSE);
+
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_15_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf etc.. are lower case letter, but can't convert. */
+ if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_15_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
@@ -117,39 +184,124 @@ iso_8859_15_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa6, 0xa8 },
+ { 0xa8, 0xa6 },
+
+ { 0xb4, 0xb8 },
+ { 0xb8, 0xb4 },
+ { 0xbc, 0xbd },
+ { 0xbd, 0xbc },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde },
+ { 0xff, 0xbe }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_15 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-15", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_15_mbc_to_lower,
- iso_8859_15_mbc_is_case_ambig,
- iso_8859_15_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_15_mbc_to_normalize,
+ iso_8859_15_is_mbc_ambiguous,
+ iso_8859_15_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_15_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c
index e59ea0f42c4..e283db17ccf 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_16.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_16.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_16.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
@@ -47,69 +69,114 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_16_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0871, 0x0a51, 0x0050, 0x00d0, 0x0a51, 0x0050,
- 0x0871, 0x0050, 0x0a51, 0x00d0, 0x0a51, 0x00d0, 0x0871, 0x0a51,
- 0x0050, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x00d0, 0x0050, 0x00d0,
- 0x0871, 0x0871, 0x0871, 0x00d0, 0x0a51, 0x0871, 0x0a51, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0,
+ 0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2,
+ 0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_16_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_16_mbc_is_case_ambig(UChar* p)
+iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_16_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
- }
- else if (v != 0) {
- return TRUE;
+ }
}
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_16_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf is lower case letter, but can't convert. */
+ if (*p == 0xdf)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
return FALSE;
}
static int
-iso_8859_16_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
@@ -117,39 +184,138 @@ iso_8859_16_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xa2 },
+ { 0xa2, 0xa1 },
+ { 0xa3, 0xb3 },
+ { 0xa6, 0xa8 },
+ { 0xa8, 0xa6 },
+ { 0xaa, 0xba },
+ { 0xac, 0xae },
+ { 0xae, 0xac },
+ { 0xaf, 0xbf },
+
+ { 0xb2, 0xb9 },
+ { 0xb3, 0xa3 },
+ { 0xb4, 0xb8 },
+ { 0xb8, 0xb4 },
+ { 0xb9, 0xb2 },
+ { 0xba, 0xaa },
+ { 0xbc, 0xbd },
+ { 0xbd, 0xbc },
+ { 0xbe, 0xff },
+ { 0xbf, 0xaf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde },
+ { 0xff, 0xbe }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_16 = {
+ onigenc_single_byte_mbc_enc_len,
+ "ISO-8859-16", /* name */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- "ISO-8859-16", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_16_mbc_to_lower,
- iso_8859_16_mbc_is_case_ambig,
- iso_8859_16_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_16_mbc_to_normalize,
+ iso_8859_16_is_mbc_ambiguous,
+ iso_8859_16_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_16_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c
index 3d6b0a6573d..e86415b9c96 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_2.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_2.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_2.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
@@ -47,66 +69,218 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_2_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0050, 0x0a51, 0x0050, 0x0a51, 0x0a51, 0x0050,
- 0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
- 0x0050, 0x0871, 0x0050, 0x0871, 0x0050, 0x0871, 0x0871, 0x0050,
- 0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
static int
-iso_8859_2_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_2_mbc_is_case_ambig(UChar* p)
+iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_2_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
+ }
}
- return (v != 0 ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_2_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf is lower case letter, but can't convert. */
+ if (*p == 0xdf)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_2_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xb1, 0xa1 },
+ { 0xb3, 0xa3 },
+ { 0xb5, 0xa5 },
+ { 0xb6, 0xa6 },
+ { 0xb9, 0xa9 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xab },
+ { 0xbc, 0xac },
+ { 0xbe, 0xae },
+ { 0xbf, 0xaf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
+static int
+iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
@@ -115,38 +289,31 @@ iso_8859_2_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_2 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-2", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_2_mbc_to_lower,
- iso_8859_2_mbc_is_case_ambig,
- iso_8859_2_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_2_mbc_to_normalize,
+ iso_8859_2_is_mbc_ambiguous,
+ iso_8859_2_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_2_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c
index 37a3089aabd..76d2bec8a87 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_3.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_3.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_3.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c]
@@ -47,66 +69,114 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_3_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0050, 0x0050, 0x0050, 0x0000, 0x0a51, 0x0050,
- 0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0000, 0x0a51,
- 0x0050, 0x0871, 0x0850, 0x0850, 0x0050, 0x0871, 0x0871, 0x00d0,
- 0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0850, 0x0000, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0000, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0000, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2,
+ 0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
static int
-iso_8859_3_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_3_mbc_is_case_ambig(UChar* p)
+iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_3_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf || *p == 0xb5)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
+ }
}
- return (v != 0 ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_3_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (*p == 0xdf || *p == 0xb5)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_3_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
@@ -114,39 +184,125 @@ iso_8859_3_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xb1 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xaf, 0xbf },
+ { 0xb1, 0xa1 },
+ { 0xb6, 0xa6 },
+ { 0xb9, 0xa9 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xab },
+ { 0xbc, 0xac },
+ { 0xbf, 0xaf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_3 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-3", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_3_mbc_to_lower,
- iso_8859_3_mbc_is_case_ambig,
- iso_8859_3_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_3_mbc_to_normalize,
+ iso_8859_3_is_mbc_ambiguous,
+ iso_8859_3_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_3_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c
index 897eec957fd..7569006725c 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_4.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_4.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_4.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c]
@@ -47,69 +69,114 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_4_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0871, 0x0a51, 0x0050, 0x0a51, 0x0a51, 0x0050,
- 0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0050,
- 0x0050, 0x0871, 0x0050, 0x0871, 0x0050, 0x0871, 0x0871, 0x0050,
- 0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0a51, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
+ 0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0,
+ 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
static int
-iso_8859_4_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_4_mbc_is_case_ambig(UChar* p)
+iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_4_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf is lower case letter, but can't convert. */
- if (*p == 0xdf || *p == 0xa2)
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
- }
- else if (v != 0) {
- return TRUE;
+ }
}
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_4_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (*p == 0xdf || *p == 0xa2)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
return FALSE;
}
static int
-iso_8859_4_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
@@ -117,39 +184,134 @@ iso_8859_4_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+
+ { 0xb1, 0xa1 },
+ { 0xb3, 0xa3 },
+ { 0xb5, 0xa5 },
+ { 0xb6, 0xa6 },
+ { 0xb9, 0xa9 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xab },
+ { 0xbc, 0xac },
+ { 0xbe, 0xae },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_4 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-4", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_4_mbc_to_lower,
- iso_8859_4_mbc_is_case_ambig,
- iso_8859_4_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_4_mbc_to_normalize,
+ iso_8859_4_is_mbc_ambiguous,
+ iso_8859_4_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_4_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c
index 34e0f9db3ee..2f7677b3e7a 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_5.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_5.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_5.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
@@ -47,58 +69,80 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_5_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2
};
static int
-iso_8859_5_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_5_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_5_mbc_is_case_ambig(UChar* p)
+iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_5_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- return (v != 0 ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_5_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
@@ -106,39 +150,147 @@ iso_8859_5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xf1 },
+ { 0xa2, 0xf2 },
+ { 0xa3, 0xf3 },
+ { 0xa4, 0xf4 },
+ { 0xa5, 0xf5 },
+ { 0xa6, 0xf6 },
+ { 0xa7, 0xf7 },
+ { 0xa8, 0xf8 },
+ { 0xa9, 0xf9 },
+ { 0xaa, 0xfa },
+ { 0xab, 0xfb },
+ { 0xac, 0xfc },
+ { 0xae, 0xfe },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xd0 },
+ { 0xb1, 0xd1 },
+ { 0xb2, 0xd2 },
+ { 0xb3, 0xd3 },
+ { 0xb4, 0xd4 },
+ { 0xb5, 0xd5 },
+ { 0xb6, 0xd6 },
+ { 0xb7, 0xd7 },
+ { 0xb8, 0xd8 },
+ { 0xb9, 0xd9 },
+ { 0xba, 0xda },
+ { 0xbb, 0xdb },
+ { 0xbc, 0xdc },
+ { 0xbd, 0xdd },
+ { 0xbe, 0xdf },
+ { 0xbf, 0xdf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xb0 },
+ { 0xd1, 0xb1 },
+ { 0xd2, 0xb2 },
+ { 0xd3, 0xb3 },
+ { 0xd4, 0xb4 },
+ { 0xd5, 0xb5 },
+ { 0xd6, 0xb6 },
+ { 0xd7, 0xb7 },
+ { 0xd8, 0xb8 },
+ { 0xd9, 0xb9 },
+ { 0xda, 0xba },
+ { 0xdb, 0xbb },
+ { 0xdc, 0xbc },
+ { 0xdd, 0xbd },
+ { 0xde, 0xbe },
+ { 0xdf, 0xbf },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf1, 0xa1 },
+ { 0xf2, 0xa2 },
+ { 0xf3, 0xa3 },
+ { 0xf4, 0xa4 },
+ { 0xf5, 0xa5 },
+ { 0xf6, 0xa6 },
+ { 0xf7, 0xa7 },
+ { 0xf8, 0xa8 },
+ { 0xf9, 0xa9 },
+ { 0xfa, 0xaa },
+ { 0xfb, 0xab },
+ { 0xfc, 0xac },
+ { 0xfe, 0xae },
+ { 0xff, 0xaf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_5 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-5", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_5_mbc_to_lower,
- iso_8859_5_mbc_is_case_ambig,
- iso_8859_5_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_5_mbc_to_normalize,
+ iso_8859_5_is_mbc_ambiguous,
+ iso_8859_5_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_5_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c
index 956e982878a..0fcb9e8b836 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_6.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_6.c
@@ -1,52 +1,74 @@
/**********************************************************************
-
iso8859_6.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
static unsigned short EncISO_8859_6_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0000, 0x0000, 0x0000, 0x0050, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x00d0, 0x00d0, 0x0000, 0x0000,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x00d0, 0x0000, 0x0000, 0x0000, 0x00d0,
- 0x0000, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
-iso_8859_6_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
@@ -55,38 +77,29 @@ iso_8859_6_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_6 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-6", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_to_lower,
- onigenc_ascii_mbc_is_case_ambig,
- iso_8859_6_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_6_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c
index 1ea225007fe..8b2cb9ec592 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_7.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_7.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_7.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c]
@@ -47,65 +69,87 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_7_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x00d0, 0x00d0, 0x0050, 0x0000, 0x0000, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0000, 0x00d0, 0x0050, 0x00d0, 0x0000, 0x00d0,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0050, 0x0a51, 0x00d0,
- 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0850, 0x0a51, 0x0a51,
- 0x0871, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0000
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2,
+ 0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000
};
static int
-iso_8859_7_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_7_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_7_mbc_is_case_ambig(UChar* p)
+iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_7_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- if (*p == 0xc0 || *p == 0xe0)
- return FALSE;
- else
- return TRUE;
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_7_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ if (*p == 0xc0 || *p == 0xe0)
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
}
-
- return (v != 0 ? TRUE : FALSE);
+ return FALSE;
}
static int
-iso_8859_7_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
@@ -113,39 +157,122 @@ iso_8859_7_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xb6, 0xdc },
+ { 0xb8, 0xdd },
+ { 0xb9, 0xde },
+ { 0xba, 0xdf },
+ { 0xbc, 0xfc },
+ { 0xbe, 0xfd },
+ { 0xbf, 0xfe },
+
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xb6 },
+ { 0xdd, 0xb8 },
+ { 0xde, 0xb9 },
+ { 0xdf, 0xba },
+
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xbc },
+ { 0xfd, 0xbe },
+ { 0xfe, 0xbf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_7 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-7", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_7_mbc_to_lower,
- iso_8859_7_mbc_is_case_ambig,
- iso_8859_7_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_7_mbc_to_normalize,
+ iso_8859_7_is_mbc_ambiguous,
+ iso_8859_7_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_7_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c
index d87774a8327..3c95b9b1375 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_8.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_8.c
@@ -1,52 +1,74 @@
/**********************************************************************
-
iso8859_8.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
static unsigned short EncISO_8859_8_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0000, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
- 0x0050, 0x0850, 0x0050, 0x00d0, 0x0850, 0x0850, 0x0850, 0x0000,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d0,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
- 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
+ 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
-iso_8859_8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
@@ -55,38 +77,29 @@ iso_8859_8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_8 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-8", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_to_lower,
- onigenc_ascii_mbc_is_case_ambig,
- iso_8859_8_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ onigenc_ascii_mbc_to_normalize,
+ onigenc_ascii_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ iso_8859_8_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c
index 9638b929943..1b061ff6ea7 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_9.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_9.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
iso8859_9.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c]
@@ -47,66 +69,114 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_9_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
- 0x0050, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x00d0,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
static int
-iso_8859_9_mbc_to_lower(UChar* p, UChar* lower)
+iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ *lower = 0xdf;
+ (*pp) += 2;
+ return 1;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-iso_8859_9_mbc_is_case_ambig(UChar* p)
+iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- int v = (EncISO_8859_9_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- if ((v | ONIGENC_CTYPE_LOWER) != 0) {
- /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
- return FALSE;
- else
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 1) {
+ if ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S'))) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (*p == 0xdf) {
+ (*pp)++;
return TRUE;
+ }
}
- return (v != 0 ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncISO_8859_9_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xdf etc.. are lower case letter, but can't convert. */
+ if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
+ return FALSE;
+ else
+ return TRUE;
+ }
+
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-iso_8859_9_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
@@ -114,39 +184,114 @@ iso_8859_9_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingISO_8859_9 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"ISO-8859-9", /* name */
- 1, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_9_mbc_to_lower,
- iso_8859_9_mbc_is_case_ambig,
- iso_8859_9_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ iso_8859_9_mbc_to_normalize,
+ iso_8859_9_is_mbc_ambiguous,
+ iso_8859_9_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ iso_8859_9_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_get_all_fold_match_code_ss_0xdf,
- onigenc_get_fold_match_info_ss_0xdf
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c
index 9dba2641c96..84afa1afef3 100644
--- a/ext/mbstring/oniguruma/enc/koi8.c
+++ b/ext/mbstring/oniguruma/enc/koi8.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
koi8.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c]
@@ -47,58 +69,79 @@ static UChar EncKOI8_ToLowerCaseTable[256] = {
};
static unsigned short EncKOI8_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
- 0x0142, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
};
static int
-koi8_mbc_to_lower(UChar* p, UChar* lower)
+koi8_mbc_to_normalize(OnigAmbigType flag,
+ UChar** pp, UChar* end, UChar* lower)
{
- *lower = ENC_KOI8_TO_LOWER_CASE(*p);
+ UChar* p = *pp;
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_KOI8_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-koi8_mbc_is_case_ambig(UChar* p)
+koi8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end)
{
- int v = (EncKOI8_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ UChar* p = *pp;
- return ((v != 0) ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncKOI8_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
+
static int
-koi8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_CTYPE(code, ctype);
@@ -106,39 +149,116 @@ koi8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+ { 0xdf, 0xff },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfe, 0xde },
+ { 0xff, 0xdf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingKOI8 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"KOI8", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- koi8_mbc_to_lower,
- koi8_mbc_is_case_ambig,
- koi8_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ koi8_mbc_to_normalize,
+ koi8_is_mbc_ambiguous,
+ koi8_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ koi8_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c
index 20e7d73a12b..7c626df6161 100644
--- a/ext/mbstring/oniguruma/enc/koi8_r.c
+++ b/ext/mbstring/oniguruma/enc/koi8_r.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
koi8_r.c - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c]
@@ -47,58 +69,78 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = {
};
static unsigned short EncKOI8_R_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
- 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0142, 0x0050, 0x0050, 0x0850, 0x00d0, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0871, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0a51, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
- 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
+ 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
};
static int
-koi8_r_mbc_to_lower(UChar* p, UChar* lower)
+koi8_r_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- *lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
+ const UChar* p = *pp;
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
static int
-koi8_r_mbc_is_case_ambig(UChar* p)
+koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- int v = (EncKOI8_R_CtypeTable[*p] &
- (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ const UChar* p = *pp;
- return ((v != 0) ? TRUE : FALSE);
+ (*pp)++;
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ int v = (EncKOI8_R_CtypeTable[*p] &
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ return (v != 0 ? TRUE : FALSE);
+ }
+ return FALSE;
}
static int
-koi8_r_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_R_CTYPE(code, ctype);
@@ -106,39 +148,116 @@ koi8_r_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
+static int
+koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ static OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+ { 0xdf, 0xff },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfe, 0xde },
+ { 0xff, 0xdf }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return 52;
+ }
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
+}
+
OnigEncodingType OnigEncodingKOI8_R = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- },
+ onigenc_single_byte_mbc_enc_len,
"KOI8-R", /* name */
- 1, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* max enc length */
+ 1, /* min enc length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- koi8_r_mbc_to_lower,
- koi8_r_mbc_is_case_ambig,
- koi8_r_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ koi8_r_mbc_to_normalize,
+ koi8_r_is_mbc_ambiguous,
+ koi8_r_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ koi8_r_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
- onigenc_single_byte_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c
index 67e9a61313b..6b9ef4c5b5b 100644
--- a/ext/mbstring/oniguruma/enc/mktable.c
+++ b/ext/mbstring/oniguruma/enc/mktable.c
@@ -1,32 +1,55 @@
/**********************************************************************
-
mktable.c
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include
#define NOT_RUBY
#include "regenc.h"
-#define ISO_8859_1 0
-#define ISO_8859_2 1
-#define ISO_8859_3 2
-#define ISO_8859_4 3
-#define ISO_8859_5 4
-#define ISO_8859_6 5
-#define ISO_8859_7 6
-#define ISO_8859_8 7
-#define ISO_8859_9 8
-#define ISO_8859_10 9
-#define ISO_8859_11 10
-#define ISO_8859_13 11
-#define ISO_8859_14 12
-#define ISO_8859_15 13
-#define ISO_8859_16 14
-#define KOI8 15
-#define KOI8_R 16
+#define UNICODE_ISO_8859_1 0
+#define ISO_8859_1 1
+#define ISO_8859_2 2
+#define ISO_8859_3 3
+#define ISO_8859_4 4
+#define ISO_8859_5 5
+#define ISO_8859_6 6
+#define ISO_8859_7 7
+#define ISO_8859_8 8
+#define ISO_8859_9 9
+#define ISO_8859_10 10
+#define ISO_8859_11 11
+#define ISO_8859_13 12
+#define ISO_8859_14 13
+#define ISO_8859_15 14
+#define ISO_8859_16 15
+#define KOI8 16
+#define KOI8_R 17
typedef struct {
int num;
@@ -34,6 +57,7 @@ typedef struct {
} ENC_INFO;
static ENC_INFO Info[] = {
+ { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
{ ISO_8859_1, "ISO_8859_1" },
{ ISO_8859_2, "ISO_8859_2" },
{ ISO_8859_3, "ISO_8859_3" },
@@ -60,6 +84,7 @@ static int IsAlpha(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@@ -232,6 +257,7 @@ static int IsBlank(int enc, int c)
if (c == 0x09 || c == 0x20) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@@ -267,6 +293,9 @@ static int IsCntrl(int enc, int c)
if (c >= 0x00 && c <= 0x1F) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
+ if (c == 0xad) return 1;
+ /* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@@ -286,6 +315,7 @@ static int IsCntrl(int enc, int c)
if (c >= 0x7f && c <= 0x9F) return 1;
break;
+
case KOI8_R:
if (c == 0x7f) return 1;
break;
@@ -308,6 +338,7 @@ static int IsGraph(int enc, int c)
if (c >= 0x21 && c <= 0x7e) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_4:
@@ -376,6 +407,7 @@ static int IsLower(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@@ -504,6 +536,10 @@ static int IsPrint(int enc, int c)
if (c >= 0x20 && c <= 0x7e) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
+ if (c >= 0x09 && c <= 0x0d) return 1;
+ if (c == 0x85) return 1;
+ /* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_4:
@@ -572,11 +608,11 @@ static int IsPrint(int enc, int c)
static int IsPunct(int enc, int c)
{
-#ifndef BY_UNICODE_PROPERTY
- if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
- c == 0x7c || c == 0x7e) return 1;
- if (c >= 0x3c && c <= 0x3e) return 1;
-#endif
+ if (enc == UNICODE_ISO_8859_1) {
+ if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
+ c == 0x7c || c == 0x7e) return 1;
+ if (c >= 0x3c && c <= 0x3e) return 1;
+ }
if (c >= 0x21 && c <= 0x23) return 1;
if (c >= 0x25 && c <= 0x2a) return 1;
@@ -592,9 +628,11 @@ static int IsPunct(int enc, int c)
case ISO_8859_1:
case ISO_8859_9:
case ISO_8859_15:
+ if (c == 0xad) return 1;
+ /* fall */
+ case UNICODE_ISO_8859_1:
if (c == 0xa1) return 1;
if (c == 0xab) return 1;
- if (c == 0xad) return 1;
if (c == 0xb7) return 1;
if (c == 0xbb) return 1;
if (c == 0xbf) return 1;
@@ -675,6 +713,9 @@ static int IsSpace(int enc, int c)
if (c == 0x20) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
+ if (c == 0x85) return 1;
+ /* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@@ -710,6 +751,7 @@ static int IsUpper(int enc, int c)
if (c >= 0x41 && c <= 0x5a) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c >= 0xc0 && c <= 0xd6) return 1;
@@ -844,6 +886,7 @@ static int IsWord(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
+ case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@@ -1019,6 +1062,12 @@ static int IsAscii(int enc, int c)
return 0;
}
+static int IsNewline(int enc, int c)
+{
+ if (c == 0x0a) return 1;
+ return 0;
+}
+
static int exec(FILE* fp, ENC_INFO* einfo)
{
#define NCOL 8
@@ -1032,19 +1081,20 @@ static int exec(FILE* fp, ENC_INFO* einfo)
for (c = 0; c < 256; c++) {
val = 0;
- if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
- if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
- if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
- if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
- if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
- if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
- if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
- if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
- if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
- if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
- if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
- if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
- if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
+ if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE;
+ if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
+ if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
+ if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
+ if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
+ if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
+ if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
+ if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
+ if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
+ if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
+ if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
+ if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
+ if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
+ if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
if (c % NCOL == 0) fputs(" ", fp);
fprintf(fp, "0x%04x", val);
diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c
index 8485910e696..e13407bccfd 100644
--- a/ext/mbstring/oniguruma/enc/sjis.c
+++ b/ext/mbstring/oniguruma/enc/sjis.c
@@ -1,12 +1,53 @@
/**********************************************************************
-
sjis.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+static int EncLen_SJIS[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
+};
+
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -26,17 +67,39 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
-#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
+#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
+static int
+sjis_mbc_enc_len(const UChar* p)
+{
+ return EncLen_SJIS[*p];
+}
+
+extern int
+sjis_code_to_mbclen(OnigCodePoint code)
+{
+ if (code < 256) {
+ if (EncLen_SJIS[(int )code] == 1)
+ return 1;
+ else
+ return 0;
+ }
+ else if (code <= 0xffff) {
+ return 2;
+ }
+ else
+ return 0;
+}
+
static OnigCodePoint
-sjis_mbc_to_code(UChar* p, UChar* end)
+sjis_mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
+ len = enc_len(ONIG_ENCODING_SJIS, p);
c = *p++;
- len = enc_len(ONIG_ENCODING_SJIS, c);
n = c;
if (len == 1) return n;
@@ -57,43 +120,58 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 0
- if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
+ if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))
return REGERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
static int
-sjis_mbc_to_lower(UChar* p, UChar* lower)
+sjis_mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
- int len;
+ const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp)++;
return 1;
}
else {
- len = enc_len(ONIG_ENCODING_SJIS, *p);
+ int len = enc_len(ONIG_ENCODING_SJIS, p);
+
if (lower != p) {
- /* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
+ (*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static int
-sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
+sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
+
+}
+
+static int
+sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
- int first = onigenc_mb2_code_to_mbc_first(code);
- return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
+ return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
ctype &= ~ONIGENC_CTYPE_WORD;
@@ -107,12 +185,12 @@ sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
static UChar*
-sjis_left_adjust_char_head(UChar* start, UChar* s)
+sjis_left_adjust_char_head(const UChar* start, const UChar* s)
{
- UChar *p;
+ const UChar *p;
int len;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
if (SJIS_ISMB_TRAIL(*p)) {
@@ -123,52 +201,43 @@ sjis_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
- len = enc_len(ONIG_ENCODING_SJIS, *p);
- if (p + len > s) return p;
+ len = enc_len(ONIG_ENCODING_SJIS, p);
+ if (p + len > s) return (UChar* )p;
p += len;
- return p + ((s - p) & ~1);
+ return (UChar* )(p + ((s - p) & ~1));
}
static int
-sjis_is_allowed_reverse_match(UChar* s, UChar* end)
+sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
- UChar c = *s;
+ const UChar c = *s;
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
}
OnigEncodingType OnigEncodingSJIS = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
- },
+ sjis_mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
- FALSE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
- FALSE, /* is continuous sb mb codepoint */
+ 1, /* min byte length */
+ ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
sjis_mbc_to_code,
- onigenc_mb2_code_to_mbclen,
+ sjis_code_to_mbclen,
sjis_code_to_mbc,
- sjis_mbc_to_lower,
- onigenc_mbn_mbc_is_case_ambig,
- sjis_code_is_ctype,
- onigenc_nothing_get_ctype_code_range,
+ sjis_mbc_to_normalize,
+ sjis_is_mbc_ambiguous,
+ onigenc_ascii_get_all_pair_ambig_codes,
+ onigenc_nothing_get_all_comp_ambig_codes,
+ sjis_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
sjis_left_adjust_char_head,
- sjis_is_allowed_reverse_match,
- onigenc_nothing_get_all_fold_match_code,
- onigenc_nothing_get_fold_match_info
+ sjis_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c
new file mode 100644
index 00000000000..e3be9450a51
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/unicode.c
@@ -0,0 +1,3400 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+
+unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
+
+static OnigCodePoint CRAlnum[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 414,
+#else
+ 9,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlnum */
+
+static OnigCodePoint CRAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 396,
+#else
+ 8,
+#endif
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRAlpha */
+
+static OnigCodePoint CRBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 3,
+#endif
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRBlank */
+
+static OnigCodePoint CRCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 19,
+#else
+ 3,
+#endif
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRCntrl */
+
+static OnigCodePoint CRDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 23,
+#else
+ 1,
+#endif
+ 0x0030, 0x0039
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRDigit */
+
+static OnigCodePoint CRGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 2,
+#endif
+ 0x0021, 0x007e,
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRGraph */
+
+static OnigCodePoint CRLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 424,
+#else
+ 6,
+#endif
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRLower */
+
+static OnigCodePoint CRPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 405,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x007e,
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPrint */
+
+static OnigCodePoint CRPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 86,
+#else
+ 14,
+#endif
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRPunct */
+
+static OnigCodePoint CRSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 11,
+#else
+ 4,
+#endif
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRSpace */
+
+static OnigCodePoint CRUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 421,
+#else
+ 3,
+#endif
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRUpper */
+
+static OnigCodePoint CRXDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 3,
+#else
+ 3,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+};
+
+static OnigCodePoint CRASCII[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 1,
+#else
+ 1,
+#endif
+ 0x0000, 0x007f
+};
+
+static OnigCodePoint CRWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 436,
+#else
+ 12,
+#endif
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of CRWord */
+
+
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+ if (code < 256) {
+ return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+ }
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ return onig_is_in_code_range((UChar* )CRAlpha, code);
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ return onig_is_in_code_range((UChar* )CRBlank, code);
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ return onig_is_in_code_range((UChar* )CRCntrl, code);
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ return onig_is_in_code_range((UChar* )CRDigit, code);
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ return onig_is_in_code_range((UChar* )CRGraph, code);
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ return onig_is_in_code_range((UChar* )CRLower, code);
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ return onig_is_in_code_range((UChar* )CRPrint, code);
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ return onig_is_in_code_range((UChar* )CRPunct, code);
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ return onig_is_in_code_range((UChar* )CRSpace, code);
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ return onig_is_in_code_range((UChar* )CRUpper, code);
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ return FALSE;
+ break;
+ case ONIGENC_CTYPE_WORD:
+ return onig_is_in_code_range((UChar* )CRWord, code);
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ return FALSE;
+ break;
+ case ONIGENC_CTYPE_ALNUM:
+ return onig_is_in_code_range((UChar* )CRAlnum, code);
+ break;
+
+ default:
+ return ONIGENCERR_TYPE_BUG;
+ break;
+ }
+
+#else
+
+ if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+ return TRUE;
+ }
+ return FALSE;
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}
+
+extern int
+onigenc_unicode_get_ctype_code_range(int ctype,
+ OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+{
+ static OnigCodePoint EmptyRange[] = { 0 };
+
+#define CR_SET(list) do { \
+ *mbr = list; \
+} while (0)
+
+ *sbr = EmptyRange;
+
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ CR_SET(CRAlpha);
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ CR_SET(CRBlank);
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ CR_SET(CRCntrl);
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ CR_SET(CRDigit);
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ CR_SET(CRGraph);
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ CR_SET(CRLower);
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ CR_SET(CRPrint);
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ CR_SET(CRPunct);
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ CR_SET(CRSpace);
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ CR_SET(CRUpper);
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ CR_SET(CRXDigit);
+ break;
+ case ONIGENC_CTYPE_WORD:
+ CR_SET(CRWord);
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ CR_SET(CRASCII);
+ break;
+ case ONIGENC_CTYPE_ALNUM:
+ CR_SET(CRAlnum);
+ break;
+
+ default:
+ return ONIGENCERR_TYPE_BUG;
+ break;
+ }
+
+ return 0;
+}
diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c
new file mode 100755
index 00000000000..ad33ddbeeb3
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_be.c
@@ -0,0 +1,253 @@
+/**********************************************************************
+ utf16_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
+
+static int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+static int
+utf16be_mbc_enc_len(const UChar* p)
+{
+ return EncLen_UTF16[*p];
+}
+
+static int
+utf16be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p + 1 < end) {
+ if (*(p+1) == 0x0a && *p == 0x00)
+ return 1;
+ }
+ return 0;
+}
+
+static OnigCodePoint
+utf16be_mbc_to_code(const UChar* p, const UChar* end)
+{
+ OnigCodePoint code;
+
+ if (UTF16_IS_SURROGATE_FIRST(*p)) {
+ code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+ + ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+ + p[3];
+ }
+ else {
+ code = p[0] * 256 + p[1];
+ }
+ return code;
+}
+
+static int
+utf16be_code_to_mbclen(OnigCodePoint code)
+{
+ return (code > 0xffff ? 4 : 2);
+}
+
+static int
+utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ if (code > 0xffff) {
+ unsigned int plane, high;
+
+ plane = code >> 16;
+ *p++ = (plane >> 2) + 0xd8;
+ high = (code & 0xff00) >> 8;
+ *p++ = ((plane & 0x03) << 6) + (high >> 2);
+ *p++ = (high & 0x02) + 0xdc;
+ *p = (UChar )(code & 0xff);
+ return 4;
+ }
+ else {
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ *p++ = (UChar )(code & 0xff);
+ return 2;
+ }
+}
+
+static int
+utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ const UChar* p = *pp;
+
+ if (*p == 0) {
+ p++;
+ if (end > p + 2 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+2) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+2) == 'S'))) &&
+ *(p+1) == 0) {
+ *lower++ = '\0';
+ *lower = 0xdf;
+ (*pp) += 4;
+ return 2;
+ }
+
+ *lower++ = '\0';
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp) += 2;
+ return 2; /* return byte length of converted char to lower */
+ }
+ else {
+ int len;
+ len = EncLen_UTF16[*p];
+ if (lower != p) {
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ const UChar* p = *pp;
+
+ (*pp) += EncLen_UTF16[*p];
+
+ if (*p == 0) {
+ int c, v;
+
+ p++;
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 2 &&
+ ((*p == 's' && *(p+2) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+2) == 'S'))) &&
+ *(p+1) == 0) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ else if (*p == 0xdf) {
+ return TRUE;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ c = *p;
+ v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (c >= 0xaa && c <= 0xba)
+ return FALSE;
+ else
+ return TRUE;
+ }
+ return (v != 0 ? TRUE : FALSE);
+ }
+ }
+
+ return FALSE;
+}
+
+static UChar*
+utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ if (s <= start) return (UChar* )s;
+
+ if ((s - start) % 2 == 1) {
+ s--;
+ }
+
+ if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
+ s -= 2;
+
+ return (UChar* )s;
+}
+
+OnigEncodingType OnigEncodingUTF16_BE = {
+ utf16be_mbc_enc_len,
+ "UTF-16BE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16be_is_mbc_newline,
+ utf16be_mbc_to_code,
+ utf16be_code_to_mbclen,
+ utf16be_code_to_mbc,
+ utf16be_mbc_to_normalize,
+ utf16be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c
new file mode 100755
index 00000000000..db892dcd14d
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf16_le.c
@@ -0,0 +1,248 @@
+/**********************************************************************
+ utf16_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
+#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
+
+static int EncLen_UTF16[] = {
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+static int
+utf16le_code_to_mbclen(OnigCodePoint code)
+{
+ return (code > 0xffff ? 4 : 2);
+}
+
+static int
+utf16le_mbc_enc_len(const UChar* p)
+{
+ return EncLen_UTF16[*(p+1)];
+}
+
+static int
+utf16le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p + 1 < end) {
+ if (*p == 0x0a && *(p+1) == 0x00)
+ return 1;
+ }
+ return 0;
+}
+
+static OnigCodePoint
+utf16le_mbc_to_code(const UChar* p, const UChar* end)
+{
+ OnigCodePoint code;
+ UChar c0 = *p;
+ UChar c1 = *(p+1);
+
+ if (UTF16_IS_SURROGATE_FIRST(c1)) {
+ code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
+ + ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
+ + p[2];
+ }
+ else {
+ code = c1 * 256 + p[0];
+ }
+ return code;
+}
+
+static int
+utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ if (code > 0xffff) {
+ unsigned int plane, high;
+
+ plane = code >> 16;
+ high = (code & 0xff00) >> 8;
+
+ *p++ = ((plane & 0x03) << 6) + (high >> 2);
+ *p++ = (plane >> 2) + 0xd8;
+ *p++ = (UChar )(code & 0xff);
+ *p = (high & 0x02) + 0xdc;
+ return 4;
+ }
+ else {
+ *p++ = (UChar )(code & 0xff);
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ return 2;
+ }
+}
+
+static int
+utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ const UChar* p = *pp;
+
+ if (*(p+1) == 0) {
+ if (end > p + 3 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+2) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+2) == 'S'))) &&
+ *(p+3) == 0) {
+ *lower++ = 0xdf;
+ *lower = '\0';
+ (*pp) += 4;
+ return 2;
+ }
+
+ *(lower+1) = '\0';
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp) += 2;
+ return 2; /* return byte length of converted char to lower */
+ }
+ else {
+ int len = EncLen_UTF16[*(p+1)];
+ if (lower != p) {
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ const UChar* p = *pp;
+
+ (*pp) += EncLen_UTF16[*(p+1)];
+
+ if (*(p+1) == 0) {
+ int c, v;
+
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 3 &&
+ ((*p == 's' && *(p+2) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+2) == 'S'))) &&
+ *(p+3) == 0) {
+ (*pp) += 2;
+ return TRUE;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ c = *p;
+ v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (c >= 0xaa && c <= 0xba)
+ return FALSE;
+ else
+ return TRUE;
+ }
+ return (v != 0 ? TRUE : FALSE);
+ }
+ }
+
+ return FALSE;
+}
+
+static UChar*
+utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ if (s <= start) return (UChar* )s;
+
+ if ((s - start) % 2 == 1) {
+ s--;
+ }
+
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ s -= 2;
+
+ return (UChar* )s;
+}
+
+OnigEncodingType OnigEncodingUTF16_LE = {
+ utf16le_mbc_enc_len,
+ "UTF-16LE", /* name */
+ 4, /* max byte length */
+ 2, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf16le_is_mbc_newline,
+ utf16le_mbc_to_code,
+ utf16le_code_to_mbclen,
+ utf16le_code_to_mbc,
+ utf16le_mbc_to_normalize,
+ utf16le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf16le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c
new file mode 100755
index 00000000000..60feb040b82
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_be.c
@@ -0,0 +1,208 @@
+/**********************************************************************
+ utf32_be.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static int
+utf32be_mbc_enc_len(const UChar* p)
+{
+ return 4;
+}
+
+static int
+utf32be_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p + 3 < end) {
+ if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
+ return 1;
+ }
+ return 0;
+}
+
+static OnigCodePoint
+utf32be_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
+}
+
+static int
+utf32be_code_to_mbclen(OnigCodePoint code)
+{
+ return 4;
+}
+
+static int
+utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ *p++ = (UChar )((code & 0xff000000) >>24);
+ *p++ = (UChar )((code & 0xff0000) >>16);
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ *p++ = (UChar ) (code & 0xff);
+ return 4;
+}
+
+static int
+utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ const UChar* p = *pp;
+
+ if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
+ p += 3;
+ if (end > p + 4 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+4) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+4) == 'S'))) &&
+ *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
+ *lower++ = '\0';
+ *lower++ = '\0';
+ *lower++ = '\0';
+ *lower = 0xdf;
+ (*pp) += 8;
+ return 4;
+ }
+
+ *lower++ = '\0';
+ *lower++ = '\0';
+ *lower++ = '\0';
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+
+ (*pp) += 4;
+ return 4; /* return byte length of converted char to lower */
+ }
+ else {
+ int len = 4;
+ if (lower != p) {
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ const UChar* p = *pp;
+
+ (*pp) += 4;
+
+ if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
+ int c, v;
+
+ p += 3;
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 4 &&
+ ((*p == 's' && *(p+4) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+4) == 'S'))) &&
+ *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
+ (*pp) += 4;
+ return TRUE;
+ }
+ else if (*p == 0xdf) {
+ return TRUE;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ c = *p;
+ v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (c >= 0xaa && c <= 0xba)
+ return FALSE;
+ else
+ return TRUE;
+ }
+ return (v != 0 ? TRUE : FALSE);
+ }
+ }
+
+ return FALSE;
+}
+
+static UChar*
+utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ int rem;
+
+ if (s <= start) return (UChar* )s;
+
+ rem = (s - start) % 4;
+ return (UChar* )(s - rem);
+}
+
+OnigEncodingType OnigEncodingUTF32_BE = {
+ utf32be_mbc_enc_len,
+ "UTF-32BE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32be_is_mbc_newline,
+ utf32be_mbc_to_code,
+ utf32be_code_to_mbclen,
+ utf32be_code_to_mbc,
+ utf32be_mbc_to_normalize,
+ utf32be_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32be_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c
new file mode 100755
index 00000000000..bba9689f761
--- /dev/null
+++ b/ext/mbstring/oniguruma/enc/utf32_le.c
@@ -0,0 +1,206 @@
+/**********************************************************************
+ utf32_le.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+
+static int
+utf32le_mbc_enc_len(const UChar* p)
+{
+ return 4;
+}
+
+static int
+utf32le_is_mbc_newline(const UChar* p, const UChar* end)
+{
+ if (p + 3 < end) {
+ if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+static OnigCodePoint
+utf32le_mbc_to_code(const UChar* p, const UChar* end)
+{
+ return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
+}
+
+static int
+utf32le_code_to_mbclen(OnigCodePoint code)
+{
+ return 4;
+}
+
+static int
+utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
+{
+ UChar* p = buf;
+
+ *p++ = (UChar ) (code & 0xff);
+ *p++ = (UChar )((code & 0xff00) >> 8);
+ *p++ = (UChar )((code & 0xff0000) >>16);
+ *p++ = (UChar )((code & 0xff000000) >>24);
+ return 4;
+}
+
+static int
+utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
+ UChar* lower)
+{
+ const UChar* p = *pp;
+
+ if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
+ if (end > p + 7 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+4) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+4) == 'S'))) &&
+ *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
+ *lower++ = 0xdf;
+ *lower++ = '\0';
+ *lower++ = '\0';
+ *lower = '\0';
+ (*pp) += 8;
+ return 4;
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ *lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower++ = *p;
+ }
+ *lower++ = '\0';
+ *lower++ = '\0';
+ *lower = '\0';
+
+ (*pp) += 4;
+ return 4; /* return byte length of converted char to lower */
+ }
+ else {
+ int len = 4;
+ if (lower != p) {
+ int i;
+ for (i = 0; i < len; i++) {
+ *lower++ = *p++;
+ }
+ }
+ (*pp) += len;
+ return len; /* return byte length of converted char to lower */
+ }
+}
+
+static int
+utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+{
+ const UChar* p = *pp;
+
+ (*pp) += 4;
+
+ if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
+ int c, v;
+
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ if (end > p + 7 &&
+ ((*p == 's' && *(p+4) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+4) == 'S'))) &&
+ *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
+ (*pp) += 4;
+ return TRUE;
+ }
+ else if (*p == 0xdf) {
+ return TRUE;
+ }
+ }
+
+ if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ ONIGENC_IS_MBC_ASCII(p)) ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+ !ONIGENC_IS_MBC_ASCII(p))) {
+ c = *p;
+ v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
+ (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+ if ((v | ONIGENC_CTYPE_LOWER) != 0) {
+ /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+ if (c >= 0xaa && c <= 0xba)
+ return FALSE;
+ else
+ return TRUE;
+ }
+ return (v != 0 ? TRUE : FALSE);
+ }
+ }
+
+ return FALSE;
+}
+
+static UChar*
+utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
+{
+ int rem;
+
+ if (s <= start) return (UChar* )s;
+
+ rem = (s - start) % 4;
+ return (UChar* )(s - rem);
+}
+
+OnigEncodingType OnigEncodingUTF32_LE = {
+ utf32le_mbc_enc_len,
+ "UTF-32LE", /* name */
+ 4, /* max byte length */
+ 4, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ utf32le_is_mbc_newline,
+ utf32le_mbc_to_code,
+ utf32le_code_to_mbclen,
+ utf32le_code_to_mbc,
+ utf32le_mbc_to_normalize,
+ utf32le_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ onigenc_unicode_is_code_ctype,
+ onigenc_unicode_get_ctype_code_range,
+ utf32le_left_adjust_char_head,
+ onigenc_always_false_is_allowed_reverse_match
+};
diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c
index 604cfac2ef9..592bebfe8f2 100644
--- a/ext/mbstring/oniguruma/enc/utf8.c
+++ b/ext/mbstring/oniguruma/enc/utf8.c
@@ -1,60 +1,78 @@
/**********************************************************************
-
utf8.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regenc.h"
+#define USE_INVALID_CODE_SCHEME
+
+#ifdef USE_INVALID_CODE_SCHEME
+/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
+#define INVALID_CODE_FE 0xfffffffe
+#define INVALID_CODE_FF 0xffffffff
+#define VALID_CODE_LIMIT 0x7fffffff
+#endif
+
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
- ((EncUnicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
-
-static unsigned short EncUnicode_ISO_8859_1_CtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x1050, 0x1050, 0x1050, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x1050, 0x18d0,
- 0x1050, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050, 0x1050,
- 0x1050, 0x1050, 0x1871, 0x10d0, 0x1050, 0x10d0, 0x1050, 0x1050,
- 0x1050, 0x1050, 0x1850, 0x1850, 0x1050, 0x1871, 0x1050, 0x10d0,
- 0x1050, 0x1850, 0x1871, 0x10d0, 0x1850, 0x1850, 0x1850, 0x10d0,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1050,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1050,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871
+static int EncLen_UTF8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};
+static int
+utf8_mbc_enc_len(const UChar* p)
+{
+ return EncLen_UTF8[*p];
+}
+
static OnigCodePoint
-utf8_mbc_to_code(UChar* p, UChar* end)
+utf8_mbc_to_code(const UChar* p, const UChar* end)
{
int c, len;
OnigCodePoint n;
+ len = enc_len(ONIG_ENCODING_UTF8, p);
c = *p++;
- len = enc_len(ONIG_ENCODING_UTF8, c);
if (len > 1) {
len--;
n = c & ((1 << (6 - len)) - 1);
@@ -64,8 +82,14 @@ utf8_mbc_to_code(UChar* p, UChar* end)
}
return n;
}
- else
+ else {
+#ifdef USE_INVALID_CODE_SCHEME
+ if (c > 0xfd) {
+ return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
+ }
+#endif
return (OnigCodePoint )c;
+ }
}
static int
@@ -81,8 +105,12 @@ utf8_code_to_mbclen(OnigCodePoint code)
else if ((code & 0xffe00000) == 0) return 4;
else if ((code & 0xfc000000) == 0) return 5;
else if ((code & 0x80000000) == 0) return 6;
+#ifdef USE_INVALID_CODE_SCHEME
+ else if (code == INVALID_CODE_FE) return 1;
+ else if (code == INVALID_CODE_FF) return 1;
+#endif
else
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
}
#if 0
@@ -103,7 +131,7 @@ utf8_code_to_mbc_first(OnigCodePoint code)
else if ((code & 0x80000000) == 0)
return ((code>>30) & 0x01) | 0xfc;
else {
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
}
}
}
@@ -147,8 +175,18 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
+#ifdef USE_INVALID_CODE_SCHEME
+ else if (code == INVALID_CODE_FE) {
+ *p = 0xfe;
+ return 1;
+ }
+ else if (code == INVALID_CODE_FF) {
+ *p = 0xff;
+ return 1;
+ }
+#endif
else {
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
}
*p++ = UTF8_TRAIL0(code);
@@ -157,221 +195,3377 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
-utf8_mbc_to_lower(UChar* p, UChar* lower)
+utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
{
- int len;
+ const UChar* p = *pp;
- /* !!! U+0080 - U+00ff is treated by fold match. !!! */
if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if (end > p + 1 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S')))) {
+ *lower++ = '\303';
+ *lower = '\237';
+ (*pp) += 2;
+ return 2;
+ }
+
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1; /* return byte length of converted char to lower */
}
else {
- len = enc_len(ONIG_ENCODING_UTF8, *p);
+ int len;
+
+ if (*p == 195) { /* 195 == '\303' */
+ int c = *(p + 1);
+ if (c >= 128) {
+ if (c <= (UChar )'\236' && /* upper */
+ (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
+ if (c != (UChar )'\227') {
+ *lower++ = *p;
+ *lower = (UChar )(c + 32);
+ (*pp) += 2;
+ return 2;
+ }
+ }
+#if 0
+ else if (c == (UChar )'\237' &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ *lower++ = '\303';
+ *lower = '\237';
+ (*pp) += 2;
+ return 2;
+ }
+#endif
+ }
+ }
+
+ len = enc_len(ONIG_ENCODING_UTF8, p);
if (lower != p) {
- /* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
+ (*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static int
-utf8_mbc_is_case_ambig(UChar* p)
+utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
- /* !!! U+0080 - U+00ff ( 0x80[0xc2,0x80] - 0xff[0xc3,0xbf] )
- is treated by fold match. !!! */
+ const UChar* p = *pp;
- if (ONIGENC_IS_MBC_ASCII(p))
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ if (ONIGENC_IS_MBC_ASCII(p)) {
+ if (end > p + 1 &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
+ ((*p == 's' && *(p+1) == 's') ||
+ ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+ (*p == 'S' && *(p+1) == 'S')))) {
+ (*pp) += 2;
+ return TRUE;
+ }
- return FALSE;
-}
-
-static int
-utf8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
-{
- if (code < 256) {
- return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
+ (*pp)++;
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ }
}
+ else {
+ (*pp) += enc_len(ONIG_ENCODING_UTF8, p);
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- return TRUE;
+ if (*p == 195) { /* 195 == '\303' */
+ int c = *(p + 1);
+ if (c >= 128) {
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
+ if (c <= (UChar )'\236') { /* upper */
+ if (c == (UChar )'\227') return FALSE;
+ return TRUE;
+ }
+ else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */
+ if (c == (UChar )'\267') return FALSE;
+ return TRUE;
+ }
+ }
+ else if (c == (UChar )'\237' &&
+ (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ return TRUE;
+ }
+ }
+ }
}
return FALSE;
}
+
+static OnigCodePoint EmptyRange[] = { 0 };
+
+static OnigCodePoint SBAlnum[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBAlnum[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 411,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlnum */
+
+static OnigCodePoint SBAlpha[] = {
+ 2,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 394,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlpha */
+
+static OnigCodePoint SBBlank[] = {
+ 2,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020
+};
+
+static OnigCodePoint MBBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 7,
+#else
+ 1,
+#endif
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBBlank */
+
+static OnigCodePoint SBCntrl[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x007f
+};
+
+static OnigCodePoint MBCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 18,
+#else
+ 2,
+#endif
+ 0x0080, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBCntrl */
+
+static OnigCodePoint SBDigit[] = {
+ 1,
+ 0x0030, 0x0039
+};
+
+static OnigCodePoint MBDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 22,
+#else
+ 0
+#endif
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBDigit */
+
+static OnigCodePoint SBGraph[] = {
+ 1,
+ 0x0021, 0x007e
+};
+
+static OnigCodePoint MBGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 404,
+#else
+ 1,
+#endif
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBGraph */
+
+static OnigCodePoint SBLower[] = {
+ 1,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 423,
+#else
+ 5,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBLower */
+
+static OnigCodePoint SBPrint[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x007e
+};
+
+static OnigCodePoint MBPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 403,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPrint */
+
+static OnigCodePoint SBPunct[] = {
+ 9,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d
+}; /* end of SBPunct */
+
+static OnigCodePoint MBPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 77,
+#else
+ 5,
+#endif
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPunct */
+
+static OnigCodePoint SBSpace[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020
+};
+
+static OnigCodePoint MBSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBSpace */
+
+static OnigCodePoint SBUpper[] = {
+ 1,
+ 0x0041, 0x005a
+};
+
+static OnigCodePoint MBUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 420,
+#else
+ 2,
+#endif
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBUpper */
+
+static OnigCodePoint SBXDigit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+};
+
+static OnigCodePoint SBASCII[] = {
+ 1,
+ 0x0000, 0x007f
+};
+
+static OnigCodePoint SBWord[] = {
+ 4,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 432,
+#else
+ 8,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBWord */
+
+
static int
-utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
- OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+utf8_get_ctype_code_range(int ctype,
+ OnigCodePoint* sbr[], OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
- *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
- *nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
*sbr = sbl; \
*mbr = mbl; \
} while (0)
#define CR_SB_SET(sbl) do { \
- *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
- *nmb = 0; \
*sbr = sbl; \
+ *mbr = EmptyRange; \
} while (0)
- static OnigCodePointRange SBAlpha[] = {
- { 0x41, 0x5a },
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBAlpha[] = {
- { 0xaa, 0xaa },
- { 0xb5, 0xb5 },
- { 0xba, 0xba },
- { 0xc0, 0xd6 },
- { 0xd8, 0xf6 },
- { 0xf8, 0x220 }
- };
-
- static OnigCodePointRange SBBlank[] = {
- { 0x09, 0x09 },
- { 0x20, 0x20 }
- };
-
- static OnigCodePointRange MBBlank[] = {
- { 0xa0, 0xa0 }
- };
-
- static OnigCodePointRange SBCntrl[] = {
- { 0x00, 0x1f },
- { 0x7f, 0x7f }
- };
-
- static OnigCodePointRange MBCntrl[] = {
- { 0x80, 0x9f }
- };
-
- static OnigCodePointRange SBDigit[] = {
- { 0x30, 0x39 }
- };
-
- static OnigCodePointRange SBGraph[] = {
- { 0x21, 0x7e }
- };
-
- static OnigCodePointRange MBGraph[] = {
- { 0xa1, 0x220 }
- };
-
- static OnigCodePointRange SBLower[] = {
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBLower[] = {
- { 0xaa, 0xaa },
- { 0xb5, 0xb5 },
- { 0xba, 0xba },
- { 0xdf, 0xf6 },
- { 0xf8, 0xff }
- };
-
- static OnigCodePointRange SBPrint[] = {
- { 0x20, 0x7e }
- };
-
- static OnigCodePointRange MBPrint[] = {
- { 0xa0, 0x220 }
- };
-
- static OnigCodePointRange SBPunct[] = {
- { 0x21, 0x23 },
- { 0x25, 0x2a },
- { 0x2c, 0x2f },
- { 0x3a, 0x3b },
- { 0x3f, 0x40 },
- { 0x5b, 0x5d },
- { 0x5f, 0x5f },
- { 0x7b, 0x7b },
- { 0x7d, 0x7d }
- };
-
- static OnigCodePointRange MBPunct[] = {
- { 0xa1, 0xa1 },
- { 0xab, 0xab },
- { 0xad, 0xad },
- { 0xb7, 0xb7 },
- { 0xbb, 0xbb },
- { 0xbf, 0xbf }
- };
-
- static OnigCodePointRange SBSpace[] = {
- { 0x09, 0x0d },
- { 0x20, 0x20 }
- };
-
- static OnigCodePointRange MBSpace[] = {
- { 0xa0, 0xa0 }
- };
-
- static OnigCodePointRange SBUpper[] = {
- { 0x41, 0x5a }
- };
-
- static OnigCodePointRange MBUpper[] = {
- { 0xc0, 0xd6 },
- { 0xd8, 0xde }
- };
-
- static OnigCodePointRange SBXDigit[] = {
- { 0x30, 0x39 },
- { 0x41, 0x46 },
- { 0x61, 0x66 }
- };
-
- static OnigCodePointRange SBWord[] = {
- { 0x30, 0x39 },
- { 0x41, 0x5a },
- { 0x5f, 0x5f },
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBWord[] = {
- { 0xaa, 0xaa },
- { 0xb2, 0xb3 },
- { 0xb5, 0xb5 },
- { 0xb9, 0xba },
- { 0xbc, 0xbe },
- { 0xc0, 0xd6 },
- { 0xd8, 0xf6 },
-#if 0
- { 0xf8, 0x220 }
-#else
- { 0xf8, 0x7fffffff } /* all multibyte code as word */
-#endif
- };
-
- static OnigCodePointRange SBAscii[] = {
- { 0x00, 0x7f }
- };
-
- static OnigCodePointRange SBAlnum[] = {
- { 0x30, 0x39 },
- { 0x41, 0x5a },
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBAlnum[] = {
- { 0xaa, 0xaa },
- { 0xb5, 0xb5 },
- { 0xba, 0xba },
- { 0xc0, 0xd6 },
- { 0xd8, 0xf6 },
- { 0xf8, 0x220 }
- };
-
switch (ctype) {
case ONIGENC_CTYPE_ALPHA:
CR_SET(SBAlpha, MBAlpha);
@@ -383,7 +3577,7 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
CR_SET(SBCntrl, MBCntrl);
break;
case ONIGENC_CTYPE_DIGIT:
- CR_SB_SET(SBDigit);
+ CR_SET(SBDigit, MBDigit);
break;
case ONIGENC_CTYPE_GRAPH:
CR_SET(SBGraph, MBGraph);
@@ -410,14 +3604,14 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
CR_SET(SBWord, MBWord);
break;
case ONIGENC_CTYPE_ASCII:
- CR_SB_SET(SBAscii);
+ CR_SB_SET(SBASCII);
break;
case ONIGENC_CTYPE_ALNUM:
CR_SET(SBAlnum, MBAlnum);
break;
default:
- return ONIGERR_TYPE_BUG;
+ return ONIGENCERR_TYPE_BUG;
break;
}
@@ -425,142 +3619,120 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
}
static int
-utf8_get_all_fold_match_code(OnigCodePoint** codes)
+utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- static OnigCodePoint list[] = {
- 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
- 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
- 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
- 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ OnigCodePoint *range;
+#endif
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
- 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6,
- 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
- };
-
- *codes = list;
- return sizeof(list) / sizeof(OnigCodePoint);
-}
-
-static int
-utf8_get_fold_match_info(UChar* p, UChar* end, OnigEncFoldMatchInfo** info)
-{
-
- static OnigEncFoldMatchInfo xc[] = {
- { 2, { 2, 2 }, { "\303\200", "\303\240" } }, /* CodePoint 0xc0 */
- { 2, { 2, 2 }, { "\303\201", "\303\241" } },
- { 2, { 2, 2 }, { "\303\202", "\303\242" } },
- { 2, { 2, 2 }, { "\303\203", "\303\243" } },
- { 2, { 2, 2 }, { "\303\204", "\303\244" } },
- { 2, { 2, 2 }, { "\303\205", "\303\245" } },
- { 2, { 2, 2 }, { "\303\206", "\303\246" } },
- { 2, { 2, 2 }, { "\303\207", "\303\247" } },
- { 2, { 2, 2 }, { "\303\210", "\303\250" } },
- { 2, { 2, 2 }, { "\303\211", "\303\251" } },
- { 2, { 2, 2 }, { "\303\212", "\303\252" } },
- { 2, { 2, 2 }, { "\303\213", "\303\253" } },
- { 2, { 2, 2 }, { "\303\214", "\303\254" } },
- { 2, { 2, 2 }, { "\303\215", "\303\255" } },
- { 2, { 2, 2 }, { "\303\216", "\303\256" } },
- { 2, { 2, 2 }, { "\303\217", "\303\257" } },
- { 2, { 2, 2 }, { "\303\220", "\303\260" } }, /* CodePoint 0xd0 */
- { 2, { 2, 2 }, { "\303\221", "\303\261" } },
- { 2, { 2, 2 }, { "\303\222", "\303\262" } },
- { 2, { 2, 2 }, { "\303\223", "\303\263" } },
- { 2, { 2, 2 }, { "\303\224", "\303\264" } },
- { 2, { 2, 2 }, { "\303\225", "\303\265" } },
- { 2, { 2, 2 }, { "\303\226", "\303\266" } },
- { 0, { 0 }, { "" } },
- { 2, { 2, 2 }, { "\303\230", "\303\270" } },
- { 2, { 2, 2 }, { "\303\231", "\303\271" } },
- { 2, { 2, 2 }, { "\303\232", "\303\272" } },
- { 2, { 2, 2 }, { "\303\233", "\303\273" } },
- { 2, { 2, 2 }, { "\303\234", "\303\274" } },
- { 2, { 2, 2 }, { "\303\235", "\303\275" } },
- { 2, { 2, 2 }, { "\303\236", "\303\276" } },
- { 3, { 2, 2, 2 }, { "\303\237", "ss", "SS" }} /* ess-tsett(U+00DF) */
- };
-
- if (p + 1 >= end) return -1;
- if (*p < 0x80) {
- if ((*p == 'S' && *(p+1) == 'S') ||
- (*p == 's' && *(p+1) == 's')) {
- *info = &(xc[0xdf - 0xc0]);
- return 2;
- }
- }
- else if (*p == 195) { /* 195 == '\303' */
- int c = *(p+1);
- if (c >= 128) {
- if (c <= 159) { /* upper */
- if (c == 151) return -1; /* 0xd7 */
- *info = &(xc[c - 128]);
- return 2;
- }
- else { /* lower */
- if (c == 183) return -1; /* 0xf7 */
- *info = &(xc[c - 160]);
- return 2;
- }
- }
+ if (code < 256) {
+ return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
}
- return -1; /* is not a fold string. */
-}
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ range = MBAlpha;
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ range = MBBlank;
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ range = MBCntrl;
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ range = MBDigit;
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ range = MBGraph;
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ range = MBLower;
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ range = MBPrint;
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ range = MBPunct;
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ range = MBSpace;
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ range = MBUpper;
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ return FALSE;
+ break;
+ case ONIGENC_CTYPE_WORD:
+ range = MBWord;
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ return FALSE;
+ break;
+ case ONIGENC_CTYPE_ALNUM:
+ range = MBAlnum;
+ break;
+
+ default:
+ return ONIGENCERR_TYPE_BUG;
+ break;
+ }
+
+ return onig_is_in_code_range((UChar* )range, code);
+
+#else
+
+ if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+#ifdef USE_INVALID_CODE_SCHEME
+ if (code <= VALID_CODE_LIMIT)
+#endif
+ return TRUE;
+ }
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+
+ return FALSE;
+}
static UChar*
-utf8_left_adjust_char_head(UChar* start, UChar* s)
+utf8_left_adjust_char_head(const UChar* start, const UChar* s)
{
- UChar *p;
+ const UChar *p;
- if (s <= start) return s;
+ if (s <= start) return (UChar* )s;
p = s;
while (!utf8_islead(*p) && p > start) p--;
- return p;
-}
-
-static int
-utf8_is_allowed_reverse_match(UChar* s, UChar* end)
-{
- return TRUE;
+ return (UChar* )p;
}
OnigEncodingType OnigEncodingUTF8 = {
- {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
- },
+ utf8_mbc_enc_len,
"UTF-8", /* name */
6, /* max byte length */
- TRUE, /* is_fold_match */
- ONIGENC_CTYPE_SUPPORT_LEVEL_FULL, /* ctype_support_level */
- TRUE, /* is continuous sb mb codepoint */
+ 1, /* min byte length */
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ {
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ },
+ onigenc_is_mbc_newline_0x0a,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,
- utf8_mbc_to_lower,
- utf8_mbc_is_case_ambig,
- utf8_code_is_ctype,
+ utf8_mbc_to_normalize,
+ utf8_is_mbc_ambiguous,
+ onigenc_iso_8859_1_get_all_pair_ambig_codes,
+ onigenc_ess_tsett_get_all_comp_ambig_codes,
+ utf8_is_code_ctype,
utf8_get_ctype_code_range,
utf8_left_adjust_char_head,
- utf8_is_allowed_reverse_match,
- utf8_get_all_fold_match_code,
- utf8_get_fold_match_info
+ onigenc_always_true_is_allowed_reverse_match
};
diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html
new file mode 100755
index 00000000000..293ea442a37
--- /dev/null
+++ b/ext/mbstring/oniguruma/index.html
@@ -0,0 +1,173 @@
+
+
+
+ Oniguruma
+
+
+
+Oniguruma
+
+2005/02/19 (C) K.Kosako
+
+
+
+
+
+
+
+
+Oniguruma is a regular expressions library.
+The characteristics of this library is that different character encoding
+
for every regular expression object can be specified.
+
+
+
+- Supported character encodings:
+ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
+EUC-JP, EUC-TW, EUC-KR, EUC-CN,
+Shift_JIS, Big5, KOI8-R, KOI8,
+ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
+ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+
+
+
+
+
+
+- What's new
+
+
+- Character types(\w, \s, \d and POSIX bracket) were supported in full code point range with the Version 4.0.1 of the Unicode Standard. (since Version 3.5.0)
+
+
+
+
+
+
+- There are two ways of using of it in this program.
+
+- (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
+
- (2) Built-in regular expressions engine of Ruby 1.6/1.8/1.9
+ In Ruby 1.9, Oniguruma is already incorporated by Kazuo Saito.
+
+
+
+
+- Platform:
+
+- Unix (include Mac OS X)
+
- Cygwin
+
- Win32
+
+
+
+- License:
+When this software is partly used or it is distributed with Ruby,
+this of Ruby follows the license of Ruby.
+It follows the BSD license in the case of the one except for it.
+
+
+Download:
+
+
+
+
+* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.
+* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
+
+
+
+
+Documents: (version 3.7.0)
+
+
+
+Sample Programs:
+
+
+
+Links:
+
+
+
+References:
+
+
+
+
+
+
+and I'm thankful to Akinori MUSHA.
+
+
+
+
+
+
diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h
index d78dc18b11e..4a695154669 100644
--- a/ext/mbstring/oniguruma/oniggnu.h
+++ b/ext/mbstring/oniguruma/oniggnu.h
@@ -1,15 +1,40 @@
-/**********************************************************************
-
- oniggnu.h - Oniguruma (regular expression library)
-
- Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef ONIGGNU_H
#define ONIGGNU_H
+/**********************************************************************
+ oniggnu.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include "oniguruma.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
@@ -19,14 +44,31 @@
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
-#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
-#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
-#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
-#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
-#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
-#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
+
+#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
+#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
+#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
+#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
+#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
+#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
+#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
+#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
+#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
+#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
#ifdef RUBY_PLATFORM
+
+#ifndef ONIG_RUBY_M17N
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+
+#undef ismbchar
+#define ismbchar(c) (mbclen((c)) != 1)
+#define mbclen(c) \
+ ONIGENC_MBC_ENC_LEN(OnigEncDefaultCharEncoding, (UChar* )(&c))
+
+#endif /* ifndef ONIG_RUBY_M17N */
+
#define re_mbcinit ruby_re_mbcinit
#define re_compile_pattern ruby_re_compile_pattern
#define re_recompile_pattern ruby_re_recompile_pattern
@@ -74,4 +116,8 @@ void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
+#ifdef __cplusplus
+}
+#endif
+
#endif /* ONIGGNU_H */
diff --git a/ext/mbstring/oniguruma/onigposix.h b/ext/mbstring/oniguruma/onigposix.h
index 3793ae6bd99..cfeb88a2928 100644
--- a/ext/mbstring/oniguruma/onigposix.h
+++ b/ext/mbstring/oniguruma/onigposix.h
@@ -1,12 +1,38 @@
-/**********************************************************************
-
- onigposix.h - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef ONIGPOSIX_H
#define ONIGPOSIX_H
+/**********************************************************************
+ onigposix.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/* options */
#define REG_ICASE (1<<0)
@@ -38,12 +64,12 @@
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
-#define REG_POSIX_ENCODING_ASCII 0
-#define REG_POSIX_ENCODING_EUC_JP 1
-#define REG_POSIX_ENCODING_SJIS 2
-#define REG_POSIX_ENCODING_UTF8 3
-
-#include
+#define REG_POSIX_ENCODING_ASCII 0
+#define REG_POSIX_ENCODING_EUC_JP 1
+#define REG_POSIX_ENCODING_SJIS 2
+#define REG_POSIX_ENCODING_UTF8 3
+#define REG_POSIX_ENCODING_UTF16_BE 4
+#define REG_POSIX_ENCODING_UTF16_LE 5
typedef int regoff_t;
@@ -70,7 +96,7 @@ typedef struct {
#endif
#ifndef ONIG_EXTERN
-#if defined(_WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
@@ -103,7 +129,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
-/* predefined syntaxes (see regparse.c) */
+/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@@ -119,6 +145,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
+ONIG_EXTERN const char* onig_version P_((void));
+ONIG_EXTERN const char* onig_copyright P_((void));
+
#endif /* ONIGURUMA_H */
@@ -129,8 +158,12 @@ ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t
/* extended API */
ONIG_EXTERN void reg_set_encoding P_((int enc));
-ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
-ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), void* arg));
+ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
+ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
+#ifdef __cplusplus
+}
+#endif
+
#endif /* ONIGPOSIX_H */
diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h
index 99f6bdab0d2..2106774f717 100644
--- a/ext/mbstring/oniguruma/oniguruma.h
+++ b/ext/mbstring/oniguruma/oniguruma.h
@@ -1,19 +1,53 @@
-/**********************************************************************
-
- oniguruma.h - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
+/**********************************************************************
+ oniguruma.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include "php_onig_compat.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 2
-#define ONIGURUMA_VERSION_MINOR 2
-#define ONIGURUMA_VERSION_TEENY 4
+#define ONIGURUMA_VERSION_MAJOR 3
+#define ONIGURUMA_VERSION_MINOR 7
+#define ONIGURUMA_VERSION_TEENY 0
+
+#ifdef __cplusplus
+# ifndef HAVE_PROTOTYPES
+# define HAVE_PROTOTYPES 1
+# endif
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -32,7 +66,7 @@
#endif
#ifndef ONIG_EXTERN
-#if defined(_WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
@@ -53,17 +87,60 @@ typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+/* ambiguous match flag */
+typedef unsigned int OnigAmbigType;
+
+ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
+
+#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
+#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
+#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
+/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
+/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
+/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
+
+#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
+#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
+
+#define ONIGENC_AMBIGUOUS_MATCH_FULL \
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
+ ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
+#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag
+
+
+#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
+#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
+
+/* code range */
+#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
+#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
+#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
+
+typedef struct {
+ int len;
+ OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
+} OnigCompAmbigCodeItem;
+
+typedef struct {
+ int n;
+ OnigCodePoint code;
+ OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
+} OnigCompAmbigCodes;
+
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
-} OnigCodePointRange;
+} OnigPairAmbigCodes;
-#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
typedef struct {
- int target_num;
- int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
- UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
-} OnigEncFoldMatchInfo;
+ OnigCodePoint esc;
+ OnigCodePoint anychar;
+ OnigCodePoint anytime;
+ OnigCodePoint zero_or_one_time;
+ OnigCodePoint one_or_more_time;
+ OnigCodePoint anychar_anytime;
+} OnigMetaCharTableType;
#if defined(RUBY_PLATFORM) && defined(M17N_H)
@@ -74,23 +151,24 @@ typedef m17n_encoding* OnigEncoding;
#else
typedef struct {
- const char len_table[256];
- const char* name;
- int max_enc_len;
- int is_fold_match;
- int ctype_support_level; /* sb-only/full */
- int is_continuous_sb_mb; /* code point is continuous from sb to mb */
- OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
+ int (*mbc_enc_len)(const UChar* p);
+ const char* name;
+ int max_enc_len;
+ int min_enc_len;
+ OnigAmbigType support_ambig_flag;
+ OnigMetaCharTableType meta_char_table;
+ int (*is_mbc_newline)(const UChar* p, const UChar* end);
+ OnigCodePoint (*mbc_to_code)(const UChar* p, const UChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
- int (*mbc_to_lower)(UChar* p, UChar* lower);
- int (*mbc_is_case_ambig)(UChar* p);
- int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
- int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
- UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
- int (*is_allowed_reverse_match)(UChar* p, UChar* e);
- int (*get_all_fold_match_code)(OnigCodePoint** codes);
- int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
+ int (*mbc_to_normalize)(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* to);
+ int (*is_mbc_ambiguous)(OnigAmbigType flag, const UChar** pp, const UChar* end);
+ int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
+ int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
+ int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
+ int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
+ UChar* (*left_adjust_char_head)(const UChar* start, const UChar* p);
+ int (*is_allowed_reverse_match)(const UChar* p, const UChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
@@ -112,6 +190,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
+ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
@@ -138,6 +220,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
+#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
+#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
+#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
+#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
@@ -153,35 +239,31 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
/* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN 7
-#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
/* character types */
-#define ONIGENC_CTYPE_ALPHA (1<< 0)
-#define ONIGENC_CTYPE_BLANK (1<< 1)
-#define ONIGENC_CTYPE_CNTRL (1<< 2)
-#define ONIGENC_CTYPE_DIGIT (1<< 3)
-#define ONIGENC_CTYPE_GRAPH (1<< 4)
-#define ONIGENC_CTYPE_LOWER (1<< 5)
-#define ONIGENC_CTYPE_PRINT (1<< 6)
-#define ONIGENC_CTYPE_PUNCT (1<< 7)
-#define ONIGENC_CTYPE_SPACE (1<< 8)
-#define ONIGENC_CTYPE_UPPER (1<< 9)
-#define ONIGENC_CTYPE_XDIGIT (1<<10)
-#define ONIGENC_CTYPE_WORD (1<<11)
-#define ONIGENC_CTYPE_ASCII (1<<12)
+#define ONIGENC_CTYPE_NEWLINE (1<< 0)
+#define ONIGENC_CTYPE_ALPHA (1<< 1)
+#define ONIGENC_CTYPE_BLANK (1<< 2)
+#define ONIGENC_CTYPE_CNTRL (1<< 3)
+#define ONIGENC_CTYPE_DIGIT (1<< 4)
+#define ONIGENC_CTYPE_GRAPH (1<< 5)
+#define ONIGENC_CTYPE_LOWER (1<< 6)
+#define ONIGENC_CTYPE_PRINT (1<< 7)
+#define ONIGENC_CTYPE_PUNCT (1<< 8)
+#define ONIGENC_CTYPE_SPACE (1<< 9)
+#define ONIGENC_CTYPE_UPPER (1<<10)
+#define ONIGENC_CTYPE_XDIGIT (1<<11)
+#define ONIGENC_CTYPE_WORD (1<<12)
+#define ONIGENC_CTYPE_ASCII (1<<13)
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
-/* ctype support level */
-#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
-#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
-
-
-#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
+#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
-#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
+#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
@@ -194,31 +276,33 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#include /* for isblank(), isgraph() */
-#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
-#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
+#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
+ onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
+#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
+ onigenc_is_mbc_ambiguous(enc,flag,pp,end)
-#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
-#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
-#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
+#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
onigenc_get_left_adjust_char_head(enc, start, s)
-#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
-#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
+#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
-#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
+#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
: ONIG_INFINITE_DISTANCE)
+#define ONIGENC_MBC_MINLEN(enc) 1
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
-#if 0
-#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
+#if 0 /* !! not supported !! */
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
+#define ONIGENC_STEP_BACK(enc,start,s,n)
#endif
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
@@ -253,42 +337,45 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN
-int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
+int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* buf));
ONIG_EXTERN
-int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
+int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end));
ONIG_EXTERN
-int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
+int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const UChar* end));
#else /* ONIG_RUBY_M17N */
#define ONIGENC_NAME(enc) ((enc)->name)
-#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
-#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
-
-#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
-#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
-#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
+#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
+ (enc)->mbc_to_normalize(flag,(const UChar** )pp,end,buf)
+#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
+ (enc)->is_mbc_ambiguous(flag,(const UChar** )pp,end)
+#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
-#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
- (enc)->get_all_fold_match_code(codes)
-#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
- (enc)->get_fold_match_info(p,end,info)
+#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
+ (enc)->get_all_pair_ambig_codes(ambig_flag,acs)
+#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
+ (enc)->get_all_comp_ambig_codes(ambig_flag,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
-#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
+#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
-#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
+#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
+#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
+#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
@@ -316,11 +403,11 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
- (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
+ (enc)->get_ctype_code_range(ctype,sbr,mbr)
ONIG_EXTERN
-UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
+UChar* onigenc_step_back P_((OnigEncoding enc, const UChar* start, const UChar* s, int n));
#endif /* is not ONIG_RUBY_M17N */
@@ -333,15 +420,21 @@ int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_(());
ONIG_EXTERN
-void onigenc_set_default_caseconv_table P_((UChar* table));
+void onigenc_set_default_caseconv_table P_((const UChar* table));
ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
+UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const UChar* start, const UChar* s, const UChar** prev));
ONIG_EXTERN
-UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
ONIG_EXTERN
-UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
ONIG_EXTERN
-UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
+UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
+ONIG_EXTERN
+int onigenc_strlen P_((OnigEncoding enc, const UChar* p, const UChar* end));
+ONIG_EXTERN
+int onigenc_strlen_null P_((OnigEncoding enc, const UChar* p));
+ONIG_EXTERN
+int onigenc_str_bytelen_null P_((OnigEncoding enc, const UChar* p));
@@ -355,13 +448,6 @@ UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UC
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
-#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
-ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
-#undef ismbchar
-#define ismbchar(c) (mbclen((c)) != 1)
-#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
-#endif
-
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
@@ -403,7 +489,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
-/* predefined syntaxes (see regparse.c) */
+/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@@ -466,7 +552,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
+#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
@@ -479,6 +568,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?)(?) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
@@ -505,7 +595,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
+
/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
@@ -520,7 +613,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
-#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
+#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
@@ -560,8 +653,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
+
/* errors related to thread */
-#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
@@ -569,6 +666,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
+typedef struct OnigCaptureTreeNodeStruct {
+ int group; /* group number */
+ int beg;
+ int end;
+ int allocated;
+ int num_childs;
+ struct OnigCaptureTreeNodeStruct** childs;
+} OnigCaptureTreeNode;
+
/* match result region type */
struct re_registers {
int allocated;
@@ -576,9 +682,16 @@ struct re_registers {
int* beg;
int* end;
/* extended */
- struct re_registers** list; /* capture history. list[1]-list[31] */
+ OnigCaptureTreeNode* history_root; /* capture history tree root */
};
+/* capture tree traverse */
+#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
+#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
+#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
+ ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
+
+
#define ONIG_REGION_NOTPOS -1
typedef struct re_registers OnigRegion;
@@ -593,8 +706,8 @@ typedef struct {
int upper;
} OnigRepeatRange;
-typedef void (*OnigWarnFunc) P_((char* s));
-extern void onig_null_warn P_((char* s));
+typedef void (*OnigWarnFunc) P_((const char* s));
+extern void onig_null_warn P_((const char* s));
#define ONIG_NULL_WARN onig_null_warn
#define ONIG_CHAR_TABLE_SIZE 256
@@ -629,6 +742,7 @@ typedef struct re_pattern_buffer {
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
+ OnigAmbigType ambig_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
@@ -640,7 +754,7 @@ typedef struct re_pattern_buffer {
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
- unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigDistance dmin; /* min-distance of exact or map */
@@ -651,6 +765,15 @@ typedef struct re_pattern_buffer {
} regex_t;
+typedef struct {
+ int num_of_elements;
+ OnigEncoding pattern_enc;
+ OnigEncoding target_enc;
+ OnigSyntaxType* syntax;
+ OnigOptionType option;
+ OnigAmbigType ambig_flag;
+} OnigCompileInfo;
+
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
@@ -661,18 +784,24 @@ void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
-int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_new P_((regex_t**, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
+int onig_new_deluxe P_((regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
-int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_recompile P_((regex_t*, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
+int onig_recompile_deluxe P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
-int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
+int onig_search P_((regex_t*, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option));
+ONIG_EXTERN
+int onig_match P_((regex_t*, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
+void onig_region_init P_((OnigRegion* region));
+ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
@@ -681,25 +810,44 @@ void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
-int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
- int** nums));
+int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
-int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
+int onig_name_to_group_numbers P_((regex_t* reg, const UChar* name, const UChar* name_end, int** nums));
ONIG_EXTERN
-int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
+int onig_name_to_backref_number P_((regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region));
+ONIG_EXTERN
+int onig_foreach_name P_((regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
+int onig_number_of_captures P_((regex_t* reg));
+ONIG_EXTERN
+int onig_number_of_capture_histories P_((regex_t* reg));
+ONIG_EXTERN
+OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
+ONIG_EXTERN
+int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
+ONIG_EXTERN
OnigEncoding onig_get_encoding P_((regex_t* reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((regex_t* reg));
ONIG_EXTERN
+OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
+ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
+unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
+OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
+ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
@@ -708,10 +856,26 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
-int onig_set_meta_char P_((unsigned int what, unsigned int c));
+int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
+ONIG_EXTERN
+void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
+ONIG_EXTERN
+OnigAmbigType onig_get_default_ambig_flag P_(());
+ONIG_EXTERN
+int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
+ONIG_EXTERN
+unsigned int onig_get_match_stack_limit_size P_((void));
+ONIG_EXTERN
+int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
+ONIG_EXTERN
+const char* onig_copyright P_((void));
+
+#ifdef __cplusplus
+}
+#endif
#endif /* ONIGURUMA_H */
diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c
index 24d44dd1b81..5171b15a36f 100644
--- a/ext/mbstring/oniguruma/regcomp.c
+++ b/ext/mbstring/oniguruma/regcomp.c
@@ -1,16 +1,75 @@
/**********************************************************************
-
regcomp.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regparse.h"
+OnigAmbigType OnigDefaultAmbigFlag =
+ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
+
+extern OnigAmbigType
+onig_get_default_ambig_flag()
+{
+ return OnigDefaultAmbigFlag;
+}
+
+extern int
+onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+{
+ OnigDefaultAmbigFlag = ambig_flag;
+ return 0;
+}
+
+
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
+static UChar*
+k_strdup(UChar* s, UChar* end)
+{
+ int len = end - s;
+
+ if (len > 0) {
+ UChar* r = (UChar* )xmalloc(len + 1);
+ CHECK_NULL_RETURN(r);
+ xmemcpy(r, s, len);
+ r[len] = (UChar )0;
+ return r;
+ }
+ else return NULL;
+}
+
+/*
+ Caution: node should not be a string node.
+ (s and end member address break)
+*/
static void
swap_node(Node* a, Node* b)
{
@@ -120,33 +179,6 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
#endif /* USE_SUBEXP_CALL */
-#if 0
-static int
-bitset_mbmaxlen(BitSetRef bs, int negative, OnigEncoding enc)
-{
- int i;
- int len, maxlen = 0;
-
- if (negative) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! BITSET_AT(bs, i)) {
- len = enc_len(enc, i);
- if (len > maxlen) maxlen = len;
- }
- }
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) {
- len = enc_len(enc, i);
- if (len > maxlen) maxlen = len;
- }
- }
- }
- return maxlen;
-}
-#endif
-
static int
add_opcode(regex_t* reg, int opcode)
{
@@ -159,13 +191,7 @@ add_rel_addr(regex_t* reg, int addr)
{
RelAddrType ra = (RelAddrType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_RELADDR);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_RELADDR(ra, buf);
- BBUF_ADD(reg, buf, SIZE_RELADDR);
-#endif
return 0;
}
@@ -174,13 +200,7 @@ add_abs_addr(regex_t* reg, int addr)
{
AbsAddrType ra = (AbsAddrType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &ra, SIZE_ABSADDR);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_ABSADDR(ra, buf);
- BBUF_ADD(reg, buf, SIZE_ABSADDR);
-#endif
return 0;
}
@@ -189,13 +209,7 @@ add_length(regex_t* reg, int len)
{
LengthType l = (LengthType )len;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &l, SIZE_LENGTH);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_LENGTH(l, buf);
- BBUF_ADD(reg, buf, SIZE_LENGTH);
-#endif
return 0;
}
@@ -204,43 +218,23 @@ add_mem_num(regex_t* reg, int num)
{
MemNumType n = (MemNumType )num;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &n, SIZE_MEMNUM);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_MEMNUM(n, buf);
- BBUF_ADD(reg, buf, SIZE_MEMNUM);
-#endif
return 0;
}
-#if 0
static int
-add_repeat_num(regex_t* reg, int num)
+add_pointer(regex_t* reg, void* addr)
{
- RepeatNumType n = (RepeatNumType )num;
+ PointerType ptr = (PointerType )addr;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- BBUF_ADD(reg, &n, SIZE_REPEATNUM);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_REPEATNUM(n, buf);
- BBUF_ADD(reg, buf, SIZE_REPEATNUM);
-#endif
+ BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
-#endif
static int
add_option(regex_t* reg, OnigOptionType option)
{
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_ADD(reg, &option, SIZE_OPTION);
-#else
- UChar buf[SERIALIZE_BUFSIZE];
- SERIALIZE_OPTION(option, buf);
- BBUF_ADD(reg, buf, SIZE_OPTION);
-#endif
return 0;
}
@@ -293,15 +287,15 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
{
int op;
- switch (mb_len) {
- case 1:
- if (ignore_case) {
- switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
- }
+ if (ignore_case) {
+ switch (str_len) {
+ case 1: op = OP_EXACT1_IC; break;
+ default: op = OP_EXACTN_IC; break;
}
- else {
+ }
+ else {
+ switch (mb_len) {
+ case 1:
switch (str_len) {
case 1: op = OP_EXACT1; break;
case 2: op = OP_EXACT2; break;
@@ -310,25 +304,25 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
case 5: op = OP_EXACT5; break;
default: op = OP_EXACTN; break;
}
+ break;
+
+ case 2:
+ switch (str_len) {
+ case 1: op = OP_EXACTMB2N1; break;
+ case 2: op = OP_EXACTMB2N2; break;
+ case 3: op = OP_EXACTMB2N3; break;
+ default: op = OP_EXACTMB2N; break;
+ }
+ break;
+
+ case 3:
+ op = OP_EXACTMB3N;
+ break;
+
+ default:
+ op = OP_EXACTMBN;
+ break;
}
- break;
-
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
-
- case 3:
- op = OP_EXACTMB3N;
- break;
-
- default:
- op = OP_EXACTMBN;
- break;
}
return op;
}
@@ -373,7 +367,7 @@ compile_call(CallNode* node, regex_t* reg)
r = add_opcode(reg, OP_CALL);
if (r) return r;
r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
- node->target);
+ node->target);
if (r) return r;
r = add_abs_addr(reg, 0 /*dummy addr.*/);
return r;
@@ -394,15 +388,14 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
static int
add_compile_string_length(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+ regex_t* reg, int ignore_case)
{
int len;
int op = select_str_opcode(mb_len, str_len, ignore_case);
len = SIZE_OPCODE;
- if (op == OP_EXACTMBN)
- len += SIZE_LENGTH;
+ if (op == OP_EXACTMBN) len += SIZE_LENGTH;
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;
@@ -412,7 +405,7 @@ add_compile_string_length(UChar* s, int mb_len, int str_len,
static int
add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+ regex_t* reg, int ignore_case)
{
int op = select_str_opcode(mb_len, str_len, ignore_case);
add_opcode(reg, op);
@@ -420,8 +413,12 @@ add_compile_string(UChar* s, int mb_len, int str_len,
if (op == OP_EXACTMBN)
add_length(reg, mb_len);
- if (IS_NEED_STR_LEN_OP_EXACT(op))
- add_length(reg, str_len);
+ if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+ if (op == OP_EXACTN_IC)
+ add_length(reg, mb_len * str_len);
+ else
+ add_length(reg, str_len);
+ }
add_bytes(reg, s, mb_len * str_len);
return 0;
@@ -429,49 +426,37 @@ add_compile_string(UChar* s, int mb_len, int str_len,
static int
-compile_length_string_node(StrNode* sn, regex_t* reg)
+compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig, ic;
+ int rlen, r, len, prev_len, slen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
+ StrNode* sn;
+ sn = &(NSTRING(node));
if (sn->end <= sn->s)
return 0;
- ic = IS_IGNORECASE(reg->options);
+ ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, *p);
- if (ic != 0 && prev_len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- else
- ambig = 0;
-
+ prev_len = enc_len(enc, p);
p += prev_len;
slen = 1;
rlen = 0;
for (; p < sn->end; ) {
- len = enc_len(enc, *p);
+ len = enc_len(enc, p);
if (len == prev_len) {
slen++;
- if (ic != 0 && ambig == 0 && len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
}
else {
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
rlen += r;
-
- if (ic != 0 && len == 1)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- else
- ambig = 0;
-
prev = p;
slen = 1;
prev_len = len;
}
-
p += len;
}
r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
@@ -489,49 +474,33 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
}
static int
-compile_string_node(StrNode* sn, regex_t* reg)
+compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig, ic;
+ int r, len, prev_len, slen, ambig;
OnigEncoding enc = reg->enc;
- UChar *p, *prev;
+ UChar *p, *prev, *end;
+ StrNode* sn;
+ sn = &(NSTRING(node));
if (sn->end <= sn->s)
return 0;
- ic = IS_IGNORECASE(reg->options);
+ end = sn->end;
+ ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, *p);
- if (ic != 0 && prev_len == 1) {
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0)
- ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
- else
- ambig = 0;
-
+ prev_len = enc_len(enc, p);
p += prev_len;
slen = 1;
- for (; p < sn->end; ) {
- len = enc_len(enc, *p);
+ for (; p < end; ) {
+ len = enc_len(enc, p);
if (len == prev_len) {
slen++;
- if (ic != 0 && len == 1) {
- if (ambig == 0)
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
}
else {
r = add_compile_string(prev, prev_len, slen, reg, ambig);
if (r) return r;
- if (ic != 0 && len == 1) {
- ambig = ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p);
- if (ambig != 0) ONIGENC_MBC_TO_LOWER(reg->enc, p, p);
- }
- else
- ambig = 0;
prev = p;
slen = 1;
@@ -580,12 +549,16 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
+ if (IS_CCLASS_SHARE(cc)) {
+ len = SIZE_OPCODE + SIZE_POINTER;
+ return len;
+ }
+
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
else {
- if (bitset_is_empty(cc->bs)) {
- /* SIZE_BITSET is included in mbuf->used. */
+ if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
len = SIZE_OPCODE;
}
else {
@@ -606,22 +579,34 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
+ if (IS_CCLASS_SHARE(cc)) {
+ add_opcode(reg, OP_CCLASS_NODE);
+ r = add_pointer(reg, cc);
+ return r;
+ }
+
if (IS_NULL(cc->mbuf)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_NOT);
- else add_opcode(reg, OP_CCLASS);
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_NOT);
+ else
+ add_opcode(reg, OP_CCLASS);
r = add_bitset(reg, cc->bs);
}
else {
- if (bitset_is_empty(cc->bs)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
- else add_opcode(reg, OP_CCLASS_MB);
+ if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MB_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MB);
r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
- if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
- else add_opcode(reg, OP_CCLASS_MIX);
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MIX_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
if (r) return r;
@@ -649,7 +634,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n);
+ sizeof(OnigRepeatRange) * n);
CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
@@ -665,7 +650,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
static int
compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
- regex_t* reg)
+ regex_t* reg)
{
int r;
int num_repeat = reg->num_repeat;
@@ -684,7 +669,16 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
r = compile_tree_empty_check(qn->target, reg, empty_info);
if (r) return r;
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ if (
+#ifdef USE_SUBEXP_CALL
+ reg->num_call > 0 ||
+#endif
+ IS_QUALIFIER_IN_REPEAT(qn)) {
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
+ }
+ else {
+ r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ }
if (r) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
return r;
@@ -706,9 +700,9 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
if (NTYPE(qn->target) == N_ANYCHAR) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
}
}
@@ -741,7 +735,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
len = SIZE_OP_JUMP + tlen;
}
else if (!infinite && qn->greedy &&
- (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+ <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
len = tlen * qn->lower;
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
}
@@ -865,7 +860,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
r = compile_tree(qn->target, reg);
}
else if (!infinite && qn->greedy &&
- (tlen + SIZE_OP_PUSH) * qn->upper <= QUALIFIER_EXPAND_LIMIT_SIZE) {
+ (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+ <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
int n = qn->upper - qn->lower;
r = compile_tree_n_times(qn->target, qn->lower, reg);
@@ -925,18 +921,16 @@ compile_option_node(EffectNode* node, regex_t* reg)
if (r) return r;
r = add_opcode(reg, OP_FAIL);
if (r) return r;
+ }
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
+ reg->options = node->option;
+ r = compile_tree(node->target, reg);
+ reg->options = prev;
+
+ if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
if (r) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
}
- else {
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
- }
return r;
}
@@ -983,7 +977,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
break;
case EFFECT_STOP_BACKTRACK:
- if (IS_EFFECT_SIMPLE_REPEAT(node)) {
+ if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
QualifierNode* qn = &NQUALIFIER(node->target);
tlen = compile_length_tree(qn->target, reg);
if (tlen < 0) return tlen;
@@ -1073,7 +1067,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
break;
case EFFECT_STOP_BACKTRACK:
- if (IS_EFFECT_SIMPLE_REPEAT(node)) {
+ if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
QualifierNode* qn = &NQUALIFIER(node->target);
r = compile_tree_n_times(qn->target, qn->lower, reg);
if (r) return r;
@@ -1258,7 +1252,7 @@ compile_length_tree(Node* node, regex_t* reg)
if (NSTRING_IS_RAW(node))
r = compile_length_string_raw_node(&(NSTRING(node)), reg);
else
- r = compile_length_string_node(&(NSTRING(node)), reg);
+ r = compile_length_string_node(node, reg);
break;
case N_CCLASS:
@@ -1356,7 +1350,7 @@ compile_tree(Node* node, regex_t* reg)
if (NSTRING_IS_RAW(node))
r = compile_string_raw_node(&(NSTRING(node)), reg);
else
- r = compile_string_node(&(NSTRING(node)), reg);
+ r = compile_string_node(node, reg);
break;
case N_CCLASS:
@@ -1412,8 +1406,14 @@ compile_tree(Node* node, regex_t* reg)
}
else {
int* p;
- add_opcode(reg, (IS_IGNORECASE(reg->options) ?
- OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI));
+
+ if (IS_IGNORECASE(reg->options)) {
+ add_opcode(reg, OP_BACKREF_MULTI_IC);
+ }
+ else {
+ add_opcode(reg, OP_BACKREF_MULTI);
+ }
+
if (r) return r;
add_length(reg, br->back_num);
if (r) return r;
@@ -1455,12 +1455,9 @@ compile_tree(Node* node, regex_t* reg)
}
#ifdef USE_NAMED_GROUP
-typedef struct {
- int new_val;
-} NumMap;
static int
-noname_disable_map(Node** plink, NumMap* map, int* counter)
+noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
int r = 0;
Node* node = *plink;
@@ -1514,7 +1511,7 @@ noname_disable_map(Node** plink, NumMap* map, int* counter)
}
static int
-renumber_node_backref(Node* node, NumMap* map)
+renumber_node_backref(Node* node, GroupNumRemap* map)
{
int i, pos, n, old_num;
int *backs;
@@ -1542,7 +1539,7 @@ renumber_node_backref(Node* node, NumMap* map)
}
static int
-renumber_by_map(Node* node, NumMap* map)
+renumber_by_map(Node* node, GroupNumRemap* map)
{
int r = 0;
@@ -1607,9 +1604,9 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
{
int r, i, pos, counter;
BitStatusType loc;
- NumMap* map;
+ GroupNumRemap* map;
- map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
+ map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
for (i = 1; i <= env->num_mem; i++) {
map[i].new_val = 0;
@@ -1638,7 +1635,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
env->num_mem = env->num_named;
reg->num_mem = env->num_named;
- return 0;
+
+ return onig_renumber_name_table(reg, map);
}
#endif /* USE_NAMED_GROUP */
@@ -1649,9 +1647,6 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
int i, offset;
EffectNode* en;
AbsAddrType addr;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- UChar buf[SERIALIZE_BUFSIZE];
-#endif
for (i = 0; i < uslist->num; i++) {
en = &(NEFFECT(uslist->us[i].target));
@@ -1659,12 +1654,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
addr = en->call_addr;
offset = uslist->us[i].offset;
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
-#else
- SERIALIZE_ABSADDR(addr, buf);
- BBUF_WRITE(reg, offset, buf, SIZE_ABSADDR);
-#endif
}
return 0;
}
@@ -2044,7 +2034,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
StrNode* sn = &(NSTRING(node));
UChar *s = sn->s;
while (s < sn->end) {
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
(*len)++;
}
}
@@ -2135,7 +2125,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
{
int found;
- if (code >= SINGLE_BYTE_SIZE) {
+ if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
if (IS_NULL(cc->mbuf)) {
found = 0;
}
@@ -2147,10 +2137,10 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
}
- if (cc->not == 0)
- return found;
- else
+ if (IS_CCLASS_NOT(cc))
return !found;
+ else
+ return found;
}
/* x is not included y ==> 1 : 0 */
@@ -2213,7 +2203,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case N_CTYPE:
switch (NCTYPE(y).type) {
case CTYPE_WORD:
- if (IS_NULL(xc->mbuf) && xc->not == 0) {
+ if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(xc->bs, i)) {
if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
@@ -2226,7 +2216,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
- if (xc->not == 0) {
+ if (!IS_CCLASS_NOT(xc)) {
if (BITSET_AT(xc->bs, i))
return 0;
}
@@ -2251,14 +2241,16 @@ is_not_included(Node* x, Node* y, regex_t* reg)
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
v = BITSET_AT(xc->bs, i);
- if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
+ if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
+ (v == 0 && IS_CCLASS_NOT(xc))) {
v = BITSET_AT(yc->bs, i);
- if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
+ if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
+ (v == 0 && IS_CCLASS_NOT(yc)))
return 0;
}
}
- if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
- (IS_NULL(yc->mbuf) && yc->not == 0))
+ if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
return 1;
return 0;
}
@@ -2300,7 +2292,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
CClassNode* cc = &(NCCLASS(y));
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + enc_len(reg->enc, c));
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
}
break;
@@ -2311,18 +2303,9 @@ is_not_included(Node* x, Node* y, regex_t* reg)
StrNode* ys = &(NSTRING(y));
len = NSTRING_LEN(x);
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_CASE_AMBIG(x) || NSTRING_IS_CASE_AMBIG(y)) {
- UChar plow[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar qlow[ONIGENC_MBC_TO_LOWER_MAXLEN];
- int plen, qlen;
- for (p = ys->s, q = xs->s; q < xs->end; ) {
- plen = ONIGENC_MBC_TO_LOWER(reg->enc, p, plow);
- qlen = ONIGENC_MBC_TO_LOWER(reg->enc, q, qlow);
- if (plen != qlen || onig_strncmp(plow, qlow, plen) != 0)
- return 1;
- p += enc_len(reg->enc, *p);
- q += enc_len(reg->enc, *q);
- }
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ /* tiny version */
+ return 0;
}
else {
for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
@@ -2379,8 +2362,12 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
if (exact != 0 &&
!NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- if (! ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, sn->s))
+#if 0
+ UChar* tmp = sn->s;
+ if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
+ &tmp, sn->end))
n = node;
+#endif
}
else {
n = node;
@@ -2937,7 +2924,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
- SET_EFFECT_STATUS(en, NST_SIMPLE_REPEAT);
+ SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
swap_node(node, en);
NEFFECT(node).target = en;
}
@@ -2956,9 +2943,114 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
return 0;
}
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
+static int
+divide_ambig_string_node(Node* node, regex_t* reg)
+{
+ StrNode* sn = &NSTRING(node);
+ int ambig, prev_ambig;
+ UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
+ Node *snode;
+ Node *root = NULL_NODE;
+ Node **tailp = (Node** )0;
+
+ start = prev_start = p = sn->s;
+ end = sn->end;
+ if (p >= end) return 0;
+
+ prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
+
+ while (p < end) {
+ prev = p;
+ if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
+ reg->ambig_flag, &p, end))) {
+
+ if (prev_ambig != 0) {
+ tmp = prev_start;
+ wp = prev_start;
+ while (tmp < prev) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ snode = onig_node_new_str(prev_start, wp);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ NSTRING_SET_AMBIG(snode);
+ if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
+ }
+ else {
+ snode = onig_node_new_str(prev_start, prev);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+
+ if (tailp == (Node** )0) {
+ root = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
+ tailp = &(NCONS(root).right);
+ }
+ else {
+ *tailp = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
+ tailp = &(NCONS(*tailp).right);
+ }
+
+ prev_ambig = ambig;
+ prev_start = prev;
+ }
+ }
+
+ if (prev_start == start) {
+ if (prev_ambig != 0) {
+ NSTRING_SET_AMBIG(node);
+ tmp = start;
+ wp = start;
+ while (tmp < end) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
+ sn->end = wp;
+ }
+ }
+ else {
+ if (prev_ambig != 0) {
+ tmp = prev_start;
+ wp = prev_start;
+ while (tmp < end) {
+ wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+ &tmp, end, wp);
+ }
+ snode = onig_node_new_str(prev_start, wp);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ NSTRING_SET_AMBIG(snode);
+ if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
+ }
+ else {
+ snode = onig_node_new_str(prev_start, end);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+
+ if (tailp == (Node** )0) {
+ root = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
+ tailp = &(NCONS(node).right);
+ }
+ else {
+ *tailp = onig_node_new_list(snode, NULL);
+ CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
+ tailp = &(NCONS(*tailp).right);
+ }
+
+ swap_node(node, root);
+ onig_node_str_clear(root); /* should be after swap! */
+ onig_node_free(root); /* free original string node */
+ }
+
+ return 0;
+}
+
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -2996,33 +3088,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
break;
case N_CCLASS:
- if (IS_IGNORECASE(reg->options)) {
- int i;
- UChar c, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
- BitSetRef bs = NCCLASS(node).bs;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- c = (UChar )i;
- ONIGENC_MBC_TO_LOWER(reg->enc, &c, lowbuf);
- if (*lowbuf != c) {
- if (BITSET_AT(bs, c)) BITSET_SET_BIT(bs, *lowbuf);
- if (BITSET_AT(bs, *lowbuf)) BITSET_SET_BIT(bs, c);
- }
- }
- }
break;
case N_STRING:
if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- StrNode* sn = &NSTRING(node);
- UChar* p = sn->s;
-
- while (p < sn->end) {
- if (ONIGENC_IS_MBC_CASE_AMBIG(reg->enc, p)) {
- NSTRING_SET_CASE_AMBIG(node);
- break;
- }
- p++;
- }
+ r = divide_ambig_string_node(node, reg);
}
break;
@@ -3057,6 +3127,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
QualifierNode* qn = &(NQUALIFIER(node));
Node* target = qn->target;
+ if ((state & IN_REPEAT) != 0) {
+ qn->state |= NST_IN_REPEAT;
+ }
+
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
r = get_min_match_length(target, &d, env);
if (r) break;
@@ -3083,8 +3157,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
+ state |= IN_REPEAT;
if (qn->lower != qn->upper)
- state |= IN_REPEAT;
+ state |= IN_VAR_REPEAT;
r = setup_tree(target, reg, state, env);
if (r) break;
@@ -3141,11 +3216,13 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
break;
case EFFECT_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_REPEAT)) != 0) {
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
/* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
}
- /* fall */
+ r = setup_tree(en->target, reg, state, env);
+ break;
+
case EFFECT_STOP_BACKTRACK:
{
Node* target = en->target;
@@ -3156,7 +3233,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
int qtype = NTYPE(tqn->target);
if (IS_NODE_TYPE_SIMPLE(qtype))
- SET_EFFECT_STATUS(node, NST_SIMPLE_REPEAT);
+ SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
}
}
}
@@ -3228,26 +3305,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
/* set skip map for Boyer-Moor search */
static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case,
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
UChar skip[], int** int_skip)
{
int i, len;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
len = end - s;
if (len < ONIG_CHAR_TABLE_SIZE) {
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
- if (ignore_case) {
- for (i = 0; i < len - 1; i++) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- skip[*lowbuf] = len - 1 - i;
- }
- }
- else {
- for (i = 0; i < len - 1; i++)
- skip[s[i]] = len - 1 - i;
- }
+ for (i = 0; i < len - 1; i++)
+ skip[s[i]] = len - 1 - i;
}
else {
if (IS_NULL(*int_skip)) {
@@ -3256,16 +3324,8 @@ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, int ignore_case,
}
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
- if (ignore_case) {
- for (i = 0; i < len - 1; i++) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- (*int_skip)[*lowbuf] = len - 1 - i;
- }
- }
- else {
- for (i = 0; i < len - 1; i++)
- (*int_skip)[s[i]] = len - 1 - i;
- }
+ for (i = 0; i < len - 1; i++)
+ (*int_skip)[s[i]] = len - 1 - i;
}
return 0;
}
@@ -3278,11 +3338,12 @@ typedef struct {
} MinMaxLen;
typedef struct {
- MinMaxLen mmd;
- BitStatusType backrefed_status;
- OnigEncoding enc;
- OnigOptionType options;
- ScanEnv* scan_env;
+ MinMaxLen mmd;
+ BitStatusType backrefed_status;
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigAmbigType ambig_flag;
+ ScanEnv* scan_env;
} OptEnv;
typedef struct {
@@ -3321,29 +3382,34 @@ typedef struct {
static int
-map_position_value(int i)
+map_position_value(OnigEncoding enc, int i)
{
- static int vals[] = {
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 1, 1, 10, 10, 1, 10, 10,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 1, 6, 3, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5,
- 5, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 5, 5, 5,
- 5, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 10,
+ static short int ByteValTable[] = {
+ 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < sizeof(vals)/sizeof(vals[0])) return vals[i];
-
- return 7; /* Take it easy. */
+ if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
+ if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
+ return 20;
+ else
+ return (int )ByteValTable[i];
+ }
+ else
+ return 4; /* Take it easy. */
}
static int
distance_value(MinMaxLen* mm)
{
/* 1000 / (min-max-dist + 1) */
- static int dist_vals[] = {
+ static short int dist_vals[] = {
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
@@ -3363,7 +3429,7 @@ distance_value(MinMaxLen* mm)
d = mm->max - mm->min;
if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
/* return dist_vals[d] * 16 / (mm->min + 12); */
- return dist_vals[d];
+ return (int )dist_vals[d];
else
return 1;
}
@@ -3419,12 +3485,14 @@ add_mml(MinMaxLen* to, MinMaxLen* from)
to->max = distance_add(to->max, from->max);
}
+#if 0
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
to->min = distance_add(to->min, len);
to->max = distance_add(to->max, len);
}
+#endif
static void
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
@@ -3571,7 +3639,7 @@ concat_opt_exact_info_str(OptExactInfo* to,
to->s[i++] = *p++;
}
else {
- len = enc_len(enc, *p);
+ len = enc_len(enc, p);
if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len; j++)
to->s[i++] = *p++;
@@ -3598,7 +3666,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
for (i = 0; i < to->len && i < add->len; ) {
if (to->s[i] != add->s[i]) break;
- len = enc_len(env->enc, to->s[i]);
+ len = enc_len(env->enc, to->s + i);
for (j = 1; j < len; j++) {
if (to->s[i+j] != add->s[i+j]) break;
@@ -3618,27 +3686,55 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
}
static void
-select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
+select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
{
- int vlen1, vlen2;
+ int v1, v2;
- vlen1 = now->len * (now->ignore_case ? 1 : 2);
- vlen2 = alt->len * (alt->ignore_case ? 1 : 2);
+ v1 = now->len;
+ v2 = alt->len;
- if (comp_distance_value(&now->mmd, &alt->mmd, vlen1, vlen2) > 0)
+ if (v1 <= 2 && v2 <= 2) {
+ /* ByteValTable[x] is big value --> low price */
+ v2 = map_position_value(enc, now->s[0]);
+ v1 = map_position_value(enc, alt->s[0]);
+
+ if (now->len > 1) v1 += 5;
+ if (alt->len > 1) v2 += 5;
+ }
+
+ if (now->ignore_case == 0) v1 *= 2;
+ if (alt->ignore_case == 0) v2 *= 2;
+
+ if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
copy_opt_exact_info(now, alt);
}
static void
clear_opt_map_info(OptMapInfo* map)
{
- int i;
+ static OptMapInfo clean_info = {
+ {0, 0}, {0, 0}, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ }
+ };
- clear_mml(&map->mmd);
- clear_opt_anc_info(&map->anc);
- map->value = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- map->map[i] = 0;
+ xmemcpy(map, &clean_info, sizeof(OptMapInfo));
}
static void
@@ -3648,34 +3744,56 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
}
static void
-add_char_opt_map_info(OptMapInfo* map, int c)
+add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
{
if (map->map[c] == 0) {
map->map[c] = 1;
- map->value += map_position_value(c);
+ map->value += map_position_value(enc, c);
}
}
-static void
-add_char_amb_opt_map_info(OptMapInfo* map, int c, OnigEncoding enc)
+static int
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
+ OnigEncoding enc, OnigAmbigType ambig_flag)
{
- UChar x, low[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ int i, j, n, len;
+ UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ OnigCodePoint code, ccode;
+ OnigCompAmbigCodes* ccs;
+ OnigPairAmbigCodes* pccs;
+ OnigAmbigType amb;
- add_char_opt_map_info(map, c);
+ add_char_opt_map_info(map, p[0], enc);
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
- x = (UChar )c;
- ONIGENC_MBC_TO_LOWER(enc, &x, low);
- if (*low != x) {
- add_char_opt_map_info(map, (int )(*low));
- }
- else {
- int i;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
- x = (UChar )i;
- ONIGENC_MBC_TO_LOWER(enc, &x, low);
- if ((int )(*low) == c) add_char_opt_map_info(map, i);
+ for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
+ if ((amb & ambig_flag) == 0) continue;
+
+ n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
+ for (i = 0; i < n; i++) {
+ if (pccs[i].from == code) {
+ len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
+ if (len < 0) return len;
+ add_char_opt_map_info(map, buf[0], enc);
+ }
+ }
+
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
+ for (i = 0; i < n; i++) {
+ if (ccs[i].code == code) {
+ for (j = 0; j < ccs[i].n; j++) {
+ ccode = ccs[i].items[j].code[0];
+ len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
+ if (len < 0) return len;
+ add_char_opt_map_info(map, buf[0], enc);
+ }
+ break;
+ }
+ }
}
}
+ return 0;
}
static void
@@ -3711,7 +3829,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
}
static void
-alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
+alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
{
int i, val;
@@ -3730,7 +3848,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
to->map[i] = 1;
if (to->map[i])
- val += map_position_value(i);
+ val += map_position_value(enc, i);
}
to->value = val;
@@ -3763,7 +3881,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
}
static void
-concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
+concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
{
int exb_reach, exm_reach;
OptAncInfo tanc;
@@ -3798,8 +3916,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
clear_opt_exact_info(&add->exb);
}
}
- select_opt_exact_info(&to->exm, &add->exb);
- select_opt_exact_info(&to->exm, &add->exm);
+ select_opt_exact_info(enc, &to->exm, &add->exb);
+ select_opt_exact_info(enc, &to->exm, &add->exm);
if (to->expr.len > 0) {
if (add->len.max > 0) {
@@ -3807,9 +3925,9 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
to->expr.len = add->len.max;
if (to->expr.mmd.max == 0)
- select_opt_exact_info(&to->exb, &to->expr);
+ select_opt_exact_info(enc, &to->exb, &to->expr);
else
- select_opt_exact_info(&to->exm, &to->expr);
+ select_opt_exact_info(enc, &to->exm, &to->expr);
}
}
else if (add->expr.len > 0) {
@@ -3828,7 +3946,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
alt_merge_opt_exact_info(&to->exb, &add->exb, env);
alt_merge_opt_exact_info(&to->exm, &add->exm, env);
alt_merge_opt_exact_info(&to->expr, &add->expr, env);
- alt_merge_opt_map_info (&to->map, &add->map);
+ alt_merge_opt_map_info(env->enc, &to->map, &add->map);
alt_merge_mml(&to->len, &add->len);
}
@@ -3858,7 +3976,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
if (r == 0) {
add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(opt, &nopt);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
}
} while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
}
@@ -3881,148 +3999,110 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case N_STRING:
{
- UChar *p;
- int len, plen;
StrNode* sn = &(NSTRING(node));
int slen = sn->end - sn->s;
int is_raw = NSTRING_IS_RAW(node);
- if ((! IS_IGNORECASE(env->options)) || is_raw) {
+ if (! NSTRING_IS_AMBIG(node)) {
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
NSTRING_IS_RAW(node), env->enc);
if (slen > 0) {
- add_char_opt_map_info(&opt->map, *(sn->s));
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
}
+ set_mml(&opt->len, slen, slen);
}
else {
- for (p = sn->s; p < sn->end; ) {
- len = enc_len(env->enc, *p);
- if (len == 1 && ONIGENC_IS_MBC_CASE_AMBIG(env->enc, p)) {
- break;
- }
- p += len;
- }
+ int n, max;
- plen = p - sn->s;
- if (plen > slen / 5) {
- concat_opt_exact_info_str(&opt->exb, sn->s, p, is_raw, env->enc);
- concat_opt_exact_info_str(&opt->exm, p, sn->end, is_raw, env->enc);
- opt->exm.ignore_case = 1;
- if (opt->exm.len == sn->end - p)
- opt->exm.reach_end = 1;
-
- copy_mml(&(opt->exm.mmd), &(opt->exb.mmd));
- add_len_mml(&(opt->exm.mmd), plen);
- }
- else {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 1;
- }
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
if (slen > 0) {
- if (p == sn->s)
- add_char_amb_opt_map_info(&opt->map, *(sn->s), env->enc);
- else
- add_char_opt_map_info(&opt->map, *(sn->s));
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->ambig_flag);
+ if (r != 0) break;
}
+
+ if (NSTRING_IS_AMBIG_REDUCE(node)) {
+ n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ }
+ else {
+ max = slen;
+ }
+ set_mml(&opt->len, slen, max);
}
if (opt->exb.len == slen)
opt->exb.reach_end = 1;
-
- set_mml(&opt->len, slen, slen);
}
break;
case N_CCLASS:
{
- int i, z, len, found, mb_found;
+ int i, z;
CClassNode* cc = &(NCCLASS(node));
/* no need to check ignore case. (setted in setup_tree()) */
- found = mb_found = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = BITSET_AT(cc->bs, i);
- if ((z && !cc->not) || (!z && cc->not)) {
- found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
- if (IS_NULL(cc->mbuf)) {
- if (cc->not) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- add_char_opt_map_info(&opt->map, i);
- }
- mb_found = 1;
- }
+ if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ set_mml(&opt->len, min, max);
}
else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = ONIGENC_IS_MBC_HEAD(env->enc, i);
- if (z) {
- mb_found = 1;
- add_char_opt_map_info(&opt->map, i);
- }
- }
- }
-
- if (mb_found) {
- len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, 1, len);
- }
- else if (found) {
- len = 1;
- set_mml(&opt->len, 1, len);
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = BITSET_AT(cc->bs, i);
+ if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ set_mml(&opt->len, 1, 1);
}
}
break;
case N_CTYPE:
{
- int c;
- int len, min, max;
+ int i, min, max;
- min = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- max = 0;
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
-#define IS_WORD_HEAD_BYTE(enc,b) \
- (ONIGENC_IS_MBC_ASCII(&b) ? ONIGENC_IS_CODE_WORD(enc,((OnigCodePoint )b)) \
- : ONIGENC_IS_MBC_HEAD(enc,b))
+ if (max == 1) {
+ min = 1;
- switch (NCTYPE(node).type) {
- case CTYPE_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_WORD_HEAD_BYTE(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- len = enc_len(env->enc, c);
- if (len < min) min = len;
- if (len > max) max = len;
- }
+ switch (NCTYPE(node).type) {
+ case CTYPE_NOT_WORD:
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ break;
+
+ case CTYPE_WORD:
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ break;
}
- break;
-
- case CTYPE_NOT_WORD:
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! IS_WORD_HEAD_BYTE(env->enc, c)) {
- add_char_opt_map_info(&opt->map, c);
- len = enc_len(env->enc, c);
- if (len < min) min = len;
- if (len > max) max = len;
- }
- }
- break;
}
-
+ else {
+ min = ONIGENC_MBC_MINLEN(env->enc);
+ }
set_mml(&opt->len, min, max);
}
break;
case N_ANYCHAR:
{
- OnigDistance len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, 1, len);
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ set_mml(&opt->len, min, max);
}
break;
@@ -4223,36 +4303,20 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (e->len == 0) return 0;
- reg->exact = onig_strdup(e->s, e->s + e->len);
- CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
-
- reg->exact_end = reg->exact + e->len;
-
if (e->ignore_case) {
- UChar buf[ONIGENC_MBC_TO_LOWER_MAXLEN];
- int len, low_len, i, j, alloc_size;
-
- alloc_size = e->len;
- i = j = 0;
- while (i < e->len) {
- low_len = ONIGENC_MBC_TO_LOWER(reg->enc, &(e->s[i]), buf);
- len = enc_len(reg->enc, e->s[i]);
- if (low_len > alloc_size - i) {
- reg->exact = xrealloc(reg->exact, alloc_size * 2);
- CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
- alloc_size *= 2;
- }
-
- xmemcpy(&(reg->exact[j]), buf, low_len);
- i += len;
- j += low_len;
- }
- reg->exact_end = reg->exact + j;
+ reg->exact = (UChar* )xmalloc(e->len);
+ CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+ xmemcpy(reg->exact, e->s, e->len);
+ reg->exact_end = reg->exact + e->len;
reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
}
else {
int allow_reverse;
+ reg->exact = k_strdup(e->s, e->s + e->len);
+ CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+ reg->exact_end = reg->exact + e->len;
+
if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
allow_reverse = 1;
else
@@ -4260,7 +4324,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, 0,
+ r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
reg->map, &(reg->int_map));
if (r) return r;
@@ -4320,6 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
env.enc = reg->enc;
env.options = reg->options;
+ env.ambig_flag = reg->ambig_flag;
env.scan_env = scan_env;
clear_mml(&env.mmd);
@@ -4337,7 +4402,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
}
if (opt.exb.len > 0 || opt.exm.len > 0) {
- select_opt_exact_info(&opt.exb, &opt.exm);
+ select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
@@ -4474,17 +4539,26 @@ print_optimize_info(FILE* f, regex_t* reg)
fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
}
else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
- int i, n = 0;
+ int c, i, n = 0;
+
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
if (reg->map[i]) n++;
fprintf(f, "map: n=%d\n", n);
if (n > 0) {
+ c = 0;
fputc('[', f);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- if (reg->map[i] && enc_len(reg->enc, i) == 1 &&
- ONIGENC_IS_CODE_PRINT(reg->enc, i))
- fputc(i, f);
+ for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+ if (reg->map[i] != 0) {
+ if (c > 0) fputs(", ", f);
+ c++;
+ if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
+ ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
+ fputc(i, f);
+ else
+ fprintf(f, "%d", i);
+ }
+ }
fprintf(f, "]\n");
}
}
@@ -4500,7 +4574,7 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
+ if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
#ifdef USE_NAMED_GROUP
onig_names_free(reg);
@@ -4523,7 +4597,7 @@ onig_free(regex_t* reg)
xfree(from);\
} while (0)
-static void
+extern void
onig_transfer(regex_t* to, regex_t* from)
{
THREAD_ATOMIC_START;
@@ -4537,7 +4611,7 @@ onig_transfer(regex_t* to, regex_t* from)
}\
} while (0)
-static void
+extern void
onig_chain_link_add(regex_t* to, regex_t* add)
{
THREAD_ATOMIC_START;
@@ -4573,11 +4647,12 @@ onig_clone(regex_t** to, regex_t* from)
int r, size;
regex_t* reg;
- if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
- from->state++; /* increment as search counter */
- if (IS_NOT_NULL(from->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(from);
+ if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(from);
- from->state++;
+ ONIG_STATE_INC(from);
}
}
else {
@@ -4587,18 +4662,20 @@ onig_clone(regex_t** to, regex_t* from)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- from->state++; /* increment as search counter */
+ ONIG_STATE_INC(from);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
- r = onig_alloc_init(®, ONIG_OPTION_NONE, from->enc, ONIG_SYNTAX_DEFAULT);
+ r = onig_alloc_init(®, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ from->enc, ONIG_SYNTAX_DEFAULT);
if (r != 0) {
- from->state--;
+ ONIG_STATE_DEC(from);
return r;
}
xmemcpy(reg, from, sizeof(onig_t));
- reg->state = ONIG_STATE_NORMAL;
reg->chain = (regex_t* )NULL;
+ reg->state = ONIG_STATE_NORMAL;
if (from->p) {
reg->p = (UChar* )xmalloc(reg->alloc);
@@ -4631,12 +4708,12 @@ onig_clone(regex_t** to, regex_t* from)
reg->name_table = names_clone(from); /* names_clone is not implemented */
#endif
- from->state--;
+ ONIG_STATE_DEC(from);
*to = reg;
return 0;
mem_error:
- from->state--;
+ ONIG_STATE_DEC(from);
return ONIGERR_MEMORY;
}
#endif
@@ -4649,7 +4726,7 @@ static void print_tree P_((FILE* f, Node* node));
#endif
extern int
-onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigErrorInfo* einfo)
{
#define COMPILE_INIT_SIZE 20
@@ -4800,7 +4877,7 @@ onig_compile(regex_t* reg, UChar* pattern, UChar* pattern_end,
}
extern int
-onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
+onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* einfo)
{
@@ -4821,8 +4898,8 @@ onig_recompile(regex_t* reg, UChar* pattern, UChar* pattern_end,
static int onig_inited = 0;
extern int
-onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax)
+onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
+ OnigEncoding enc, OnigSyntaxType* syntax)
{
if (! onig_inited)
onig_init();
@@ -4832,6 +4909,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+ (*reg)->state = ONIG_STATE_MODIFY;
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
@@ -4840,7 +4918,6 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
else
option |= syntax->options;
- (*reg)->state = ONIG_STATE_NORMAL;
(*reg)->enc = enc;
(*reg)->options = option;
(*reg)->syntax = syntax;
@@ -4855,11 +4932,14 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigEncoding enc,
(*reg)->used = 0;
(*reg)->name_table = (void* )NULL;
+ (*reg)->ambig_flag = ambig_flag;
+ (*reg)->ambig_flag &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
+
return 0;
}
extern int
-onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
+onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
OnigErrorInfo* einfo)
{
@@ -4867,7 +4947,8 @@ onig_new(regex_t** reg, UChar* pattern, UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
- r = onig_alloc_init(reg, option, enc, syntax);
+ r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ enc, syntax);
if (r) return r;
r = onig_compile(*reg, pattern, pattern_end, einfo);
@@ -4899,9 +4980,14 @@ onig_init()
return 0;
}
+
extern int
onig_end()
{
+ extern int onig_free_shared_cclass_table();
+
+ THREAD_ATOMIC_START;
+
#ifdef ONIG_DEBUG_STATISTICS
onig_print_statistics(stderr);
#endif
@@ -4910,7 +4996,13 @@ onig_end()
onig_free_node_list();
#endif
+#ifdef USE_SHARED_CCLASS_TABLE
+ onig_free_shared_cclass_table();
+#endif
+
onig_inited = 0;
+
+ THREAD_ATOMIC_END;
return 0;
}
@@ -4940,6 +5032,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@@ -4964,7 +5057,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_BACKREF2, "backref2", ARG_NON },
{ OP_BACKREF3, "backref3", ARG_NON },
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
{ OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL },
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
@@ -4985,6 +5078,8 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
{ OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
{ OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
{ OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM },
{ OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
{ OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
@@ -5051,7 +5146,8 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
}
extern void
-onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
+ OnigEncoding enc)
{
int i, n, arg_type;
RelAddrType addr;
@@ -5068,8 +5164,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
case ARG_NON:
break;
case ARG_RELADDR:
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
+ GET_RELADDR_INC(addr, bp);
fprintf(f, ":(%d)", addr);
break;
case ARG_ABSADDR:
@@ -5143,7 +5238,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
break;
case OP_EXACT1_IC:
- p_string(f, 1, bp++);
+ len = enc_len(enc, bp);
+ p_string(f, len, bp);
+ bp += len;
break;
case OP_EXACTN_IC:
GET_LENGTH_INC(len, bp);
@@ -5189,8 +5286,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp)
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
- case OP_BACKREF_MULTI:
+ case OP_CCLASS_NODE:
+ {
+ CClassNode *cc;
+
+ GET_POINTER_INC(cc, bp);
+ n = bitset_on_num(cc->bs);
+ fprintf(f, ":%u:%d", (unsigned int )cc, n);
+ }
+ break;
+
+ case OP_BACKREFN_IC:
+ mem = *((MemNumType* )bp);
+ bp += SIZE_MEMNUM;
+ fprintf(f, ":%d", mem);
+ break;
+
case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_MULTI:
fputs(" ", f);
GET_LENGTH_INC(len, bp);
for (i = 0; i < len; i++) {
@@ -5258,7 +5371,7 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
else
fputs(" ", f);
}
- onig_print_compiled_byte_code(f, bp, &bp);
+ onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
}
fprintf(f, "\n");
@@ -5310,7 +5423,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case N_CCLASS:
fprintf(f, "", (int )node);
- if (NCCLASS(node).not) fputs(" not", f);
+ if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
if (NCCLASS(node).mbuf) {
BBuf* bbuf = NCCLASS(node).mbuf;
for (i = 0; i < bbuf->used; i++) {
@@ -5318,12 +5431,6 @@ print_indent_tree(FILE* f, Node* node, int indent)
fprintf(f, "%0x", bbuf->p[i]);
}
}
-#if 0
- fprintf(f, "\n");
- Indent(f, indent);
- for (i = 0; i < SINGLE_BYTE_SIZE; i++)
- fputc((BITSET_AT(NCCLASS(node).bs, i) ? '1' : '0'), f);
-#endif
break;
case N_CTYPE:
diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c
index 21598ca7c7d..a767ca60b6a 100644
--- a/ext/mbstring/oniguruma/regenc.c
+++ b/ext/mbstring/oniguruma/regenc.c
@@ -1,11 +1,33 @@
/**********************************************************************
-
regenc.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
-#include "regenc.h"
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
@@ -29,33 +51,33 @@ onigenc_set_default_encoding(OnigEncoding enc)
}
extern UChar*
-onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
- p += enc_len(enc, *p);
+ p += enc_len(enc, p);
}
return p;
}
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
- UChar* start, UChar* s, UChar** prev)
+ const UChar* start, const UChar* s, const UChar** prev)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
- if (prev) *prev = p;
- p += enc_len(enc, *p);
+ if (prev) *prev = (const UChar* )p;
+ p += enc_len(enc, p);
}
else {
- if (prev) *prev = (UChar* )NULL; /* Sorry */
+ if (prev) *prev = (const UChar* )NULL; /* Sorry */
}
return p;
}
extern UChar*
-onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
+onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
if (s <= start)
return (UChar* )NULL;
@@ -64,7 +86,7 @@ onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
}
extern UChar*
-onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
+onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
{
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
if (s <= start)
@@ -72,20 +94,127 @@ onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
}
- return s;
+ return (UChar* )s;
}
+extern UChar*
+onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
+{
+ UChar* q = (UChar* )p;
+ while (n-- > 0) {
+ q += ONIGENC_MBC_ENC_LEN(enc, q);
+ }
+ return (q <= end ? q : NULL);
+}
+
+extern int
+onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+ int n = 0;
+ UChar* q = (UChar* )p;
+
+ while (q < end) {
+ q += ONIGENC_MBC_ENC_LEN(enc, q);
+ n++;
+ }
+ return n;
+}
+
+extern int
+onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+{
+ int n = 0;
+ UChar* p = (UChar* )s;
+
+ while (1) {
+ if (*p == '\0') {
+ UChar* q;
+ int len = ONIGENC_MBC_MINLEN(enc);
+
+ if (len == 1) return n;
+ q = p + 1;
+ while (len > 1) {
+ if (*q != '\0') break;
+ q++;
+ len--;
+ }
+ if (len == 1) return n;
+ }
+ p += ONIGENC_MBC_ENC_LEN(enc, p);
+ n++;
+ }
+}
+
+extern int
+onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+{
+ UChar* start = (UChar* )s;
+ UChar* p = (UChar* )s;
+
+ while (1) {
+ if (*p == '\0') {
+ UChar* q;
+ int len = ONIGENC_MBC_MINLEN(enc);
+
+ if (len == 1) return (int )(p - start);
+ q = p + 1;
+ while (len > 1) {
+ if (*q != '\0') break;
+ q++;
+ len--;
+ }
+ if (len == 1) return (int )(p - start);
+ }
+ p += ONIGENC_MBC_ENC_LEN(enc, p);
+ }
+}
#ifndef ONIG_RUBY_M17N
#ifndef NOT_RUBY
+
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
+
+unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
+ 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
+ 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+};
#endif
-UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
+const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
-static UChar BuiltInAsciiToLowerCaseTable[] = {
+static const UChar BuiltInAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -121,23 +250,61 @@ static UChar BuiltInAsciiToLowerCaseTable[] = {
};
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
+#ifdef USE_UPPER_CASE_TABLE
+UChar OnigEncAsciiToUpperCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif
+
unsigned short OnigEncAsciiCtypeTable[256] = {
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
- 0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
- 0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
- 0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
- 0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
- 0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
- 0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -156,10 +323,82 @@ unsigned short OnigEncAsciiCtypeTable[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
+UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+#ifdef USE_UPPER_CASE_TABLE
+UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
+};
+#endif
+
extern void
-onigenc_set_default_caseconv_table(UChar* table)
+onigenc_set_default_caseconv_table(const UChar* table)
{
- if (table == (UChar* )0) {
+ if (table == (const UChar* )0) {
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
table = BuiltInAsciiToLowerCaseTable;
#else
@@ -173,47 +412,240 @@ onigenc_set_default_caseconv_table(UChar* table)
}
extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
+OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+ { 0x41, 0x61 },
+ { 0x42, 0x62 },
+ { 0x43, 0x63 },
+ { 0x44, 0x64 },
+ { 0x45, 0x65 },
+ { 0x46, 0x66 },
+ { 0x47, 0x67 },
+ { 0x48, 0x68 },
+ { 0x49, 0x69 },
+ { 0x4a, 0x6a },
+ { 0x4b, 0x6b },
+ { 0x4c, 0x6c },
+ { 0x4d, 0x6d },
+ { 0x4e, 0x6e },
+ { 0x4f, 0x6f },
+ { 0x50, 0x70 },
+ { 0x51, 0x71 },
+ { 0x52, 0x72 },
+ { 0x53, 0x73 },
+ { 0x54, 0x74 },
+ { 0x55, 0x75 },
+ { 0x56, 0x76 },
+ { 0x57, 0x77 },
+ { 0x58, 0x78 },
+ { 0x59, 0x79 },
+ { 0x5a, 0x7a },
+
+ { 0x61, 0x41 },
+ { 0x62, 0x42 },
+ { 0x63, 0x43 },
+ { 0x64, 0x44 },
+ { 0x65, 0x45 },
+ { 0x66, 0x46 },
+ { 0x67, 0x47 },
+ { 0x68, 0x48 },
+ { 0x69, 0x49 },
+ { 0x6a, 0x4a },
+ { 0x6b, 0x4b },
+ { 0x6c, 0x4c },
+ { 0x6d, 0x4d },
+ { 0x6e, 0x4e },
+ { 0x6f, 0x4f },
+ { 0x70, 0x50 },
+ { 0x71, 0x51 },
+ { 0x72, 0x52 },
+ { 0x73, 0x53 },
+ { 0x74, 0x54 },
+ { 0x75, 0x55 },
+ { 0x76, 0x56 },
+ { 0x77, 0x57 },
+ { 0x78, 0x58 },
+ { 0x79, 0x59 },
+ { 0x7a, 0x5a }
+};
+
extern int
-onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
+onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
+{
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
+ }
+ else {
+ return 0;
+ }
+}
+
+extern int
+onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
+ OnigCompAmbigCodes** ccs)
{
return 0;
}
extern int
-onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
- OnigEncFoldMatchInfo** info)
+onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
+ OnigPairAmbigCodes** ccs)
{
- return -1;
+ static OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
+ *ccs = OnigAsciiPairAmbigCodes;
+ return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
+ }
+ else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = cc;
+ return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+ }
+ else
+ return 0;
}
extern int
-onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
- OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
+ OnigCompAmbigCodes** ccs)
{
- return -1;
+ static OnigCompAmbigCodes folds[] = {
+ { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
+ };
+
+ if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
+ *ccs = folds;
+ return sizeof(folds) / sizeof(OnigCompAmbigCodes);
+ }
+ else
+ return 0;
+}
+
+extern int
+onigenc_not_support_get_ctype_code_range(int ctype,
+ OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+extern int
+onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
+{
+ if (p < end) {
+ if (*p == 0x0a) return 1;
+ }
+ return 0;
}
/* for single byte encodings */
extern int
-onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
+onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
+ UChar* lower)
{
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
+ }
+ else {
+ *lower = **p;
+ }
+
+ (*p)++;
return 1; /* return byte length of converted char to lower */
}
extern int
-onigenc_ascii_mbc_is_case_ambig(UChar* p)
+onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ const UChar* p = *pp;
+
+ (*pp)++;
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ }
+ else {
+ return FALSE;
+ }
+}
+
+extern int
+onigenc_single_byte_mbc_enc_len(const UChar* p)
+{
+ return 1;
}
extern OnigCodePoint
-onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(*p);
}
@@ -238,26 +670,31 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
}
extern UChar*
-onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
+onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
{
- return s;
+ return (UChar* )s;
}
extern int
-onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
+onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
return TRUE;
}
+extern int
+onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
+{
+ return FALSE;
+}
+
extern OnigCodePoint
-onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
+onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
- c = *p++;
- len = enc_len(enc, c);
- n = c;
+ len = enc_len(enc, p);
+ n = (OnigCodePoint )(*p++);
if (len == 1) return n;
for (i = 1; i < len; i++) {
@@ -269,33 +706,52 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
}
extern int
-onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
+onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
int len;
+ const UChar *p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+ }
+ else {
+ *lower = *p;
+ }
+ (*pp)++;
return 1;
}
else {
- len = enc_len(enc, *p);
+ len = enc_len(enc, p);
if (lower != p) {
- /* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
+ (*pp) += len;
return len; /* return byte length of converted to lower char */
}
}
extern int
-onigenc_mbn_mbc_is_case_ambig(UChar* p)
+onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+ const UChar** pp, const UChar* end)
{
- if (ONIGENC_IS_MBC_ASCII(p))
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ const UChar* p = *pp;
+ if (ONIGENC_IS_MBC_ASCII(p)) {
+ (*pp)++;
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
+ }
+ else {
+ return FALSE;
+ }
+ }
+
+ (*pp) += enc_len(enc, p);
return FALSE;
}
@@ -360,8 +816,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf[0]) != (p - buf))
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ if (enc_len(enc, buf) != (p - buf))
+ return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
@@ -383,23 +839,21 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf[0]) != (p - buf))
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ if (enc_len(enc, buf) != (p - buf))
+ return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
extern int
-onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- int first = onigenc_mb2_code_to_mbc_first(code);
- return (enc_len(enc, first) > 1 ? TRUE : FALSE);
- }
+ else
+ return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@@ -412,16 +866,14 @@ onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
}
extern int
-onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
+onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- int first = onigenc_mb4_code_to_mbc_first(code);
- return (enc_len(enc, first) > 1 ? TRUE : FALSE);
- }
+ else
+ return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@@ -434,39 +886,22 @@ onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
}
extern int
-onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
+onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
+ const UChar* sascii /* ascii */, int n)
{
- static OnigCodePoint list[] = { 0xdf };
- *codes = list;
- return 1;
-}
+ int x, c;
-extern int
-onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
- OnigEncFoldMatchInfo** info)
-{
- /* German alphabet ess-tsett(U+00DF) */
- static OnigEncFoldMatchInfo ss = {
- 3,
- { 1, 2, 2 },
- { "\337", "ss", "SS" } /* 0337: 0xdf */
- };
+ while (n-- > 0) {
+ if (p >= end) return (int )(*sascii);
- if (p >= end) return -1;
+ c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
+ x = *sascii - c;
+ if (x) return x;
- if (*p == 0xdf) {
- *info = &ss;
- return 1;
+ sascii++;
+ p += enc_len(enc, p);
}
- else if (p + 1 < end) {
- if ((*p == 'S' && *(p+1) == 'S') ||
- (*p == 's' && *(p+1) == 's')) {
- *info = &ss;
- return 2;
- }
- }
-
- return -1; /* is not a fold string. */
+ return 0;
}
#else /* ONIG_RUBY_M17N */
@@ -475,6 +910,10 @@ extern int
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
{
switch (ctype) {
+ case ONIGENC_CTYPE_NEWLINE:
+ if (code == 0x0a) return 1;
+ break;
+
case ONIGENC_CTYPE_ALPHA:
return m17n_isalpha(enc, code);
break;
@@ -548,12 +987,22 @@ onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
}
extern int
-onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
+onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+ UChar** pp, UChar* end)
{
- unsigned int c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
+ int len;
+ unsigned int c;
+ UChar* p = *pp;
+
+ len = enc_len(enc, *p);
+ (*pp) += len;
+ c = m17n_codepoint(enc, p, p + len);
+
+ if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
+ if (m17n_isupper(enc, c) || m17n_islower(enc, c))
+ return TRUE;
+ }
- if (m17n_isupper(enc, c) || m17n_islower(enc, c))
- return TRUE;
return FALSE;
}
@@ -575,7 +1024,8 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
}
extern int
-onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
+onigenc_is_allowed_reverse_match(OnigEncoding enc,
+ const UChar* s, const UChar* end)
{
return ONIGENC_IS_SINGLEBYTE(enc);
}
diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h
index e0c6211d32d..510455146ef 100644
--- a/ext/mbstring/oniguruma/regenc.h
+++ b/ext/mbstring/oniguruma/regenc.h
@@ -1,12 +1,33 @@
-/**********************************************************************
-
- regenc.h - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef REGENC_H
#define REGENC_H
+/**********************************************************************
+ regenc.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#ifndef RUBY_PLATFORM
#include "config.h"
@@ -26,15 +47,11 @@
#endif
/* error codes */
-/* internal error */
-#define ONIGERR_MEMORY -5
-#define ONIGERR_TYPE_BUG -6
-/* syntax error [-400, -999] */
-#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
-#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#define ONIGENCERR_MEMORY -5
+#define ONIGENCERR_TYPE_BUG -6
+#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401
-#define ONIG_NEWLINE '\n'
-#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
@@ -47,47 +64,79 @@
#else /* ONIG_RUBY_M17N */
+#define USE_UNICODE_FULL_RANGE_CTYPE
+
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
-ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
-ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
-ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
-ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
+ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
-ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
-ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
+ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
+ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
/* methods for multi byte encoding */
-ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
-ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
-ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
+ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
-ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
+
+/* in enc/unicode.c */
+ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+
+
+#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
+ OnigEncISO_8859_1_ToLowerCaseTable[c]
+#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
+ OnigEncISO_8859_1_ToUpperCaseTable[c]
+#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
+ ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
+
+ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
#endif /* is not ONIG_RUBY_M17N */
+ONIG_EXTERN int
+onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
+ONIG_EXTERN UChar*
+onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
+
+/* defined in regexec.c, but used in enc/xxx.c */
+extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
-ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
+#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
((OnigEncAsciiCtypeTable[code] & ctype) != 0)
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c
index 5a6c31b82ec..560b5e12c56 100644
--- a/ext/mbstring/oniguruma/regerror.c
+++ b/ext/mbstring/oniguruma/regerror.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
regerror.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regint.h"
#include /* for vsnprintf() */
@@ -56,8 +78,8 @@ onig_error_code_to_format(int code)
p = "empty char-class"; break;
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
- case ONIGERR_END_PATTERN_AT_BACKSLASH:
- p = "end pattern at backslash"; break;
+ case ONIGERR_END_PATTERN_AT_ESCAPE:
+ p = "end pattern at escape"; break;
case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
case ONIGERR_END_PATTERN_AT_CONTROL:
@@ -145,7 +167,9 @@ onig_error_code_to_format(int code)
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- p = "invalid character property name"; break;
+ p = "invalid character property name {%n}"; break;
+ case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
+ p = "not supported encoding combination"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
@@ -184,6 +208,7 @@ onig_error_code_to_str(s, code, va_alist)
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
case ONIGERR_INVALID_GROUP_NAME:
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
einfo = va_arg(vargs, OnigErrorInfo*);
len = einfo->par_end - einfo->par;
q = onig_error_code_to_format(code);
@@ -218,7 +243,7 @@ onig_error_code_to_str(s, code, va_alist)
default:
q = onig_error_code_to_format(code);
- len = strlen(q);
+ len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
xmemcpy(s, q, len);
s[len] = '\0';
break;
@@ -245,7 +270,8 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
#endif
{
int n, need, len;
- UChar *p, *s;
+ UChar *p, *s, *bp;
+ char bs[6];
va_list args;
va_init_list(args, fmt);
@@ -256,29 +282,41 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
if (n + need < bufsize) {
strcat(buf, ": /");
- s = buf + strlen(buf);
+ s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
while (p < (UChar* )pat_end) {
- if (*p == MC_ESC) {
+ if (*p == MC_ESC(enc)) {
*s++ = *p++;
- len = enc_len(enc, *p);
+ len = enc_len(enc, p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
- *s++ = MC_ESC;
+ *s++ = (unsigned char )MC_ESC(enc);
*s++ = *p++;
}
- else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
- len = enc_len(enc, *p);
- while (len-- > 0) *s++ = *p++;
+ else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
+ len = enc_len(enc, p);
+ if (ONIGENC_MBC_MINLEN(enc) == 1) {
+ while (len-- > 0) *s++ = *p++;
+ }
+ else { /* for UTF16 */
+ int blen;
+
+ while (len-- > 0) {
+ sprintf(bs, "\\%03o", *p++ & 0377);
+ blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+ bp = bs;
+ while (blen-- > 0) *s++ = *bp++;
+ }
+ }
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
- char b[5];
- sprintf(b, "\\%03o", *p & 0377);
- len = strlen(b);
- while (len-- > 0) *s++ = *p++;
+ sprintf(bs, "\\%03o", *p++ & 0377);
+ len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+ bp = bs;
+ while (len-- > 0) *s++ = *bp++;
}
else {
*s++ = *p++;
diff --git a/ext/mbstring/oniguruma/regex.c b/ext/mbstring/oniguruma/regex.c
deleted file mode 100644
index 2d79d000a8b..00000000000
--- a/ext/mbstring/oniguruma/regex.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/**********************************************************************
-
- regex.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
-/*
- * Source wrapper for Ruby.
- */
-#include "regint.h"
-#include "regex.h"
-
-#include "regparse.c"
-#include "regcomp.c"
-#include "regexec.c"
-#include "regenc.c"
-#include "reggnu.c"
-#include "regerror.c"
-
-#ifndef ONIG_RUBY_M17N
-#include "enc/ascii.c"
-#include "enc/utf8.c"
-#include "enc/euc_jp.c"
-#include "enc/sjis.c"
-#endif
diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c
index 2ded602e15a..2c082de423f 100644
--- a/ext/mbstring/oniguruma/regexec.c
+++ b/ext/mbstring/oniguruma/regexec.c
@@ -1,53 +1,152 @@
/**********************************************************************
-
regexec.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regint.h"
+#ifdef USE_CAPTURE_HISTORY
+static void history_tree_free(OnigCaptureTreeNode* node);
+
static void
-region_list_clear(OnigRegion** list)
+history_tree_clear(OnigCaptureTreeNode* node)
{
int i;
- if (IS_NOT_NULL(list)) {
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (IS_NOT_NULL(list[i])) {
- xfree(list[i]);
- list[i] = (OnigRegion* )0;
+ if (IS_NOT_NULL(node)) {
+ for (i = 0; i < node->num_childs; i++) {
+ if (IS_NOT_NULL(node->childs[i])) {
+ history_tree_free(node->childs[i]);
}
}
+ for (i = 0; i < node->allocated; i++) {
+ node->childs[i] = (OnigCaptureTreeNode* )0;
+ }
+ node->num_childs = 0;
+ node->beg = ONIG_REGION_NOTPOS;
+ node->end = ONIG_REGION_NOTPOS;
+ node->group = -1;
}
}
static void
-region_list_free(OnigRegion* r)
+history_tree_free(OnigCaptureTreeNode* node)
{
- if (IS_NOT_NULL(r->list)) {
- region_list_clear(r->list);
- xfree(r->list);
- r->list = (OnigRegion** )0;
+ history_tree_clear(node);
+ xfree(node);
+}
+
+static void
+history_root_free(OnigRegion* r)
+{
+ if (IS_NOT_NULL(r->history_root)) {
+ history_tree_free(r->history_root);
+ r->history_root = (OnigCaptureTreeNode* )0;
}
}
-static OnigRegion**
-region_list_new()
+static OnigCaptureTreeNode*
+history_node_new()
+{
+ OnigCaptureTreeNode* node;
+
+ node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
+ CHECK_NULL_RETURN(node);
+ node->childs = (OnigCaptureTreeNode** )0;
+ node->allocated = 0;
+ node->num_childs = 0;
+ node->group = -1;
+ node->beg = ONIG_REGION_NOTPOS;
+ node->end = ONIG_REGION_NOTPOS;
+
+ return node;
+}
+
+static int
+history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
+{
+#define HISTORY_TREE_INIT_ALLOC_SIZE 8
+
+ if (parent->num_childs >= parent->allocated) {
+ int n, i;
+
+ if (IS_NULL(parent->childs)) {
+ n = HISTORY_TREE_INIT_ALLOC_SIZE;
+ parent->childs =
+ (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+ }
+ else {
+ n = parent->allocated * 2;
+ parent->childs =
+ (OnigCaptureTreeNode** )xrealloc(parent->childs,
+ sizeof(OnigCaptureTreeNode*) * n);
+ }
+ CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
+ for (i = parent->allocated; i < n; i++) {
+ parent->childs[i] = (OnigCaptureTreeNode* )0;
+ }
+ parent->allocated = n;
+ }
+
+ parent->childs[parent->num_childs] = child;
+ parent->num_childs++;
+ return 0;
+}
+
+static OnigCaptureTreeNode*
+history_tree_clone(OnigCaptureTreeNode* node)
{
int i;
- OnigRegion** list;
+ OnigCaptureTreeNode *clone, *child;
- list = (OnigRegion** )xmalloc(sizeof(OnigRegion*)
- * (ONIG_MAX_CAPTURE_HISTORY_GROUP + 1));
- CHECK_NULL_RETURN(list);
- for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- list[i] = (OnigRegion* )0;
+ clone = history_node_new();
+ CHECK_NULL_RETURN(clone);
+
+ clone->beg = node->beg;
+ clone->end = node->end;
+ for (i = 0; i < node->num_childs; i++) {
+ child = history_tree_clone(node->childs[i]);
+ if (IS_NULL(child)) {
+ history_tree_free(clone);
+ return (OnigCaptureTreeNode* )0;
+ }
+ history_tree_add_child(clone, child);
}
- return list;
+ return clone;
}
+extern OnigCaptureTreeNode*
+onig_get_capture_tree(OnigRegion* region)
+{
+ return region->history_root;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
extern void
onig_region_clear(OnigRegion* region)
{
@@ -56,14 +155,14 @@ onig_region_clear(OnigRegion* region)
for (i = 0; i < region->num_regs; i++) {
region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
- region_list_clear(region->list);
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(region);
+#endif
}
extern int
onig_region_resize(OnigRegion* region, int n)
{
- int i;
-
region->num_regs = n;
if (n < ONIG_NREGION)
@@ -88,92 +187,43 @@ onig_region_resize(OnigRegion* region, int n)
region->allocated = n;
}
- for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
-
- if (IS_NOT_NULL(region->list))
- region_list_clear(region->list);
-
return 0;
}
-static int
-region_ensure_size(OnigRegion* region, int n)
+extern int
+onig_region_resize_clear(OnigRegion* region, int n)
{
- int i, new_size;
-
- if (region->allocated >= n)
- return 0;
-
- new_size = region->allocated;
- if (new_size == 0)
- new_size = ONIG_NREGION;
- while (new_size < n)
- new_size *= 2;
-
- if (region->allocated == 0) {
- region->beg = (int* )xmalloc(new_size * sizeof(int));
- region->end = (int* )xmalloc(new_size * sizeof(int));
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = new_size;
- }
- else if (region->allocated < new_size) {
- region->beg = (int* )xrealloc(region->beg, new_size * sizeof(int));
- region->end = (int* )xrealloc(region->end, new_size * sizeof(int));
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = new_size;
- }
-
- for (i = region->num_regs; i < n; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
- return 0;
-}
-
-static int
-region_list_add_entry(OnigRegion* region, int group, int start, int end)
-{
- int r, pos;
- OnigRegion** list;
-
- if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP)
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
-
- if (IS_NULL(region->list)) {
- region->list = region_list_new();
- CHECK_NULL_RETURN_VAL(region->list, ONIGERR_MEMORY);
- }
-
- list = region->list;
- if (IS_NULL(list[group])) {
- list[group] = onig_region_new();
- CHECK_NULL_RETURN_VAL(list[group], ONIGERR_MEMORY);
- }
-
- r = region_ensure_size(list[group], list[group]->num_regs + 1);
+ int r;
+
+ r = onig_region_resize(region, n);
if (r != 0) return r;
+ onig_region_clear(region);
+ return 0;
+}
+
+extern int
+onig_region_set(OnigRegion* region, int at, int beg, int end)
+{
+ if (at < 0) return ONIGERR_INVALID_ARGUMENT;
- pos = list[group]->num_regs;
- list[group]->beg[pos] = start;
- list[group]->end[pos] = end;
- list[group]->num_regs++;
-
+ if (at >= region->allocated) {
+ int r = onig_region_resize(region, at + 1);
+ if (r < 0) return r;
+ }
+
+ region->beg[at] = beg;
+ region->end[at] = end;
return 0;
}
-static void
+extern void
onig_region_init(OnigRegion* region)
{
- region->num_regs = 0;
- region->allocated = 0;
- region->beg = (int* )0;
- region->end = (int* )0;
- region->list = (OnigRegion** )0;
+ region->num_regs = 0;
+ region->allocated = 0;
+ region->beg = (int* )0;
+ region->end = (int* )0;
+ region->history_root = (OnigCaptureTreeNode* )0;
}
extern OnigRegion*
@@ -195,7 +245,9 @@ onig_region_free(OnigRegion* r, int free_self)
if (r->end) xfree(r->end);
r->allocated = 0;
}
- region_list_free(r);
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(r);
+#endif
if (free_self) xfree(r);
}
}
@@ -227,34 +279,19 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
}
to->num_regs = from->num_regs;
- if (IS_NOT_NULL(from->list)) {
- if (IS_NULL(to->list)) {
- to->list = region_list_new();
- }
+#ifdef USE_CAPTURE_HISTORY
+ history_root_free(to);
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (IS_NOT_NULL(from->list[i])) {
- if (IS_NULL(to->list[i]))
- to->list[i] = onig_region_new();
-
- onig_region_copy(to->list[i], from->list[i]);
- }
- else {
- if (IS_NOT_NULL(to->list[i])) {
- xfree(to->list[i]);
- to->list[i] = (OnigRegion* )0;
- }
- }
- }
+ if (IS_NOT_NULL(from->history_root)) {
+ to->history_root = history_tree_clone(from->history_root);
}
- else
- region_list_free(to);
+#endif
}
/** stack **/
#define INVALID_STACK_INDEX -1
-typedef int StackIndex;
+typedef long StackIndex;
typedef struct _StackType {
unsigned int type;
@@ -324,7 +361,7 @@ typedef struct {
int stack_n;
OnigOptionType options;
OnigRegion* region;
- UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
} MatchArg;
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
@@ -362,11 +399,26 @@ typedef struct {
};\
} while(0)
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+ return MatchStackLimitSize;
+}
+
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+ MatchStackLimitSize = size;
+ return 0;
+}
+
static int
stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
{
- int n;
+ unsigned int n;
StackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
@@ -385,7 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}
else {
n *= 2;
- if (n > MATCH_STACK_LIMIT_SIZE) return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) {
+ if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = MatchStackLimitSize;
+ }
x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
@@ -831,24 +888,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while(0)
-#define STRING_CMP_IC(s1,ps2,len) do {\
- if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
goto fail; \
} while(0)
-static int string_cmp_ic(OnigEncoding enc,
+static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
UChar* s1, UChar** ps2, int mblen)
{
- UChar buf1[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar buf2[ONIGENC_MBC_TO_LOWER_MAXLEN];
- UChar *p1, *p2, *end, *s2;
+ UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *p1, *p2, *end, *s2, *end2;
int len1, len2;
- s2 = *ps2;
- end = s1 + mblen;
+ s2 = *ps2;
+ end = s1 + mblen;
+ end2 = s2 + mblen;
while (s1 < end) {
- len1 = ONIGENC_MBC_TO_LOWER(enc, s1, buf1);
- len2 = ONIGENC_MBC_TO_LOWER(enc, s2, buf2);
+ len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
+ len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
if (len1 != len2) return 0;
p1 = buf1;
p2 = buf2;
@@ -857,9 +915,6 @@ static int string_cmp_ic(OnigEncoding enc,
p1++;
p2++;
}
-
- s1 += enc_len(enc, *s1);
- s2 += enc_len(enc, *s2);
}
*ps2 = s2;
@@ -875,8 +930,8 @@ static int string_cmp_ic(OnigEncoding enc,
}\
} while(0)
-#define STRING_CMP_VALUE_IC(s1,ps2,len,is_fail) do {\
- if (string_cmp_ic(encode, s1, ps2, len) == 0) \
+#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
is_fail = 1; \
else \
is_fail = 0; \
@@ -891,6 +946,110 @@ static int string_cmp_ic(OnigEncoding enc,
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+#ifdef USE_CAPTURE_HISTORY
+static int
+make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
+ StackType* stk_top, UChar* str, regex_t* reg)
+{
+ int n, r;
+ OnigCaptureTreeNode* child;
+ StackType* k = *kp;
+
+ while (k < stk_top) {
+ if (k->type == STK_MEM_START) {
+ n = k->u.mem.num;
+ if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
+ BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ child = history_node_new();
+ CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
+ child->group = n;
+ child->beg = (int )(k->u.mem.pstr - str);
+ r = history_tree_add_child(node, child);
+ if (r != 0) return r;
+ *kp = (k + 1);
+ r = make_capture_history_tree(child, kp, stk_top, str, reg);
+ if (r != 0) return r;
+
+ k = *kp;
+ child->end = (int )(k->u.mem.pstr - str);
+ }
+ }
+ else if (k->type == STK_MEM_END) {
+ if (k->u.mem.num == node->group) {
+ node->end = (int )(k->u.mem.pstr - str);
+ *kp = k;
+ return 0;
+ }
+ }
+ k++;
+ }
+
+ return 1; /* 1: root node ending. */
+}
+#endif
+
+#ifdef RUBY_PLATFORM
+
+typedef struct {
+ int state;
+ regex_t* reg;
+ MatchArg* msa;
+ StackType* stk_base;
+} TrapEnsureArg;
+
+static VALUE
+trap_ensure(VALUE arg)
+{
+ TrapEnsureArg* ta = (TrapEnsureArg* )arg;
+
+ if (ta->state == 0) { /* trap_exec() is not normal return */
+ ONIG_STATE_DEC(ta->reg);
+ if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
+ xfree(ta->stk_base);
+
+ MATCH_ARG_FREE(*(ta->msa));
+ }
+
+ return Qnil;
+}
+
+static VALUE
+trap_exec(VALUE arg)
+{
+ TrapEnsureArg* ta;
+
+ rb_trap_exec();
+
+ ta = (TrapEnsureArg* )arg;
+ ta->state = 1; /* normal return */
+ return Qnil;
+}
+
+extern void
+onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
+{
+ VALUE arg;
+ TrapEnsureArg ta;
+
+ ta.state = 0;
+ ta.reg = reg;
+ ta.msa = msa;
+ ta.stk_base = stk_base;
+ arg = (VALUE )(&ta);
+ rb_ensure(trap_exec, arg, trap_ensure, arg);
+}
+
+#define CHECK_INTERRUPT_IN_MATCH_AT do {\
+ if (rb_trap_pending) {\
+ if (! rb_prohibit_interrupt) {\
+ onig_exec_trap(reg, msa, stk_base);\
+ }\
+ }\
+} while (0)
+#else
+#define CHECK_INTERRUPT_IN_MATCH_AT
+#endif /* RUBY_PLATFORM */
+
#ifdef ONIG_DEBUG_STATISTICS
#define USE_TIMEOFDAY
@@ -935,6 +1094,7 @@ static int MaxStackDepth = 0;
} while (0)
#ifdef RUBY_PLATFORM
+
/*
* :nodoc:
*/
@@ -984,7 +1144,7 @@ onig_print_statistics(FILE* f)
#endif
extern int
-onig_is_in_code_range(UChar* p, OnigCodePoint code)
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
{
OnigCodePoint n, *data;
OnigCodePoint low, high, x;
@@ -1004,6 +1164,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code)
return ((low < n && code >= data[low * 2]) ? 1 : 0);
}
+static int
+code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+{
+ unsigned int in_cc;
+ CClassNode* cc = (CClassNode* )node;
+
+ if (enclen == 1) {
+ in_cc = BITSET_AT(cc->bs, code);
+ }
+ else {
+ UChar* p = ((BBuf* )(cc->mbuf))->p;
+ in_cc = onig_is_in_code_range(p, code);
+ }
+
+ if (IS_CCLASS_NOT(cc)) {
+ return (in_cc ? 0 : 1);
+ }
+ else {
+ return (in_cc ? 1 : 0);
+ }
+}
/* matching region of POSIX API */
typedef int regoff_t;
@@ -1016,7 +1197,7 @@ typedef struct {
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
-match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
+match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
UChar* sprev, MatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1027,18 +1208,18 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
RelAddrType addr;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
- int ignore_case;
+ OnigAmbigType ambig_flag = reg->ambig_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
char *alloca_base;
StackType *stk_alloc, *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
+ StackIndex si;
StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
n = reg->num_repeat + reg->num_mem * 2;
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
- ignore_case = IS_IGNORECASE(option);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
repeat_stk = (StackIndex* )alloca_base;
@@ -1062,7 +1243,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
- s = sstart;
+ s = (UChar* )sstart;
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
@@ -1071,7 +1252,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
fprintf(stderr, "%4d> \"", (int )(s - str));
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enc_len(encode, *q);
+ len = enc_len(encode, q);
while (len-- > 0) *bp++ = *q++;
}
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
@@ -1079,7 +1260,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
*bp = 0;
fputs(buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- onig_print_compiled_byte_code(stderr, p, NULL);
+ onig_print_compiled_byte_code(stderr, p, NULL, encode);
fprintf(stderr, "\n");
}
#endif
@@ -1134,27 +1315,33 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
}
+#ifdef USE_CAPTURE_HISTORY
if (reg->capture_history != 0) {
- UChar *pstart, *pend;
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(reg->capture_history, i) != 0) {
- stkp = stk_base;
- do {
- STACK_GET_MEM_RANGE(stkp, i, pstart, pend);
- if (stkp < stk) {
- int r;
- r = region_list_add_entry(region, i,
- pstart - str, pend - str);
- if (r) {
- STACK_SAVE;
- return r;
- }
- }
- stkp++;
- } while (stkp < stk);
- }
- }
- } /* list of captures */
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+ }
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = sstart - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
+#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_REGION_OPTION
} /* else IS_POSIX_REGION() */
#endif
@@ -1171,10 +1358,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
goto fail; /* for retry */
}
}
- else {
- /* default behavior: return first-matching result. */
- goto finish;
- }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
break;
case OP_EXACT1: STAT_OP_IN(OP_EXACT1);
@@ -1192,14 +1378,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
- len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
- DATA_ENSURE(len);
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exact1_ic_retry:
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
q = lowbuf;
- s += enc_len(encode, *s);
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exact1_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1276,19 +1479,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
while (p < endp) {
- len = ONIGENC_MBC_TO_LOWER(encode, s, lowbuf);
- DATA_ENSURE(len);
sprev = s;
- s += enc_len(encode, *s);
+ DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exactn_ic_retry:
+ len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exactn_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1389,20 +1609,22 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, *s); /* OP_CCLASS can match mb-code. \D, \S */
+ s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */
STAT_OP_OUT;
break;
case OP_CCLASS_MB: STAT_OP_IN(OP_CCLASS_MB);
- if (! ONIGENC_IS_MBC_HEAD(encode, *s)) goto fail;
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
GET_LENGTH_INC(tlen, p);
{
OnigCodePoint code;
UChar *ss;
- int mb_len = enc_len(encode, *s);
+ int mb_len;
+ DATA_ENSURE(1);
+ mb_len = enc_len(encode, s);
DATA_ENSURE(mb_len);
ss = s;
s += mb_len;
@@ -1422,7 +1644,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX: STAT_OP_IN(OP_CCLASS_MIX);
DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
goto cclass_mb;
}
@@ -1442,13 +1664,13 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
case OP_CCLASS_MB_NOT: STAT_OP_IN(OP_CCLASS_MB_NOT);
- if (! ONIGENC_IS_MBC_HEAD(encode, *s)) {
- DATA_ENSURE(1);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
s++;
GET_LENGTH_INC(tlen, p);
p += tlen;
@@ -1460,10 +1682,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
{
OnigCodePoint code;
UChar *ss;
- int mb_len = enc_len(encode, *s);
+ int mb_len = enc_len(encode, s);
if (s + mb_len > end) {
- s = end;
+ DATA_ENSURE(1);
+ s = (UChar* )end;
p += tlen;
goto cc_mb_not_success;
}
@@ -1488,7 +1711,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_CCLASS_MIX_NOT: STAT_OP_IN(OP_CCLASS_MIX_NOT);
DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, *s)) {
+ if (ONIGENC_IS_MBC_HEAD(encode, s)) {
p += SIZE_BITSET;
goto cclass_mb_not;
}
@@ -1504,22 +1727,36 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
break;
- case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- s += n;
- }
- else {
- DATA_ENSURE(1);
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- s++;
+ case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
+ {
+ OnigCodePoint code;
+ void *node;
+ int mb_len;
+ UChar *ss;
+
+ DATA_ENSURE(1);
+ GET_POINTER_INC(node, p);
+ mb_len = enc_len(encode, s);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
}
STAT_OP_OUT;
break;
+ case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ s += n;
+ STAT_OP_OUT;
+ break;
+
case OP_ANYCHAR_ML: STAT_OP_IN(OP_ANYCHAR_ML);
- n = enc_len(encode, *s);
+ DATA_ENSURE(1);
+ n = enc_len(encode, s);
DATA_ENSURE(n);
s += n;
STAT_OP_OUT;
@@ -1528,17 +1765,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
while (s < end) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- sprev = s;
- s++;
- }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
}
STAT_OP_OUT;
break;
@@ -1546,7 +1777,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
while (s < end) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, *s);
+ n = enc_len(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@@ -1565,17 +1796,11 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, *s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- if (ONIG_IS_NEWLINE(*s)) goto fail;
- sprev = s;
- s++;
- }
+ n = enc_len(encode, s);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
}
p++;
STAT_OP_OUT;
@@ -1586,7 +1811,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, *s);
+ n = enc_len(encode, s);
if (n >1) {
DATA_ENSURE(n);
sprev = s;
@@ -1606,7 +1831,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
@@ -1615,7 +1840,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, *s);
+ s += enc_len(encode, s);
STAT_OP_OUT;
break;
@@ -1698,7 +1923,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
continue;
}
- else if (ONIG_IS_NEWLINE(*sprev) && !ON_STR_END(s)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
STAT_OP_OUT;
continue;
}
@@ -1708,7 +1933,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_END_LINE: STAT_OP_IN(OP_END_LINE);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail;
STAT_OP_OUT;
@@ -1717,7 +1942,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- else if (ONIG_IS_NEWLINE(*s)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
STAT_OP_OUT;
continue;
}
@@ -1727,7 +1952,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SEMI_END_BUF: STAT_OP_IN(OP_SEMI_END_BUF);
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIG_IS_NEWLINE(*sprev)) {
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (IS_NOTEOL(msa->options)) goto fail; /* Is it needed? */
STAT_OP_OUT;
@@ -1736,7 +1961,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
}
#endif
}
- if (ONIG_IS_NEWLINE(*s) && ON_STR_END(s+1)) {
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
+ ON_STR_END(s + enc_len(encode, s))) {
STAT_OP_OUT;
continue;
}
@@ -1845,7 +2071,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(n);
sprev = s;
STRING_CMP(pstart, s, n);
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
STAT_OP_OUT;
@@ -1876,8 +2102,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
n = pend - pstart;
DATA_ENSURE(n);
sprev = s;
- STRING_CMP_IC(pstart, &s, n);
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ STRING_CMP_IC(ambig_flag, pstart, &s, n);
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
STAT_OP_OUT;
@@ -1912,7 +2138,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -1948,10 +2174,10 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
DATA_ENSURE(n);
sprev = s;
swork = s;
- STRING_CMP_VALUE_IC(pstart, &swork, n, is_fail);
+ STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, *sprev)) < s)
+ while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -1965,7 +2191,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH);
GET_OPTION_INC(option, p);
- ignore_case = IS_IGNORECASE(option);
STACK_PUSH_ALT(p, s, sprev);
p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
STAT_OP_OUT;
@@ -1974,7 +2199,6 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_SET_OPTION: STAT_OP_IN(OP_SET_OPTION);
GET_OPTION_INC(option, p);
- ignore_case = IS_IGNORECASE(option);
STAT_OP_OUT;
continue;
break;
@@ -2006,6 +2230,8 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
p += SIZE_MEMNUM;
break;
default:
@@ -2072,6 +2298,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
GET_RELADDR_INC(addr, p);
p += addr;
STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
@@ -2150,79 +2377,70 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
break;
case OP_REPEAT_INC: STAT_OP_IN(OP_REPEAT_INC);
- {
- StackIndex si;
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
-#ifdef USE_SUBEXP_CALL
- if (reg->num_call > 0) {
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- }
- else {
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
- }
-#else
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-#endif
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
- }
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(p, s, sprev);
- p = stkp->u.repeat.pcode;
- }
- else {
- p = stkp->u.repeat.pcode;
- }
- STACK_PUSH_REPEAT_INC(si);
+ repeat_inc:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ /* end of repeat. Nothing to do. */
}
+ else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ }
+ STACK_PUSH_REPEAT_INC(si);
STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
+ case OP_REPEAT_INC_SG: STAT_OP_IN(OP_REPEAT_INC_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc;
+ break;
+
case OP_REPEAT_INC_NG: STAT_OP_IN(OP_REPEAT_INC_NG);
- {
- StackIndex si;
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
-#ifdef USE_SUBEXP_CALL
- if (reg->num_call > 0) {
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- }
- else {
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
- }
-#else
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-#endif
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- UChar* pcode = stkp->u.repeat.pcode;
+ repeat_inc_ng:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
+ IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
- }
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- }
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
}
STAT_OP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
continue;
break;
+ case OP_REPEAT_INC_NG_SG: STAT_OP_IN(OP_REPEAT_INC_NG_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc_ng;
+ break;
+
case OP_PUSH_POS: STAT_OP_IN(OP_PUSH_POS);
STACK_PUSH_POS(s, sprev);
STAT_OP_OUT;
@@ -2265,9 +2483,9 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_LOOK_BEHIND: STAT_OP_IN(OP_LOOK_BEHIND);
GET_LENGTH_INC(tlen, p);
- s = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
- sprev = onigenc_get_prev_char_head(encode, str, s);
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
STAT_OP_OUT;
continue;
break;
@@ -2275,7 +2493,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_PUSH_LOOK_BEHIND_NOT: STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
GET_RELADDR_INC(addr, p);
GET_LENGTH_INC(tlen, p);
- q = ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
+ q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(q)) {
/* too short case -> success. ex. /(? text_range)
end = text_range;
- s = text;
+ s = (UChar* )text;
while (s < end) {
if (*s == *target) {
@@ -2380,97 +2599,66 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s += enc_len(enc, *s);
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
}
-#if 0
static int
-str_trans_match_after_head_byte(OnigEncoding enc,
- int len, UChar* t, UChar* tend, UChar* p)
+str_lower_case_match(OnigEncoding enc, int ambig_flag,
+ const UChar* t, const UChar* tend,
+ const UChar* p, const UChar* end)
{
- while (--len > 0) {
- if (*t != *p) break;
- t++; p++;
- }
+ int lowlen;
+ UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ const UChar* tsave;
+ const UChar* psave;
- if (len == 0) {
- int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
-
- while (t < tend) {
- len = enc_len(enc, *p);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
- q = lowbuf;
- while (lowlen > 0) {
- if (*t++ != *q++) break;
- lowlen--;
- }
- if (lowlen > 0) break;
- p += len;
- }
- if (t == tend)
- return 1;
- }
-
- return 0;
-}
-#endif
-
-static int
-str_lower_case_match(OnigEncoding enc, UChar* t, UChar* tend, UChar* p)
-{
- int len, lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ tsave = t;
+ psave = p;
+ retry:
while (t < tend) {
- len = enc_len(enc, *p);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, p, lowbuf);
+ lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
- if (*t++ != *q++) return 0;
+ if (*t++ != *q++) {
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ t = tsave;
+ p = psave;
+ goto retry;
+ }
+ else
+ return 0;
+ }
lowlen--;
}
- p += len;
}
return 1;
}
static UChar*
-slow_search_ic(OnigEncoding enc,
+slow_search_ic(OnigEncoding enc, int ambig_flag,
UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+ const UChar* text, const UChar* text_end, UChar* text_range)
{
- int len, lowlen;
- UChar *t, *p, *s, *end;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *s, *end;
- end = text_end - (target_end - target) + 1;
+ end = (UChar* )text_end;
+ end -= target_end - target - 1;
if (end > text_range)
end = text_range;
- s = text;
+ s = (UChar* )text;
while (s < end) {
- len = enc_len(enc, *s);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; *t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, t, target_end, s + len))
- return s;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+ return s;
- s += len;
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
@@ -2478,13 +2666,15 @@ slow_search_ic(OnigEncoding enc,
static UChar*
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
- UChar* text, UChar* adjust_text, UChar* text_end, UChar* text_start)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
UChar *t, *p, *s;
- s = text_end - (target_end - target);
+ s = (UChar* )text_end;
+ s -= (target_end - target);
if (s > text_start)
- s = text_start;
+ s = (UChar* )text_start;
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
@@ -2500,58 +2690,52 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s = onigenc_get_prev_char_head(enc, adjust_text, s);
+ s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-slow_search_backward_ic(OnigEncoding enc,
- UChar* target,UChar* target_end,
- UChar* text, UChar* adjust_text,
- UChar* text_end, UChar* text_start)
+slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+ UChar* target, UChar* target_end,
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
- int len, lowlen;
- UChar *t, *p, *s;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
+ UChar *s;
- s = text_end - (target_end - target);
+ s = (UChar* )text_end;
+ s -= (target_end - target);
if (s > text_start)
- s = text_start;
+ s = (UChar* )text_start;
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- len = enc_len(enc, *s);
- lowlen = ONIGENC_MBC_TO_LOWER(enc, s, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; *t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, t, target_end, s + len))
- return s;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag,
+ target, target_end, s, text_end))
+ return s;
- s = onigenc_get_prev_char_head(enc, adjust_text, s);
+ s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
}
static UChar*
-bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
{
- UChar *s, *t, *p, *end;
- UChar *tail;
+ const UChar *s, *t, *p, *end;
+ const UChar *tail;
int skip;
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
+ (int )text, (int )text_end, (int )text_range);
+#endif
+
end = text_range + (target_end - target) - 1;
if (end > text_end)
end = text_end;
@@ -2559,7 +2743,7 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
tail = target_end - 1;
s = text;
while ((s - text) < target_end - target) {
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
s--; /* set to text check tail position. */
@@ -2570,14 +2754,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
skip = reg->map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
- p += enc_len(reg->enc, *p);
- }
+ do {
+ p += enc_len(reg->enc, p);
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2588,14 +2774,16 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
skip = reg->int_map[*s];
- p++;
+ p = s + 1;
+ if (p >= text_end) return (UChar* )NULL;
t = p;
- while ((p - t) < skip) {
- p += enc_len(reg->enc, *p);
- }
+ do {
+ p += enc_len(reg->enc, p);
+ } while ((p - t) < skip && p < text_end);
+
s += (p - t);
}
}
@@ -2603,11 +2791,11 @@ bm_search_notrev(regex_t* reg, UChar* target, UChar* target_end,
}
static UChar*
-bm_search(regex_t* reg, UChar* target, UChar* target_end,
- UChar* text, UChar* text_end, UChar* text_range)
+bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end, const UChar* text_range)
{
- UChar *s, *t, *p, *end;
- UChar *tail;
+ const UChar *s, *t, *p, *end;
+ const UChar *tail;
end = text_range + (target_end - target) - 1;
if (end > text_end)
@@ -2622,7 +2810,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
s += reg->map[*s];
}
}
@@ -2633,7 +2821,7 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
while (t >= target && *p == *t) {
p--; t--;
}
- if (t < target) return p + 1;
+ if (t < target) return (UChar* )(p + 1);
s += reg->int_map[*s];
}
}
@@ -2641,11 +2829,10 @@ bm_search(regex_t* reg, UChar* target, UChar* target_end,
}
static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
- int ignore_case, int** skip)
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
+
{
int i, len;
- UChar lowbuf[ONIGENC_MBC_TO_LOWER_MAXLEN];
if (IS_NULL(*skip)) {
*skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
@@ -2656,24 +2843,18 @@ set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc,
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
(*skip)[i] = len;
- if (ignore_case) {
- for (i = len - 1; i > 0; i--) {
- ONIGENC_MBC_TO_LOWER(enc, &(s[i]), lowbuf);
- (*skip)[*lowbuf] = i;
- }
- }
- else {
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
- }
+ for (i = len - 1; i > 0; i--)
+ (*skip)[s[i]] = i;
+
return 0;
}
static UChar*
-bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
- UChar* adjust_text, UChar* text_end, UChar* text_start)
+bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_end, const UChar* text_start)
{
- UChar *s, *t, *p;
+ const UChar *s, *t, *p;
s = text_end - (target_end - target);
if (text_start < s)
@@ -2688,7 +2869,7 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
p++; t++;
}
if (t == target_end)
- return s;
+ return (UChar* )s;
s -= reg->int_map_backward[*s];
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s);
@@ -2698,26 +2879,28 @@ bm_search_backward(regex_t* reg, UChar* target, UChar* target_end, UChar* text,
}
static UChar*
-map_search(OnigEncoding enc, UChar map[], UChar* text, UChar* text_range)
+map_search(OnigEncoding enc, UChar map[],
+ const UChar* text, const UChar* text_range)
{
- UChar *s = text;
+ const UChar *s = text;
while (s < text_range) {
- if (map[*s]) return s;
+ if (map[*s]) return (UChar* )s;
- s += enc_len(enc, *s);
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
}
static UChar*
map_search_backward(OnigEncoding enc, UChar map[],
- UChar* text, UChar* adjust_text, UChar* text_start)
+ const UChar* text, const UChar* adjust_text,
+ const UChar* text_start)
{
- UChar *s = text_start;
+ const UChar *s = text_start;
while (s >= text) {
- if (map[*s]) return s;
+ if (map[*s]) return (UChar* )s;
s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
@@ -2725,13 +2908,32 @@ map_search_backward(OnigEncoding enc, UChar map[],
}
extern int
-onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
+onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
OnigOptionType option)
{
int r;
UChar *prev;
MatchArg msa;
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_chain_reduce(reg);
+ ONIG_STATE_INC(reg);
+ }
+ }
+ else {
+ int n = 0;
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ ONIG_STATE_INC(reg);
+ }
+#endif /* USE_MULTI_THREAD_SYSTEM */
+
MATCH_ARG_INIT(msa, option, region, at);
if (region
@@ -2739,21 +2941,23 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
&& !IS_POSIX_REGION(option)
#endif
) {
- r = onig_region_resize(region, reg->num_mem + 1);
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
}
else
r = 0;
if (r == 0) {
- prev = onigenc_get_prev_char_head(reg->enc, str, at);
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end, at, prev, &msa);
}
+
MATCH_ARG_FREE(msa);
+ ONIG_STATE_DEC(reg);
return r;
}
static int
-forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
+forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
UChar *p, *pprev = (UChar* )NULL;
@@ -2770,7 +2974,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
else {
UChar *q = p + reg->dmin;
- while (p < q) p += enc_len(reg->enc, *p);
+ while (p < q) p += enc_len(reg->enc, p);
}
}
@@ -2780,7 +2984,8 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_ic(reg->enc, reg->exact, reg->exact_end, p, end, range);
+ p = slow_search_ic(reg->enc, reg->ambig_flag,
+ reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -2800,7 +3005,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (p - reg->dmin < s) {
retry_gate:
pprev = p;
- p += enc_len(reg->enc, *p);
+ p += enc_len(reg->enc, p);
goto retry;
}
@@ -2812,19 +3017,19 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
- if (!ONIG_IS_NEWLINE(*prev))
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
break;
case ANCHOR_END_LINE:
if (ON_STR_END(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
- if (prev && ONIG_IS_NEWLINE(*prev))
+ if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
- else if (!ONIG_IS_NEWLINE(*p))
+ else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
goto retry_gate;
break;
}
@@ -2845,7 +3050,7 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
*low = p - reg->dmax;
if (*low > s) {
*low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, low_prev);
+ *low, (const UChar** )low_prev);
if (low_prev && IS_NULL(*low_prev))
*low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : s), *low);
@@ -2872,13 +3077,14 @@ forward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
}
static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc,
- int ignore_case, int** skip));
+ int** skip));
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
static int
-backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
- UChar* range, UChar* adjrange, UChar** low, UChar** high)
+backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
+ UChar* s, const UChar* range, UChar* adjrange,
+ UChar** low, UChar** high)
{
int r;
UChar *p;
@@ -2895,8 +3101,9 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_backward_ic(reg->enc, reg->exact,
- reg->exact_end, range, adjrange, end, p);
+ p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+ reg->exact, reg->exact_end,
+ range, adjrange, end, p);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -2905,7 +3112,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
goto exact_method;
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 0,
+ r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
&(reg->int_map_backward));
if (r) return r;
}
@@ -2926,7 +3133,7 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
case ANCHOR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (!ONIG_IS_NEWLINE(*prev)) {
+ if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
@@ -2937,12 +3144,12 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
if (ON_STR_END(p)) {
prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(prev)) goto fail;
- if (ONIG_IS_NEWLINE(*prev)) {
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
p = prev;
goto retry;
}
}
- else if (!ONIG_IS_NEWLINE(*p)) {
+ else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
if (IS_NULL(p)) goto fail;
goto retry;
@@ -2974,18 +3181,19 @@ backward_search_range(regex_t* reg, UChar* str, UChar* end, UChar* s,
extern int
-onig_search(regex_t* reg, UChar* str, UChar* end,
- UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)
+onig_search(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
int r;
UChar *s, *prev;
MatchArg msa;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- reg->state++; /* increment as search counter */
- if (IS_NOT_NULL(reg->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
- reg->state++;
+ ONIG_STATE_INC(reg);
}
}
else {
@@ -2995,12 +3203,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- reg->state++; /* increment as search counter */
+ ONIG_STATE_INC(reg);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
- (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+ fprintf(stderr,
+ "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+ (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
if (region
@@ -3008,7 +3218,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
&& !IS_POSIX_REGION(option)
#endif
) {
- r = onig_region_resize(region, reg->num_mem + 1);
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
if (r) goto finish_no_msa;
}
@@ -3049,7 +3259,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
- semi_end = end;
+ semi_end = (UChar* )end;
end_buf:
if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
@@ -3082,14 +3292,16 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
- if (ONIG_IS_NEWLINE(end[-1])) {
- semi_end = end - 1;
+ UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
+ semi_end = pre_end;
if (semi_end > str && start <= semi_end) {
goto end_buf;
}
}
else {
- semi_end = end;
+ semi_end = (UChar* )end;
goto end_buf;
}
}
@@ -3098,14 +3310,15 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
}
}
else if (str == end) { /* empty string */
- static UChar* address_for_empty_string = "";
+ static const UChar* address_for_empty_string = "";
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search: empty string.\n");
#endif
if (reg->threshold_len == 0) {
- s = start = end = str = address_for_empty_string;
+ start = end = str = address_for_empty_string;
+ s = (UChar* )start;
prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, option, region, start);
@@ -3122,7 +3335,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
MATCH_ARG_INIT(msa, option, region, start);
- s = start;
+ s = (UChar* )start;
if (range > start) { /* forward search */
if (s > str)
prev = onigenc_get_prev_char_head(reg->enc, str, s);
@@ -3132,13 +3345,13 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
- sch_range = range;
+ sch_range = (UChar* )range;
if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_range = end;
+ sch_range = (UChar* )end;
else {
sch_range += reg->dmax;
- if (sch_range > end) sch_range = end;
+ if (sch_range > end) sch_range = (UChar* )end;
}
}
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
@@ -3153,13 +3366,14 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
if (IS_NOT_NULL(prev)) {
- while (!ONIG_IS_NEWLINE(*prev) && s < range) {
+ while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) &&
+ s < range) {
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
}
}
}
@@ -3176,19 +3390,23 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
do {
MATCH_AND_RETURN_CHECK;
prev = s;
- s += enc_len(reg->enc, *s);
+ s += enc_len(reg->enc, s);
} while (s <= range); /* exec s == range, because empty match with /$/. */
}
else { /* backward search */
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ if (range < end)
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
+ else
+ adjrange = (UChar* )end;
+
if (reg->dmax != ONIG_INFINITE_DISTANCE &&
(end - range) >= reg->threshold_len) {
do {
sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = end;
+ if (sch_start > end) sch_start = (UChar* )end;
if (backward_search_range(reg, str, end, sch_start, range, adjrange,
&low, &high) <= 0)
goto mismatch;
@@ -3210,10 +3428,10 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
sch_start = s;
if (reg->dmax != 0) {
if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_start = end;
+ sch_start = (UChar* )end;
else {
sch_start += reg->dmax;
- if (sch_start > end) sch_start = end;
+ if (sch_start > end) sch_start = (UChar* )end;
else
sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
start, sch_start);
@@ -3236,7 +3454,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
finish:
MATCH_ARG_FREE(msa);
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
@@ -3257,7 +3475,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@@ -3265,7 +3483,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return r;
match:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
MATCH_ARG_FREE(msa);
return s - str;
}
@@ -3282,18 +3500,44 @@ onig_get_options(regex_t* reg)
return reg->options;
}
+extern OnigAmbigType
+onig_get_ambig_flag(regex_t* reg)
+{
+ return reg->ambig_flag;
+}
+
extern OnigSyntaxType*
onig_get_syntax(regex_t* reg)
{
return reg->syntax;
}
-extern const char*
-onig_version(void)
+extern int
+onig_number_of_captures(regex_t* reg)
{
-#define MSTR(a) # a
-
- return (MSTR(ONIGURUMA_VERSION_MAJOR) "."
- MSTR(ONIGURUMA_VERSION_MINOR) "."
- MSTR(ONIGURUMA_VERSION_TEENY));
+ return reg->num_mem;
}
+
+extern int
+onig_number_of_capture_histories(regex_t* reg)
+{
+#ifdef USE_CAPTURE_HISTORY
+ int i, n;
+
+ n = 0;
+ for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(reg->capture_history, i) != 0)
+ n++;
+ }
+ return n;
+#else
+ return 0;
+#endif
+}
+
+extern void
+onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+{
+ *to = *from;
+}
+
diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c
new file mode 100755
index 00000000000..6839708be7a
--- /dev/null
+++ b/ext/mbstring/oniguruma/regext.c
@@ -0,0 +1,213 @@
+/**********************************************************************
+ regext.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+static void
+conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = '\0';
+ *conv++ = '\0';
+ *conv++ = '\0';
+ *conv++ = *s++;
+ }
+}
+
+static void
+conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = *s++;
+ *conv++ = '\0';
+ *conv++ = '\0';
+ *conv++ = '\0';
+ }
+}
+
+static void
+conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = '\0';
+ *conv++ = *s++;
+ }
+}
+
+static void
+conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = *s++;
+ *conv++ = '\0';
+ }
+}
+
+static void
+conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = s[3];
+ *conv++ = s[2];
+ *conv++ = s[1];
+ *conv++ = s[0];
+ s += 4;
+ }
+}
+
+static void
+conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
+{
+ while (s < end) {
+ *conv++ = s[1];
+ *conv++ = s[0];
+ s += 2;
+ }
+}
+
+static int
+conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
+ UChar** conv, UChar** conv_end)
+{
+ int len = end - s;
+
+ if (to == ONIG_ENCODING_UTF16_BE) {
+ if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
+ *conv = (UChar* )xmalloc(len * 2);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + (len * 2);
+ conv_ext0be(s, end, *conv);
+ return 0;
+ }
+ else if (from == ONIG_ENCODING_UTF16_LE) {
+ swap16:
+ *conv = (UChar* )xmalloc(len);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + len;
+ conv_swap2bytes(s, end, *conv);
+ return 0;
+ }
+ }
+ else if (to == ONIG_ENCODING_UTF16_LE) {
+ if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
+ *conv = (UChar* )xmalloc(len * 2);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + (len * 2);
+ conv_ext0le(s, end, *conv);
+ return 0;
+ }
+ else if (from == ONIG_ENCODING_UTF16_BE) {
+ goto swap16;
+ }
+ }
+ if (to == ONIG_ENCODING_UTF32_BE) {
+ if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
+ *conv = (UChar* )xmalloc(len * 4);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + (len * 4);
+ conv_ext0be32(s, end, *conv);
+ return 0;
+ }
+ else if (from == ONIG_ENCODING_UTF32_LE) {
+ swap32:
+ *conv = (UChar* )xmalloc(len);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + len;
+ conv_swap4bytes(s, end, *conv);
+ return 0;
+ }
+ }
+ else if (to == ONIG_ENCODING_UTF32_LE) {
+ if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
+ *conv = (UChar* )xmalloc(len * 4);
+ CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+ *conv_end = *conv + (len * 4);
+ conv_ext0le32(s, end, *conv);
+ return 0;
+ }
+ else if (from == ONIG_ENCODING_UTF32_BE) {
+ goto swap32;
+ }
+ }
+
+ return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
+}
+
+extern int
+onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+{
+ int r;
+ UChar *cpat, *cpat_end;
+
+ if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+
+ if (ci->pattern_enc != ci->target_enc) {
+ r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
+ &cpat, &cpat_end);
+ if (r) return r;
+ }
+ else {
+ cpat = (UChar* )pattern;
+ cpat_end = (UChar* )pattern_end;
+ }
+
+ r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
+ ci->syntax);
+ if (r) goto err;
+
+ r = onig_compile(*reg, cpat, cpat_end, einfo);
+ if (r) {
+ onig_free(*reg);
+ *reg = NULL;
+ }
+
+ err:
+ if (cpat != pattern) xfree(cpat);
+
+ return r;
+}
+
+extern int
+onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigCompileInfo* ci, OnigErrorInfo* einfo)
+{
+ int r;
+ regex_t *new_reg;
+
+ r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);
+ if (r) return r;
+ if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_transfer(reg, new_reg);
+ }
+ else {
+ onig_chain_link_add(reg, new_reg);
+ }
+ return 0;
+}
diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c
index 9c6a2161c2c..2c8169c481f 100644
--- a/ext/mbstring/oniguruma/reggnu.c
+++ b/ext/mbstring/oniguruma/reggnu.c
@@ -1,26 +1,38 @@
/**********************************************************************
-
reggnu.c - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regint.h"
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
#include "oniggnu.h"
#endif
-#if defined(RUBY_PLATFORM) || defined(RUBY)
-#ifndef ONIG_RUBY_M17N
-#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
-#endif
-#endif
-
-#ifndef NULL
-#define NULL ((void* )0)
-#endif
-
extern void
re_free_registers(OnigRegion* r)
{
@@ -111,7 +123,9 @@ re_free_pattern(regex_t* reg)
extern int
re_alloc_pattern(regex_t** reg)
{
- return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
+ return onig_alloc_init(reg, ONIG_OPTION_DEFAULT,
+ ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
+ OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
@@ -121,86 +135,6 @@ re_set_casetable(const char* table)
onigenc_set_default_caseconv_table((UChar* )table);
}
-#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
-static const unsigned char mbctab_ascii[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-static const unsigned char mbctab_euc[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-};
-
-static const unsigned char mbctab_sjis[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
-};
-
-static const unsigned char mbctab_utf8[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
-};
-
-const unsigned char *re_mbctab = mbctab_ascii;
-#endif
-
extern void
#ifdef ONIG_RUBY_M17N
re_mbcinit(OnigEncoding enc)
@@ -236,21 +170,4 @@ re_mbcinit(int mb_code)
onigenc_set_default_encoding(enc);
#endif
-
-#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
- switch (mb_code) {
- case MBCTYPE_ASCII:
- re_mbctab = mbctab_ascii;
- break;
- case MBCTYPE_EUC:
- re_mbctab = mbctab_euc;
- break;
- case MBCTYPE_SJIS:
- re_mbctab = mbctab_sjis;
- break;
- case MBCTYPE_UTF8:
- re_mbctab = mbctab_utf8;
- break;
- }
-#endif
}
diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h
index 35736b6dcbe..a704b0e2635 100644
--- a/ext/mbstring/oniguruma/regint.h
+++ b/ext/mbstring/oniguruma/regint.h
@@ -1,12 +1,33 @@
-/**********************************************************************
-
- regint.h - Oniguruma (regular expression library)
-
- Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
+/**********************************************************************
+ regint.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
/* for debug */
/* #define ONIG_DEBUG_PARSE_TREE */
@@ -19,7 +40,8 @@
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
- defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
+ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
+ defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
@@ -34,9 +56,9 @@
/* config */
/* spec. config */
+/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
-#define USE_FOLD_MATCH /* ess-tsett etc... */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@@ -44,48 +66,55 @@
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
+#define USE_ST_HASH_TABLE
+#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
-#define MATCH_STACK_LIMIT_SIZE 500000
+#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
/* interface to external system */
#ifdef NOT_RUBY /* gived from Makefile */
#include "config.h"
+#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
-#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
+#define CHECK_INTERRUPT /* depend on application */
#define xmalloc malloc
#define xrealloc realloc
+#define xcalloc calloc
#define xfree free
#else
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+
+#define USE_MULTI_THREAD_SYSTEM
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
+#define CHECK_INTERRUPT do {\
+ if (rb_trap_pending) {\
+ if (! rb_prohibit_interrupt) {\
+ rb_trap_exec();\
+ }\
+ }\
+} while (0)
+
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
-#if defined(RUBY_VERSION_MAJOR)
-#if RUBY_VERSION_MAJOR > 1 || \
-(RUBY_VERSION_MAJOR == 1 && \
- defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
-#define USE_ST_HASH_TABLE
-#endif
-#endif
-
#endif /* else NOT_RUBY */
-#define THREAD_PASS_LIMIT_COUNT 10
+#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
-#if defined(_WIN32) && !defined(__CYGWIN__)
+#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
#ifdef NOT_RUBY
#define vsnprintf _vsnprintf
@@ -94,6 +123,69 @@
#define xalloca alloca
#endif
+
+#ifdef USE_MULTI_THREAD_SYSTEM
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
+#else
+#define ONIG_STATE_INC(reg) /* Nothing */
+#define ONIG_STATE_DEC(reg) /* Nothing */
+#endif /* USE_MULTI_THREAD_SYSTEM */
+
+
+#define onig_st_is_member st_is_member
+
+#ifdef NOT_RUBY
+
+#define st_init_table onig_st_init_table
+#define st_init_table_with_size onig_st_init_table_with_size
+#define st_init_numtable onig_st_init_numtable
+#define st_init_numtable_with_size onig_st_init_numtable_with_size
+#define st_init_strtable onig_st_init_strtable
+#define st_init_strtable_with_size onig_st_init_strtable_with_size
+#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
+#define st_delete onig_st_delete
+#define st_delete_safe onig_st_delete_safe
+#define st_insert onig_st_insert
+#define st_insert_strend onig_st_insert_strend
+#define st_lookup onig_st_lookup
+#define st_lookup_strend onig_st_lookup_strend
+#define st_foreach onig_st_foreach
+#define st_add_direct onig_st_add_direct
+#define st_add_direct_strend onig_st_add_direct_strend
+#define st_free_table onig_st_free_table
+#define st_cleanup_safe onig_st_cleanup_safe
+#define st_copy onig_st_copy
+#define st_nothing_key_clone onig_st_nothing_key_clone
+#define st_nothing_key_free onig_st_nothing_key_free
+
+#else /* NOT_RUBY */
+
+#define onig_st_init_table st_init_table
+#define onig_st_init_table_with_size st_init_table_with_size
+#define onig_st_init_numtable st_init_numtable
+#define onig_st_init_numtable_with_size st_init_numtable_with_size
+#define onig_st_init_strtable st_init_strtable
+#define onig_st_init_strtable_with_size st_init_strtable_with_size
+#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
+#define onig_st_delete st_delete
+#define onig_st_delete_safe st_delete_safe
+#define onig_st_insert st_insert
+#define onig_st_insert_strend st_insert_strend
+#define onig_st_lookup st_lookup
+#define onig_st_lookup_strend st_lookup_strend
+#define onig_st_foreach st_foreach
+#define onig_st_add_direct st_add_direct
+#define onig_st_add_direct_strend st_add_direct_strend
+#define onig_st_free_table st_free_table
+#define onig_st_cleanup_safe st_cleanup_safe
+#define onig_st_copy st_copy
+#define onig_st_nothing_key_clone st_nothing_key_clone
+#define onig_st_nothing_key_free st_nothing_key_free
+
+#endif /* NOT_RUBY */
+
+
#ifdef HAVE_STDLIB_H
#include
#endif
@@ -109,7 +201,11 @@
#endif
#include
+#ifdef HAVE_SYS_TYPES_H
+#ifndef __BORLANDC__
#include
+#endif
+#endif
#ifdef ONIG_DEBUG
# include
@@ -292,6 +388,8 @@ typedef unsigned int BitStatusType;
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
+#define REPEAT_INFINITE -1
+#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
/* bitset */
#define BITS_PER_BYTE 8
@@ -449,6 +547,7 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
+ OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@@ -501,6 +600,8 @@ enum OpCode {
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
+ OP_REPEAT_INC_SG, /* search and get in stack */
+ OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
@@ -529,11 +630,12 @@ enum OpCode {
#define ARG_MEMNUM 4
#define ARG_OPTION 5
-typedef short int RelAddrType;
-typedef short int AbsAddrType;
-typedef short int LengthType;
-typedef short int MemNumType;
-typedef int RepeatNumType;
+typedef int RelAddrType;
+typedef int AbsAddrType;
+typedef int LengthType;
+typedef int RepeatNumType;
+typedef short int MemNumType;
+typedef void* PointerType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@@ -543,57 +645,33 @@ typedef int RepeatNumType;
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
+#define SIZE_POINTER sizeof(PointerType)
+
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-#define GET_RELADDR_INC(addr,p) do{\
- addr = *((RelAddrType* )(p));\
- (p) += SIZE_RELADDR;\
+
+#define PLATFORM_GET_INC(val,p,type) do{\
+ val = *(type* )p;\
+ (p) += sizeof(type);\
} while(0)
-#define GET_ABSADDR_INC(addr,p) do{\
- addr = *((AbsAddrType* )(p));\
- (p) += SIZE_ABSADDR;\
-} while(0)
-
-#define GET_LENGTH_INC(len,p) do{\
- len = *((LengthType* )(p));\
- (p) += SIZE_LENGTH;\
-} while(0)
-
-#define GET_MEMNUM_INC(num,p) do{\
- num = *((MemNumType* )(p));\
- (p) += SIZE_MEMNUM;\
-} while(0)
-
-#define GET_REPEATNUM_INC(num,p) do{\
- num = *((RepeatNumType* )(p));\
- (p) += SIZE_REPEATNUM;\
-} while(0)
-
-#define GET_OPTION_INC(option,p) do{\
- option = *((OnigOptionType* )(p));\
- (p) += SIZE_OPTION;\
-} while(0)
#else
-#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
-#define GET_ABSADDR_INC(addr,p) GET_SHORT_INC(addr,p)
-#define GET_LENGTH_INC(len,p) GET_SHORT_INC(len,p)
-#define GET_MEMNUM_INC(num,p) GET_SHORT_INC(num,p)
-#define GET_REPEATNUM_INC(num,p) GET_INT_INC(num,p)
-#define GET_OPTION_INC(option,p) GET_UINT_INC(option,p)
-
-#define SERIALIZE_RELADDR(addr,p) SERIALIZE_SHORT(addr,p)
-#define SERIALIZE_ABSADDR(addr,p) SERIALIZE_SHORT(addr,p)
-#define SERIALIZE_LENGTH(len,p) SERIALIZE_SHORT(len,p)
-#define SERIALIZE_MEMNUM(num,p) SERIALIZE_SHORT(num,p)
-#define SERIALIZE_REPEATNUM(num,p) SERIALIZE_INT(num,p)
-#define SERIALIZE_OPTION(option,p) SERIALIZE_UINT(option,p)
-
-#define SERIALIZE_BUFSIZE SIZEOF_INT
+#define PLATFORM_GET_INC(val,p,type) do{\
+ xmemcpy(&val, (p), sizeof(type));\
+ (p) += sizeof(type);\
+} while(0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
+#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
+#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
+#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
+#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
+#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
+#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
+
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
@@ -636,23 +714,53 @@ typedef int RepeatNumType;
#define SIZE_OP_RETURN SIZE_OPCODE
+#define MC_ESC(enc) (enc)->meta_char_table.esc
+#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
+#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
+#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
+#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
+
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
+/* cclass node */
+#define FLAG_CCLASS_NOT 1
+#define FLAG_CCLASS_SHARE (1<<1)
+
+#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
+#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
+#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
+#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
+#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
+
typedef struct {
- UChar esc;
- UChar anychar;
- UChar anytime;
- UChar zero_or_one_time;
- UChar one_or_more_time;
- UChar anychar_anytime;
-} OnigMetaCharTableType;
-
-extern OnigMetaCharTableType OnigMetaCharTable;
-
-#define MC_ESC OnigMetaCharTable.esc
-#define MC_ANYCHAR OnigMetaCharTable.anychar
-#define MC_ANYTIME OnigMetaCharTable.anytime
-#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
-#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
-#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
+ int flags;
+ BitSet bs;
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
#ifdef ONIG_DEBUG
@@ -665,7 +773,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@@ -675,11 +783,11 @@ extern void onig_print_statistics P_((FILE* f));
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
-extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
-extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
-extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
+extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
+extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
-extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
+extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
+extern void onig_transfer P_((regex_t* to, regex_t* from));
#endif /* REGINT_H */
diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c
index 2260df41555..58e122f4869 100644
--- a/ext/mbstring/oniguruma/regparse.c
+++ b/ext/mbstring/oniguruma/regparse.c
@@ -1,120 +1,36 @@
/**********************************************************************
-
regparse.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "regparse.h"
#define WARN_BUFSIZE 256
-#define SYN_POSIX_COMMON_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
- ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
- ONIG_SYN_OP_LINE_ANCHOR | \
- ONIG_SYN_OP_ESC_CONTROL_CHARS )
-
-#define SYN_GNU_REGEX_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
- ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
- ONIG_SYN_OP_VBAR_ALT | \
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
- ONIG_SYN_OP_QMARK_ZERO_ONE | \
- ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
- ONIG_SYN_OP_ESC_W_WORD | \
- ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
- ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
- ONIG_SYN_OP_LINE_ANCHOR )
-
-#define SYN_GNU_REGEX_BV \
- ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
- ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
- ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
-
-#ifdef USE_VARIABLE_SYNTAX
-OnigSyntaxType OnigSyntaxPosixBasic = {
- ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
- ONIG_SYN_OP_ESC_BRACE_INTERVAL )
- , 0
- , 0
- , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
-};
-
-OnigSyntaxType OnigSyntaxPosixExtended = {
- ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
- ONIG_SYN_OP_BRACE_INTERVAL |
- ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
- , 0
- , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
- ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
- ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
- ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
- , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
-};
-
-OnigSyntaxType OnigSyntaxEmacs = {
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
- ONIG_SYN_OP_ESC_BRACE_INTERVAL |
- ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
- ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
- ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
- , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
- , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxGrep = {
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
- ONIG_SYN_OP_ESC_VBAR_ALT |
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
- ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
- ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
- ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
- , 0
- , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxGnuRegex = {
- SYN_GNU_REGEX_OP
- , 0
- , SYN_GNU_REGEX_BV
- , ONIG_OPTION_NONE
-};
-
-OnigSyntaxType OnigSyntaxJava = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
- ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
- ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
- ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
- , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
- , ONIG_OPTION_SINGLELINE
-};
-
-OnigSyntaxType OnigSyntaxPerl = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
- ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
- ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY )
- , SYN_GNU_REGEX_BV
- , ONIG_OPTION_SINGLELINE
-};
-#endif /* USE_VARIABLE_SYNTAX */
-
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
@@ -127,12 +43,14 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
- ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB )
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
+ ONIG_SYN_OP2_ESC_H_XDIGIT )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
@@ -140,89 +58,7 @@ OnigSyntaxType OnigSyntaxRuby = {
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
-#ifdef USE_VARIABLE_SYNTAX
-extern int
-onig_set_default_syntax(OnigSyntaxType* syntax)
-{
- if (IS_NULL(syntax))
- syntax = ONIG_SYNTAX_RUBY;
-
- OnigDefaultSyntax = syntax;
- return 0;
-}
-
-extern void
-onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
-{
- *to = *from;
-}
-
-extern void
-onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
-{
- syntax->op = op;
-}
-
-extern void
-onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
-{
- syntax->op2 = op2;
-}
-
-extern void
-onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
-{
- syntax->behavior = behavior;
-}
-
-extern void
-onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
-{
- syntax->options = options;
-}
-#endif
-
-OnigMetaCharTableType OnigMetaCharTable = {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )0 /* anychar '.' */
- , (OnigCodePoint )0 /* anytime '*' */
- , (OnigCodePoint )0 /* zero or one time '?' */
- , (OnigCodePoint )0 /* one or more time '+' */
- , (OnigCodePoint )0 /* anychar anytime */
-};
-
-#ifdef USE_VARIABLE_META_CHARS
-extern int onig_set_meta_char(unsigned int what, unsigned int c)
-{
- switch (what) {
- case ONIG_META_CHAR_ESCAPE:
- OnigMetaCharTable.esc = c;
- break;
- case ONIG_META_CHAR_ANYCHAR:
- OnigMetaCharTable.anychar = c;
- break;
- case ONIG_META_CHAR_ANYTIME:
- OnigMetaCharTable.anytime = c;
- break;
- case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
- OnigMetaCharTable.zero_or_one_time = c;
- break;
- case ONIG_META_CHAR_ONE_OR_MORE_TIME:
- OnigMetaCharTable.one_or_more_time = c;
- break;
- case ONIG_META_CHAR_ANYCHAR_ANYTIME:
- OnigMetaCharTable.anychar_anytime = c;
- break;
- default:
- return ONIGERR_INVALID_ARGUMENT;
- break;
- }
- return 0;
-}
-#endif /* USE_VARIABLE_META_CHARS */
-
-
-extern void onig_null_warn(char* s) { }
+extern void onig_null_warn(const char* s) { }
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
@@ -272,12 +108,15 @@ bbuf_clone(BBuf** rto, BBuf* from)
#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
-#define SET_ALL_MULTI_BYTE_RANGE(pbuf) \
- add_code_range_to_buf(pbuf, (OnigCodePoint )0x80, ~((OnigCodePoint )0))
+#define MBCODE_START_POS(enc) \
+ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
-#define ADD_ALL_MULTI_BYTE_RANGE(code, mbuf) do {\
- if (! ONIGENC_IS_SINGLEBYTE(code)) {\
- r = SET_ALL_MULTI_BYTE_RANGE(&(mbuf));\
+#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
+ add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
+
+#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
+ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
if (r) return r;\
}\
} while (0)
@@ -359,7 +198,7 @@ bitset_copy(BitSetRef dest, BitSetRef bs)
}
extern int
-onig_strncmp(UChar* s1, UChar* s2, int n)
+onig_strncmp(const UChar* s1, const UChar* s2, int n)
{
int x;
@@ -371,7 +210,7 @@ onig_strncmp(UChar* s1, UChar* s2, int n)
}
static void
-k_strcpy(UChar* dest, UChar* src, UChar* end)
+k_strcpy(UChar* dest, const UChar* src, const UChar* end)
{
int len = end - src;
if (len > 0) {
@@ -380,33 +219,47 @@ k_strcpy(UChar* dest, UChar* src, UChar* end)
}
}
-extern UChar*
-onig_strdup(UChar* s, UChar* end)
+static UChar*
+strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
- int len = end - s;
+ int slen, term_len, i;
+ UChar *r;
- if (len > 0) {
- UChar* r = (UChar* )xmalloc(len + 1);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, len);
- r[len] = (UChar )0;
- return r;
- }
- else return NULL;
+ slen = end - s;
+ term_len = ONIGENC_MBC_MINLEN(enc);
+
+ r = (UChar* )xmalloc(slen + term_len);
+ CHECK_NULL_RETURN(r);
+ xmemcpy(r, s, slen);
+
+ for (i = 0; i < term_len; i++)
+ r[slen + i] = (UChar )0;
+
+ return r;
}
+
/* scan pattern methods */
-#define PEND_VALUE -1
+#define PEND_VALUE 0
-#define PFETCH(c) do { (c) = *p++; } while (0)
-#define PUNFETCH p--
-#define PINC p++
-#define PPEEK (p < end ? *p : PEND_VALUE)
-#define PEND (p < end ? 0 : 1)
+#define PFETCH_READY UChar* pfetch_prev
+#define PEND (p < end ? 0 : 1)
+#define PUNFETCH p = pfetch_prev
+#define PINC do { \
+ pfetch_prev = p; \
+ p += ONIGENC_MBC_ENC_LEN(enc, p); \
+} while (0)
+#define PFETCH(c) do { \
+ c = ONIGENC_MBC_TO_CODE(enc, p, end); \
+ pfetch_prev = p; \
+ p += ONIGENC_MBC_ENC_LEN(enc, p); \
+} while (0)
+#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
+#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
static UChar*
-k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end,
+k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
int capa)
{
UChar* r;
@@ -424,7 +277,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, UChar* src, UChar* src_end,
/* dest on static area */
static UChar*
strcat_capa_from_static(UChar* dest, UChar* dest_end,
- UChar* src, UChar* src_end, int capa)
+ const UChar* src, const UChar* src_end, int capa)
{
UChar* r;
@@ -450,7 +303,7 @@ typedef struct {
#ifdef USE_ST_HASH_TABLE
-#include
+#include "st.h"
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@@ -487,7 +340,7 @@ onig_print_names(FILE* fp, regex_t* reg)
if (IS_NOT_NULL(t)) {
fprintf(fp, "name table\n");
- st_foreach(t, i_print_name_entry, (HashDataType )fp);
+ onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
fputs("\n", fp);
}
return 0;
@@ -508,7 +361,7 @@ names_clear(regex_t* reg)
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
- st_foreach(t, i_free_name_entry, 0);
+ onig_st_foreach(t, i_free_name_entry, 0);
}
return 0;
}
@@ -523,56 +376,39 @@ onig_names_free(regex_t* reg)
if (r) return r;
t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) st_free_table(t);
+ if (IS_NOT_NULL(t)) onig_st_free_table(t);
reg->name_table = (void* )NULL;
return 0;
}
static NameEntry*
-name_find(regex_t* reg, UChar* name, UChar* name_end)
+name_find(regex_t* reg, const UChar* name, const UChar* name_end)
{
- int len;
- UChar namebuf[NAMEBUF_SIZE_1];
- UChar *key;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
e = (NameEntry* )NULL;
if (IS_NOT_NULL(t)) {
- if (*name_end == '\0') {
- key = name;
- }
- else {
- /* dirty, but st.c API claims NULL terminated key. */
- len = name_end - name;
- if (len <= NAMEBUF_SIZE) {
- xmemcpy(namebuf, name, len);
- namebuf[len] = '\0';
- key = namebuf;
- }
- else {
- key = onig_strdup(name, name_end);
- if (IS_NULL(key)) return (NameEntry* )NULL;
- }
- }
-
- st_lookup(t, (HashDataType )key, (HashDataType * )&e);
- if (key != name && key != namebuf) xfree(key);
+ onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
}
return e;
}
typedef struct {
- int (*func)(UChar*,UChar*,int,int*,regex_t*,void*);
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
int ret;
+ OnigEncoding enc;
} INamesArg;
static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
- int r = (*(arg->func))(e->name, e->name + strlen(e->name), e->back_num,
+ int r = (*(arg->func))(e->name,
+ /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
+ e->name + e->name_len,
+ e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
arg->reg, arg->arg);
if (r != 0) {
@@ -584,8 +420,8 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+ void* arg)
{
INamesArg narg;
NameTable* t = (NameTable* )reg->name_table;
@@ -595,11 +431,41 @@ onig_foreach_name(regex_t* reg,
narg.func = func;
narg.reg = reg;
narg.arg = arg;
- st_foreach(t, i_names, (HashDataType )&narg);
+ narg.enc = reg->enc; /* should be pattern encoding. */
+ onig_st_foreach(t, i_names, (HashDataType )&narg);
}
return narg.ret;
}
+static int
+i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
+{
+ int i;
+
+ if (e->back_num > 1) {
+ for (i = 0; i < e->back_num; i++) {
+ e->back_refs[i] = map[e->back_refs[i]].new_val;
+ }
+ }
+ else if (e->back_num == 1) {
+ e->back_ref1 = map[e->back_ref1].new_val;
+ }
+
+ return ST_CONTINUE;
+}
+
+extern int
+onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
+{
+ NameTable* t = (NameTable* )reg->name_table;
+
+ if (IS_NOT_NULL(t)) {
+ onig_st_foreach(t, i_renumber_name, (HashDataType )map);
+ }
+ return 0;
+}
+
+
extern int
onig_number_of_names(regex_t* reg)
{
@@ -719,8 +585,8 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+ void* arg)
{
int i, r;
NameEntry* e;
@@ -765,14 +631,16 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
if (IS_NULL(e)) {
#ifdef USE_ST_HASH_TABLE
if (IS_NULL(t)) {
- reg->name_table = t = st_init_strtable();
+ t = onig_st_init_strend_table_with_size(5);
+ reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
- e->name = onig_strdup(name, name_end);
+ e->name = strdup_with_null(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
- st_insert(t, (HashDataType )e->name, (HashDataType )e);
+ onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
+ (HashDataType )e);
e->name_len = name_end - name;
e->back_num = 0;
@@ -817,7 +685,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
}
e = &(t->e[t->num]);
t->num++;
- e->name = onig_strdup(name, name_end);
+ e->name = strdup_with_null(reg->enc, name, name_end);
e->name_len = name_end - name;
#endif
}
@@ -857,8 +725,8 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
}
extern int
-onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
- int** nums)
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+ const UChar* name_end, int** nums)
{
NameEntry* e;
@@ -879,8 +747,8 @@ onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
}
extern int
-onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
- OnigRegion *region)
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+ const UChar* name_end, OnigRegion *region)
{
int i, n, *nums;
@@ -905,23 +773,23 @@ onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
#else /* USE_NAMED_GROUP */
extern int
-onig_name_to_group_numbers(regex_t* reg, UChar* name, UChar* name_end,
- int** nums)
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+ const UChar* name_end, int** nums)
{
return ONIG_NO_SUPPORT_CONFIG;
}
extern int
-onig_name_to_backref_number(regex_t* reg, UChar* name, UChar* name_end,
- OnigRegion* region)
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+ const UChar* name_end, OnigRegion* region)
{
return ONIG_NO_SUPPORT_CONFIG;
}
extern int
onig_foreach_name(regex_t* reg,
- int (*func)(UChar*,UChar*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
+ void* arg)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -1014,6 +882,7 @@ static FreeNode* FreeNodeList = (FreeNode* )NULL;
extern void
onig_node_free(Node* node)
{
+ start:
if (IS_NULL(node)) return ;
switch (NTYPE(node)) {
@@ -1026,12 +895,38 @@ onig_node_free(Node* node)
case N_LIST:
case N_ALT:
onig_node_free(NCONS(node).left);
- onig_node_free(NCONS(node).right);
+ /* onig_node_free(NCONS(node).right); */
+ {
+ Node* next_node = NCONS(node).right;
+
+#ifdef USE_RECYCLE_NODE
+ {
+ FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
+ n->next = FreeNodeList;
+ FreeNodeList = n;
+ THREAD_ATOMIC_END;
+ }
+#else
+ xfree(node);
+#endif
+
+ node = next_node;
+ goto start;
+ }
break;
case N_CCLASS:
- if (NCCLASS(node).mbuf)
- bbuf_free(NCCLASS(node).mbuf);
+ {
+ CClassNode* cc = &(NCCLASS(node));
+
+ if (IS_CCLASS_SHARE(cc))
+ return ;
+
+ if (cc->mbuf)
+ bbuf_free(cc->mbuf);
+ }
break;
case N_QUALIFIER:
@@ -1057,11 +952,12 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
{
- FreeNode* n;
+ FreeNode* n = (FreeNode* )node;
- n = (FreeNode* )node;
+ THREAD_ATOMIC_START;
n->next = FreeNodeList;
FreeNodeList = n;
+ THREAD_ATOMIC_END;
}
#else
xfree(node);
@@ -1092,8 +988,10 @@ node_new()
#ifdef USE_RECYCLE_NODE
if (IS_NOT_NULL(FreeNodeList)) {
+ THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
+ THREAD_ATOMIC_END;
return node;
}
#endif
@@ -1107,8 +1005,8 @@ static void
initialize_cclass(CClassNode* cc)
{
BITSET_CLEAR(cc->bs);
- cc->not = 0;
- cc->mbuf = NULL;
+ cc->flags = 0;
+ cc->mbuf = NULL;
}
static Node*
@@ -1122,6 +1020,54 @@ node_new_cclass()
return node;
}
+extern Node*
+node_new_cclass_by_codepoint_range(int not,
+ OnigCodePoint sbr[], OnigCodePoint mbr[])
+{
+ CClassNode* cc;
+ int n, i, j;
+
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+ node->type = N_CCLASS;
+
+ cc = &(NCCLASS(node));
+ cc->flags = 0;
+ if (not != 0) CCLASS_SET_NOT(cc);
+
+ BITSET_CLEAR(cc->bs);
+ if (IS_NOT_NULL(sbr)) {
+ n = ONIGENC_CODE_RANGE_NUM(sbr);
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
+ j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ }
+ }
+
+ if (IS_NULL(mbr)) {
+ is_null:
+ cc->mbuf = NULL;
+ }
+ else {
+ BBuf* bbuf;
+
+ n = ONIGENC_CODE_RANGE_NUM(mbr);
+ if (n == 0) goto is_null;
+
+ bbuf = (BBuf* )xmalloc(sizeof(BBuf));
+ CHECK_NULL_RETURN_VAL(bbuf, NULL);
+ bbuf->alloc = n + 1;
+ bbuf->used = n + 1;
+ bbuf->p = (UChar* )((void* )mbr);
+
+ cc->mbuf = bbuf;
+ }
+
+ return node;
+}
+
static Node*
node_new_ctype(int type)
{
@@ -1152,6 +1098,12 @@ node_new_list(Node* left, Node* right)
return node;
}
+extern Node*
+onig_node_new_list(Node* left, Node* right)
+{
+ return node_new_list(left, right);
+}
+
static Node*
node_new_alt(Node* left, Node* right)
{
@@ -1237,6 +1189,7 @@ node_new_qualifier(int lower, int upper, int by_number)
Node* node = node_new();
CHECK_NULL_RETURN(node);
node->type = N_QUALIFIER;
+ NQUALIFIER(node).state = 0;
NQUALIFIER(node).target = NULL;
NQUALIFIER(node).lower = lower;
NQUALIFIER(node).upper = upper;
@@ -1295,7 +1248,7 @@ node_new_option(OnigOptionType option)
}
extern int
-onig_node_str_cat(Node* node, UChar* s, UChar* end)
+onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
{
int addlen = end - s;
@@ -1350,8 +1303,22 @@ onig_node_conv_to_str_node(Node* node, int flag)
NSTRING(node).end = NSTRING(node).buf;
}
+extern void
+onig_node_str_clear(Node* node)
+{
+ if (NSTRING(node).capa != 0 &&
+ IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
+ xfree(NSTRING(node).s);
+ }
+
+ NSTRING(node).capa = 0;
+ NSTRING(node).flag = 0;
+ NSTRING(node).s = NSTRING(node).buf;
+ NSTRING(node).end = NSTRING(node).buf;
+}
+
static Node*
-node_new_str(UChar* s, UChar* end)
+node_new_str(const UChar* s, const UChar* end)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1368,6 +1335,12 @@ node_new_str(UChar* s, UChar* end)
return node;
}
+extern Node*
+onig_node_new_str(const UChar* s, const UChar* end)
+{
+ return node_new_str(s, end);
+}
+
static Node*
node_new_str_raw(UChar* s, UChar* end)
{
@@ -1382,15 +1355,6 @@ node_new_empty()
return node_new_str(NULL, NULL);
}
-static Node*
-node_new_str_char(UChar c)
-{
- UChar p[1];
-
- p[0] = c;
- return node_new_str(p, p + 1);
-}
-
static Node*
node_new_str_raw_char(UChar c)
{
@@ -1403,7 +1367,7 @@ node_new_str_raw_char(UChar c)
static Node*
str_node_split_last_char(StrNode* sn, OnigEncoding enc)
{
- UChar *p;
+ const UChar *p;
Node* n = NULL_NODE;
if (sn->end > sn->s) {
@@ -1412,7 +1376,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)
n = node_new_str(p, sn->end);
if ((sn->flag & NSTR_RAW) != 0)
NSTRING_SET_RAW(n);
- sn->end = p;
+ sn->end = (UChar* )p;
}
}
return n;
@@ -1422,17 +1386,18 @@ static int
str_node_can_be_split(StrNode* sn, OnigEncoding enc)
{
if (sn->end > sn->s) {
- return ((enc_len(enc, *(sn->s)) < sn->end - sn->s) ? 1 : 0);
+ return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
}
return 0;
}
extern int
-onig_scan_unsigned_number(UChar** src, UChar* end, OnigEncoding enc)
+onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
{
unsigned int num, val;
- int c;
+ OnigCodePoint c;
UChar* p = *src;
+ PFETCH_READY;
num = 0;
while (!PEND) {
@@ -1457,9 +1422,10 @@ static int
scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
OnigEncoding enc)
{
- int c;
+ OnigCodePoint c;
unsigned int num, val;
UChar* p = *src;
+ PFETCH_READY;
num = 0;
while (!PEND && maxlen-- != 0) {
@@ -1484,9 +1450,10 @@ static int
scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
OnigEncoding enc)
{
- int c;
+ OnigCodePoint c;
unsigned int num, val;
UChar* p = *src;
+ PFETCH_READY;
num = 0;
while (!PEND && maxlen-- != 0) {
@@ -1622,15 +1589,15 @@ add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
}
static int
-not_code_range_buf(BBuf* bbuf, BBuf** pbuf)
+not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
{
int r, i, n;
- OnigCodePoint pre, from, to, *data;
+ OnigCodePoint pre, from, *data, to = 0;
*pbuf = (BBuf* )NULL;
if (IS_NULL(bbuf)) {
set_all:
- return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
}
data = (OnigCodePoint* )(bbuf->p);
@@ -1639,7 +1606,7 @@ not_code_range_buf(BBuf* bbuf, BBuf** pbuf)
if (n <= 0) goto set_all;
r = 0;
- pre = 0x80;
+ pre = MBCODE_START_POS(enc);
for (i = 0; i < n; i++) {
from = data[i*2];
to = data[i*2+1];
@@ -1664,7 +1631,8 @@ not_code_range_buf(BBuf* bbuf, BBuf** pbuf)
} while (0)
static int
-or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
+or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
+ BBuf* bbuf2, int not2, BBuf** pbuf)
{
int r;
OnigCodePoint i, n1, *data1;
@@ -1673,7 +1641,7 @@ or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
*pbuf = (BBuf* )NULL;
if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
if (not1 != 0 || not2 != 0)
- return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
return 0;
}
@@ -1683,14 +1651,14 @@ or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
if (IS_NULL(bbuf1)) {
if (not1 != 0) {
- return SET_ALL_MULTI_BYTE_RANGE(pbuf);
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
}
else {
if (not2 == 0) {
return bbuf_clone(pbuf, bbuf2);
}
else {
- return not_code_range_buf(bbuf2, pbuf);
+ return not_code_range_buf(enc, bbuf2, pbuf);
}
}
}
@@ -1706,7 +1674,7 @@ or_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
r = bbuf_clone(pbuf, bbuf2);
}
else if (not1 == 0) { /* 1 OR (not 2) */
- r = not_code_range_buf(bbuf2, pbuf);
+ r = not_code_range_buf(enc, bbuf2, pbuf);
}
if (r != 0) return r;
@@ -1816,6 +1784,29 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
return 0;
}
+static int
+clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
+{
+ BBuf *tbuf;
+ int r;
+
+ if (IS_CCLASS_NOT(cc)) {
+ bitset_invert(cc->bs);
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ r = not_code_range_buf(enc, cc->mbuf, &tbuf);
+ if (r != 0) return r;
+
+ bbuf_free(cc->mbuf);
+ cc->mbuf = tbuf;
+ }
+
+ CCLASS_CLEAR_NOT(cc);
+ }
+
+ return 0;
+}
+
static int
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
{
@@ -1824,10 +1815,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
- not1 = dest->not;
+ not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
- not2 = cc->not;
+ not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@@ -1850,13 +1841,13 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
if (not1 != 0 && not2 != 0) {
- r = or_code_range_buf(buf1, 0, buf2, 0, &pbuf);
+ r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
}
else {
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
if (r == 0 && not1 != 0) {
BBuf *tbuf;
- r = not_code_range_buf(pbuf, &tbuf);
+ r = not_code_range_buf(enc, pbuf, &tbuf);
if (r != 0) {
bbuf_free(pbuf);
return r;
@@ -1882,10 +1873,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
- not1 = dest->not;
+ not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
- not2 = cc->not;
+ not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@@ -1911,10 +1902,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
}
else {
- r = or_code_range_buf(buf1, not1, buf2, not2, &pbuf);
+ r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
if (r == 0 && not1 != 0) {
BBuf *tbuf;
- r = not_code_range_buf(pbuf, &tbuf);
+ r = not_code_range_buf(enc, pbuf, &tbuf);
if (r != 0) {
bbuf_free(pbuf);
return r;
@@ -2014,26 +2005,29 @@ popular_qualifier_num(QualifierNode* qf)
return -1;
}
+
+enum ReduceType {
+ RQ_ASIS = 0, /* as is */
+ RQ_DEL = 1, /* delete parent */
+ RQ_A, /* to '*' */
+ RQ_AQ, /* to '*?' */
+ RQ_QQ, /* to '??' */
+ RQ_P_QQ, /* to '+)??' */
+ RQ_PQ_Q, /* to '+?)?' */
+};
+
+static enum ReduceType ReduceTypeTable[6][6] = {
+ {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
+ {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
+ {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
+ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
+};
+
extern void
onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
{
-#define NQ_ASIS 0 /* as is */
-#define NQ_DEL 1 /* delete parent */
-#define NQ_A 2 /* to '*' */
-#define NQ_AQ 3 /* to '*?' */
-#define NQ_QQ 4 /* to '??' */
-#define NQ_P_QQ 5 /* to '+)??' */
-#define NQ_PQ_Q 6 /* to '+?)?' */
-
- static char reduces[][6] = {
- {NQ_DEL, NQ_A, NQ_A, NQ_QQ, NQ_AQ, NQ_ASIS}, /* '?' */
- {NQ_DEL, NQ_DEL, NQ_DEL, NQ_P_QQ, NQ_P_QQ, NQ_DEL}, /* '*' */
- {NQ_A, NQ_A, NQ_DEL, NQ_ASIS, NQ_P_QQ, NQ_DEL}, /* '+' */
- {NQ_DEL, NQ_AQ, NQ_AQ, NQ_DEL, NQ_AQ, NQ_AQ}, /* '??' */
- {NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL, NQ_DEL}, /* '*?' */
- {NQ_ASIS, NQ_PQ_Q, NQ_DEL, NQ_AQ, NQ_AQ, NQ_DEL} /* '+?' */
- };
-
int pnum, cnum;
QualifierNode *p, *c;
@@ -2042,35 +2036,35 @@ onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
pnum = popular_qualifier_num(p);
cnum = popular_qualifier_num(c);
- switch(reduces[cnum][pnum]) {
- case NQ_DEL:
+ switch(ReduceTypeTable[cnum][pnum]) {
+ case RQ_DEL:
*p = *c;
break;
- case NQ_A:
+ case RQ_A:
p->target = c->target;
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
break;
- case NQ_AQ:
+ case RQ_AQ:
p->target = c->target;
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
break;
- case NQ_QQ:
+ case RQ_QQ:
p->target = c->target;
p->lower = 0; p->upper = 1; p->greedy = 0;
break;
- case NQ_P_QQ:
+ case RQ_P_QQ:
p->target = cnode;
p->lower = 0; p->upper = 1; p->greedy = 0;
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
return ;
break;
- case NQ_PQ_Q:
+ case RQ_PQ_Q:
p->target = cnode;
p->lower = 0; p->upper = 1; p->greedy = 1;
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
return ;
break;
- case NQ_ASIS:
+ case RQ_ASIS:
p->target = cnode;
return ;
break;
@@ -2083,8 +2077,9 @@ onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
enum TokenSyms {
TK_EOT = 0, /* end of token */
- TK_BYTE = 1,
- TK_RAW_BYTE = 2,
+ TK_RAW_BYTE = 1,
+ TK_CHAR,
+ TK_STRING,
TK_CODE_POINT,
TK_ANYCHAR,
TK_CHAR_TYPE,
@@ -2114,6 +2109,7 @@ typedef struct {
int base; /* is number: 8, 16 (used in [....]) */
UChar* backp;
union {
+ UChar* s;
int c;
OnigCodePoint code;
int anchor;
@@ -2145,8 +2141,11 @@ static int
fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
{
int low, up, syn_allow, non_low = 0;
- int c;
+ int r = 0;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
UChar* p = *src;
+ PFETCH_READY;
syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
@@ -2200,12 +2199,13 @@ fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
PUNFETCH;
up = low; /* {n} : exact n times */
+ r = 2; /* fixed */
}
if (PEND) goto invalid;
PFETCH(c);
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
- if (c != MC_ESC) goto invalid;
+ if (c != MC_ESC(enc)) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
@@ -2218,7 +2218,7 @@ fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
tok->u.repeat.lower = low;
tok->u.repeat.upper = up;
*src = p;
- return 0;
+ return r; /* 0: normal {n,m}, 2: fixed {n} */
invalid:
if (syn_allow)
@@ -2231,10 +2231,13 @@ fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
static int
fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
{
- int c;
+ int v;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
UChar* p = *src;
+ PFETCH_READY;
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
switch (c) {
@@ -2245,9 +2248,10 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (c != '-') return ONIGERR_META_CODE_SYNTAX;
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH(c);
- if (c == MC_ESC) {
- c = fetch_escaped_value(&p, end, env);
- if (c < 0) return c;
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
}
c = ((c & 0xff) | 0x80);
}
@@ -2270,9 +2274,10 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
control:
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH(c);
- if (c == MC_ESC) {
- c = fetch_escaped_value(&p, end, env);
- if (c < 0) return c;
+ if (c == MC_ESC(enc)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
}
else if (c == '?')
c = 0177;
@@ -2304,10 +2309,13 @@ static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
static int
fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
{
- int r, len, is_num;
- int c = 0;
+ int r, is_num;
+ OnigCodePoint c = 0;
+ OnigCodePoint first_code;
+ OnigEncoding enc = env->enc;
UChar *name_end;
UChar *p = *src;
+ PFETCH_READY;
name_end = end;
r = 0;
@@ -2317,19 +2325,20 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
}
else {
PFETCH(c);
+ first_code = c;
if (c == '>')
return ONIGERR_EMPTY_GROUP_NAME;
- if (ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
if (ref == 1)
is_num = 1;
else {
r = ONIGERR_INVALID_GROUP_NAME;
}
}
- len = enc_len(env->enc, c);
- while (!PEND && len-- > 1)
- PFETCH(c);
+ else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
}
while (!PEND) {
@@ -2337,35 +2346,28 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
PFETCH(c);
if (c == '>' || c == ')') break;
- len = enc_len(env->enc, c);
if (is_num == 1) {
- if (! ONIGENC_IS_CODE_DIGIT(env->enc, c)) {
- if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) && c != '_')
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (!ONIGENC_IS_CODE_WORD(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
else
r = ONIGERR_INVALID_GROUP_NAME;
}
}
else {
- if (len == 1) {
- if (!ONIGENC_IS_CODE_ALPHA(env->enc, c) &&
- !ONIGENC_IS_CODE_DIGIT(env->enc, c) &&
- c != '_') {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
-
- while (!PEND && len-- > 1)
- PFETCH(c);
}
+
if (c != '>') {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
else {
- c = **src;
- if (ONIGENC_IS_CODE_UPPER(env->enc, c))
+ if (ONIGENC_IS_CODE_ASCII(first_code) &&
+ ONIGENC_IS_CODE_UPPER(enc, first_code))
r = ONIGERR_INVALID_GROUP_NAME;
}
@@ -2384,19 +2386,21 @@ static int
fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
{
int r, len;
- int c = 0;
+ OnigCodePoint c = 0;
UChar *name_end;
+ OnigEncoding enc = env->enc;
UChar *p = *src;
+ PFETCH_READY;
r = 0;
while (!PEND) {
name_end = p;
- PFETCH(c);
- if (enc_len(env->enc, c) > 1)
+ if (enc_len(enc, p) > 1)
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ PFETCH(c);
if (c == '>' || c == ')') break;
- if (! ONIGENC_IS_CODE_DIGIT(env->enc, c))
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
if (c != '>') {
@@ -2457,12 +2461,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enc_len(enc, *p);
+ q = p + enc_len(enc, p);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += enc_len(enc, *q);
+ q += enc_len(enc, q);
}
if (i >= n) {
if (IS_NOT_NULL(next))
@@ -2488,24 +2492,24 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) {
if (in_esc) {
in_esc = 0;
- p += enc_len(enc, *p);
+ p += enc_len(enc, p);
}
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enc_len(enc, *p);
+ q = p + enc_len(enc, p);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += enc_len(enc, *q);
+ q += enc_len(enc, q);
}
if (i >= n) return 1;
- p += enc_len(enc, *p);
+ p += enc_len(enc, p);
}
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
if (x == bad) return 0;
- else if (x == MC_ESC) in_esc = 1;
+ else if (x == MC_ESC(enc)) in_esc = 1;
p = q;
}
}
@@ -2516,10 +2520,13 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
static int
fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
- int c, num;
+ int num;
+ OnigCodePoint c, c2;
OnigSyntaxType* syn = env->syntax;
+ OnigEncoding enc = env->enc;
UChar* prev;
UChar* p = *src;
+ PFETCH_READY;
if (PEND) {
tok->type = TK_EOT;
@@ -2527,7 +2534,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
PFETCH(c);
- tok->type = TK_BYTE;
+ tok->type = TK_CHAR;
tok->base = 0;
tok->u.c = c;
if (c == ']') {
@@ -2536,11 +2543,11 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (c == '-') {
tok->type = TK_CC_RANGE;
}
- else if (c == MC_ESC) {
+ else if (c == MC_ESC(enc)) {
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
tok->escaped = 1;
@@ -2570,14 +2577,34 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CHAR_TYPE;
tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
break;
+ case 'h':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_XDIGIT;
+ break;
+ case 'H':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_XDIGIT;
+ break;
case 'p':
case 'P':
- if (PPEEK == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ c2 = PPEEK;
+ if (c2 == '{' &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
}
break;
@@ -2585,14 +2612,17 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (PEND) break;
prev = p;
- if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
- if (p > prev + 1 && !PEND && PPEEK == '}') {
+ if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
PINC;
tok->type = TK_CODE_POINT;
tok->base = 16;
@@ -2604,7 +2634,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
@@ -2620,14 +2650,14 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
- tok->base = 16;
- tok->u.c = num;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
}
break;
@@ -2636,7 +2666,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
PUNFETCH;
prev = p;
- num = scan_unsigned_octal_number(&p, end, 3, env->enc);
+ num = scan_unsigned_octal_number(&p, end, 3, enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
@@ -2652,19 +2682,19 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = fetch_escaped_value(&p, end, env);
if (num < 0) return num;
if (tok->u.c != num) {
- tok->u.c = num;
- tok->type = TK_RAW_BYTE;
+ tok->u.code = (OnigCodePoint )num;
+ tok->type = TK_CODE_POINT;
}
break;
}
}
else if (c == '[') {
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && PPEEK == ':') {
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
tok->backp = p; /* point at '[' is readed */
PINC;
- if (str_exist_check_with_esc(send, 2, p, end, (OnigCodePoint )']',
- env->enc)) {
+ if (str_exist_check_with_esc(send, 2, p, end,
+ (OnigCodePoint )']', enc)) {
tok->type = TK_POSIX_BRACKET_OPEN;
}
else {
@@ -2684,7 +2714,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else if (c == '&') {
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
- !PEND && PPEEK == '&') {
+ !PEND && (PPEEK_IS('&'))) {
PINC;
tok->type = TK_CC_AND;
}
@@ -2698,10 +2728,13 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
- int r, c, num;
+ int r, num;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
OnigSyntaxType* syn = env->syntax;
UChar* prev;
UChar* p = *src;
+ PFETCH_READY;
start:
if (PEND) {
@@ -2709,13 +2742,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
return tok->type;
}
- tok->type = TK_BYTE;
- tok->base = 0;
- PFETCH(c);
- if (c == MC_ESC) {
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ tok->type = TK_STRING;
+ tok->base = 0;
+ tok->backp = p;
+ PFETCH(c);
+ if (c == MC_ESC(enc)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ tok->backp = p;
PFETCH(c);
+
tok->u.c = c;
tok->escaped = 1;
switch (c) {
@@ -2741,37 +2778,42 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
greedy_check:
- if (!PEND && PPEEK == '?' &&
+ if (!PEND && PPEEK_IS('?') &&
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
PFETCH(c);
tok->u.repeat.greedy = 0;
tok->u.repeat.possessive = 0;
}
- else if (!PEND && PPEEK == '+' &&
- ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
- tok->type != TK_INTERVAL) ||
- (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
- tok->type == TK_INTERVAL))) {
- PFETCH(c);
- tok->u.repeat.greedy = 1;
- tok->u.repeat.possessive = 1;
- }
else {
- tok->u.repeat.greedy = 1;
- tok->u.repeat.possessive = 0;
+ possessive_check:
+ if (!PEND && PPEEK_IS('+') &&
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
+ tok->type != TK_INTERVAL) ||
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
+ tok->type == TK_INTERVAL))) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 1;
+ }
+ else {
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 0;
+ }
}
break;
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- tok->backp = p;
r = fetch_range_qualifier(&p, end, tok, env);
if (r < 0) return r; /* error */
- if (r > 0) {
- /* normal char */
- }
- else
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
goto greedy_check;
+ }
+ /* r == 1 : normal char */
break;
case '|':
@@ -2851,6 +2893,18 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.subtype = CTYPE_NOT_DIGIT;
break;
+ case 'h':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_XDIGIT;
+ break;
+
+ case 'H':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.subtype = CTYPE_NOT_XDIGIT;
+ break;
+
case 'A':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
begin_buf:
@@ -2891,14 +2945,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (PEND) break;
prev = p;
- if (PPEEK == '{' && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 8, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND && ONIGENC_IS_CODE_XDIGIT(env->enc, *p) && p - prev >= 9)
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
- if (p > prev + 1 && !PEND && PPEEK == '}') {
+ if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )num;
@@ -2909,7 +2965,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 2, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
@@ -2925,14 +2981,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, env->enc);
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
- tok->base = 16;
- tok->u.c = num;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
}
break;
@@ -2940,9 +2996,10 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '5': case '6': case '7': case '8': case '9':
PUNFETCH;
prev = p;
- num = onig_scan_unsigned_number(&p, end, env->enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (num > ONIG_MAX_BACKREF_NUM) return ONIGERR_TOO_BIG_BACKREF_NUMBER;
+ num = onig_scan_unsigned_number(&p, end, enc);
+ if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
+ goto skip_backref;
+ }
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
(num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
@@ -2957,7 +3014,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.by_name = 0;
break;
}
- else if (c == '8' || c == '9') {
+
+ skip_backref:
+ if (c == '8' || c == '9') {
/* normal char */
p = prev; PINC;
break;
@@ -2968,7 +3027,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '0':
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
prev = p;
- num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), env->enc);
+ num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
@@ -3054,11 +3113,20 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'p':
case 'P':
- if (PPEEK == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY)) {
+ if (PPEEK_IS('{') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
PINC;
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c);
+ if (c == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
}
break;
@@ -3068,8 +3136,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num < 0) return num;
/* set_raw: */
if (tok->u.c != num) {
- tok->type = TK_RAW_BYTE;
- tok->u.c = num;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else { /* string */
+ p = tok->backp + enc_len(enc, tok->backp);
}
break;
}
@@ -3081,15 +3152,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
- if (c == MC_ANYCHAR)
+ if (c == MC_ANYCHAR(enc))
goto any_char;
- else if (c == MC_ANYTIME)
+ else if (c == MC_ANYTIME(enc))
goto anytime;
- else if (c == MC_ZERO_OR_ONE_TIME)
+ else if (c == MC_ZERO_OR_ONE_TIME(enc))
goto zero_or_one_time;
- else if (c == MC_ONE_OR_MORE_TIME)
+ else if (c == MC_ONE_OR_MORE_TIME(enc))
goto one_or_more_time;
- else if (c == MC_ANYCHAR_ANYTIME) {
+ else if (c == MC_ANYCHAR_ANYTIME(enc)) {
tok->type = TK_ANYCHAR_ANYTIME;
goto out;
}
@@ -3132,14 +3203,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- tok->backp = p;
r = fetch_range_qualifier(&p, end, tok, env);
if (r < 0) return r; /* error */
- if (r > 0) {
- /* normal char */
- }
- else
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
goto greedy_check;
+ }
+ /* r == 1 : normal char */
break;
case '|':
@@ -3148,6 +3221,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case '(':
+ if (PPEEK_IS('?') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ PINC;
+ if (PPEEK_IS('#')) {
+ PFETCH(c);
+ while (1) {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c == MC_ESC(enc)) {
+ if (!PEND) PFETCH(c);
+ }
+ else {
+ if (c == ')') break;
+ }
+ }
+ goto start;
+ }
+ PUNFETCH;
+ }
+
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
tok->type = TK_SUBEXP_OPEN;
break;
@@ -3185,7 +3278,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_EXTEND(env->option)) {
while (!PEND) {
PFETCH(c);
- if (ONIG_IS_NEWLINE(c))
+ if (ONIGENC_IS_CODE_NEWLINE(enc, c))
break;
}
goto start;
@@ -3199,6 +3292,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
default:
+ /* string */
break;
}
}
@@ -3209,48 +3303,57 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
static int
-add_ctype_to_cc_by_list(CClassNode* cc, int ctype, int not,
- OnigEncoding enc)
+add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
+ OnigCodePoint sbr[], OnigCodePoint mbr[])
{
- int i, r, nsb, nmb;
- OnigCodePointRange *sbr, *mbr;
+ int i, r;
OnigCodePoint j;
- r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
- if (r != 0) return r;
+ int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
+ int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
if (not == 0) {
for (i = 0; i < nsb; i++) {
- for (j = sbr[i].from; j <= sbr[i].to; j++) {
- BITSET_SET_BIT(cc->bs, j);
+ for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+ BITSET_SET_BIT(cc->bs, j);
}
}
+
for (i = 0; i < nmb; i++) {
- r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
+ r = add_code_range_to_buf(&(cc->mbuf),
+ ONIGENC_CODE_RANGE_FROM(mbr, i),
+ ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r;
}
}
else {
OnigCodePoint prev = 0;
- for (i = 0; i < nsb; i++) {
- for (j = prev; j < sbr[i].from; j++) {
- BITSET_SET_BIT(cc->bs, j);
+
+ if (ONIGENC_MBC_MINLEN(enc) == 1) {
+ for (i = 0; i < nsb; i++) {
+ for (j = prev;
+ j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
}
- prev = sbr[i].to + 1;
- }
- if (prev < 0x7f) {
- for (j = prev; j < 0x7f; j++) {
- BITSET_SET_BIT(cc->bs, j);
+ if (prev < 0x7f) {
+ for (j = prev; j < 0x7f; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
}
+
+ prev = 0x80;
}
- prev = 0x80;
for (i = 0; i < nmb; i++) {
- if (prev < mbr[i].from) {
- r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
+ if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), prev,
+ ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
if (r != 0) return r;
}
- prev = mbr[i].to + 1;
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff) {
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
@@ -3258,17 +3361,21 @@ add_ctype_to_cc_by_list(CClassNode* cc, int ctype, int not,
}
}
- return r;
+ return 0;
}
static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
+ OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
- if (ONIGENC_CTYPE_SUPPORT_LEVEL(enc) != ONIGENC_CTYPE_SUPPORT_LEVEL_SB) {
- r = add_ctype_to_cc_by_list(cc, ctype, not, env->enc);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
+ if (r == 0) {
+ return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
+ }
+ else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
}
@@ -3326,7 +3433,8 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! ONIGENC_IS_CODE_SB_WORD(enc, c) && ! ONIGENC_IS_MBC_HEAD(enc, c))
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
+ && ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c);
}
}
@@ -3370,6 +3478,14 @@ parse_ctype_to_enc_ctype(int pctype, int* not)
ctype = ONIGENC_CTYPE_DIGIT;
*not = 1;
break;
+ case CTYPE_XDIGIT:
+ ctype = ONIGENC_CTYPE_XDIGIT;
+ *not = 0;
+ break;
+ case CTYPE_NOT_XDIGIT:
+ ctype = ONIGENC_CTYPE_XDIGIT;
+ *not = 1;
+ break;
default:
return ONIGERR_PARSER_BUG;
break;
@@ -3407,23 +3523,26 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
};
PosixBracketEntryType *pb;
- int not, i, c, r;
+ int not, i, r;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
UChar *p = *src;
+ PFETCH_READY;
- if (PPEEK == '^') {
+ if (PPEEK_IS('^')) {
PINC;
not = 1;
}
else
not = 0;
- if (end - p < POSIX_BRACKET_NAME_MAX_LEN + 1)
+ if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
goto not_posix_bracket;
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (onig_strncmp(p, pb->name, pb->len) == 0) {
- p += pb->len;
- if (end - p < 2 || *p != ':' || *(p+1) != ']')
+ if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
+ p = (UChar* )onigenc_step(enc, p, end, pb->len);
+ if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not, env);
@@ -3442,9 +3561,9 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
PINC;
if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
}
- if (c == ':' && !PEND) {
+ if (c == ':' && ! PEND) {
PINC;
- if (!PEND) {
+ if (! PEND) {
PFETCH(c);
if (c == ']')
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
@@ -3455,7 +3574,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
}
static int
-property_name_to_ctype(UChar* p, UChar* end)
+property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
{
static PosixBracketEntryType PBS[] = {
{ "Alnum", ONIGENC_CTYPE_ALNUM, 5 },
@@ -3477,28 +3596,49 @@ property_name_to_ctype(UChar* p, UChar* end)
PosixBracketEntryType *pb;
int len;
- len = end - p;
+ len = onigenc_strlen(enc, p, end);
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (len == pb->len && onig_strncmp(p, pb->name, pb->len) == 0)
+ if (len == pb->len &&
+ onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
return pb->ctype;
}
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+ return -1;
}
static int
fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
{
int ctype;
- UChar *prev, *p = *src;
- int c = 0;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar *prev, *start, *p = *src;
+ PFETCH_READY;
+
+ /* 'IsXXXX' => 'XXXX' */
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
+ c = PPEEK;
+ if (c == 'I') {
+ PINC;
+ if (! PEND) {
+ c = PPEEK;
+ if (c == 's')
+ PINC;
+ else
+ PUNFETCH;
+ }
+ }
+ }
+
+ start = prev = p;
while (!PEND) {
prev = p;
PFETCH(c);
if (c == '}') {
- ctype = property_name_to_ctype(*src, prev);
- if (ctype < 0) return ctype;
+ ctype = property_name_to_ctype(start, prev, enc);
+ if (ctype < 0) break;
*src = p;
return ctype;
@@ -3507,6 +3647,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
break;
}
+ onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
+ *src, prev);
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}
@@ -3588,6 +3730,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
+ if (*vs > 0xff || v > 0xff)
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
if (*vs > v) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
goto ccs_range_end;
@@ -3602,14 +3747,23 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
}
}
else {
- if (intype == CCV_CODE_POINT && *type == CCV_SB &&
- ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) {
- bitset_set_range(cc->bs, (int )*vs, 0x7f);
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v);
+#if 0
+ if (intype == CCV_CODE_POINT && *type == CCV_SB) {
+#endif
+ if (*vs > v) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+ }
+ bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r;
+#if 0
}
else
return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
+#endif
}
ccs_range_end:
*state = CCS_COMPLETE;
@@ -3631,22 +3785,24 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
}
static int
-char_exist_check(UChar c, UChar* from, UChar* to, int ignore_escaped,
+code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
OnigEncoding enc)
{
int in_esc;
+ OnigCodePoint code;
UChar* p = from;
+ PFETCH_READY;
in_esc = 0;
- while (p < to) {
+ while (! PEND) {
if (ignore_escaped && in_esc) {
in_esc = 0;
}
else {
- if (*p == c) return 1;
- if (*p == MC_ESC) in_esc = 1;
+ PFETCH(code);
+ if (code == c) return 1;
+ if (code == MC_ESC(enc)) in_esc = 1;
}
- p += enc_len(enc, *p);
}
return 0;
}
@@ -3669,7 +3825,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
prev_cc = (CClassNode* )NULL;
*np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
- if (r == TK_BYTE && tok->u.c == '^' && tok->escaped == 0) {
+ if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
neg = 1;
r = fetch_token_in_cc(tok, src, end, env);
}
@@ -3679,11 +3835,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (r < 0) return r;
if (r == TK_CC_CLOSE) {
- if (! char_exist_check(']', *src, env->pattern_end, 1, env->enc))
+ if (! code_exist_check((OnigCodePoint )']',
+ *src, env->pattern_end, 1, env->enc))
return ONIGERR_EMPTY_CHAR_CLASS;
CC_ESC_WARN(env, "]");
- r = tok->type = TK_BYTE; /* allow []...] */
+ r = tok->type = TK_CHAR; /* allow []...] */
}
*np = node = node_new_cclass();
@@ -3696,58 +3853,69 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
while (r != TK_CC_CLOSE) {
fetched = 0;
switch (r) {
- case TK_BYTE:
- len = enc_len(env->enc, tok->u.c);
+ case TK_CHAR:
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
if (len > 1) {
- PUNFETCH;
- v = ONIGENC_MBC_TO_CODE(env->enc, p, end);
- p += len;
in_type = CCV_CODE_POINT;
}
else {
sb_char:
- v = (OnigCodePoint )tok->u.c;
in_type = CCV_SB;
}
+ v = (OnigCodePoint )tok->u.c;
in_israw = 0;
goto val_entry2;
break;
case TK_RAW_BYTE:
- len = enc_len(env->enc, tok->u.c);
- if (len > 1 && tok->base != 0) { /* tok->base != 0 : octal or hexadec. */
+ /* tok->base != 0 : octal or hexadec. */
+ if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- UChar* bufp = buf;
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
+ UChar* psave = p;
int i, base = tok->base;
- if (len > ONIGENC_CODE_TO_MBC_MAXLEN) {
- bufp = (UChar* )xmalloc(len);
- if (IS_NULL(bufp)) {
- r = ONIGERR_MEMORY;
- goto err;
- }
- bufe = bufp + len;
- }
- bufp[0] = tok->u.c;
- for (i = 1; i < len; i++) {
+ buf[0] = tok->u.c;
+ for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto raw_byte_err;
- if (r != TK_RAW_BYTE || tok->base != base) break;
- bufp[i] = tok->u.c;
+ if (r < 0) goto err;
+ if (r != TK_RAW_BYTE || tok->base != base) {
+ fetched = 1;
+ break;
+ }
+ buf[i] = tok->u.c;
}
- if (i < len) {
+
+ if (i < ONIGENC_MBC_MINLEN(env->enc)) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- raw_byte_err:
- if (bufp != buf) xfree(bufp);
goto err;
}
- v = ONIGENC_MBC_TO_CODE(env->enc, bufp, bufe);
- if (bufp != buf) xfree(bufp);
- in_type = CCV_CODE_POINT;
+
+ len = enc_len(env->enc, buf);
+ if (i < len) {
+ r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+ goto err;
+ }
+ else if (i > len) { /* fetch back */
+ p = psave;
+ for (i = 1; i < len; i++) {
+ r = fetch_token_in_cc(tok, &p, end, env);
+ }
+ fetched = 0;
+ }
+
+ if (i == 1) {
+ v = (OnigCodePoint )buf[0];
+ goto raw_single;
+ }
+ else {
+ v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
+ in_type = CCV_CODE_POINT;
+ }
}
else {
v = (OnigCodePoint )tok->u.c;
+ raw_single:
in_type = CCV_SB;
}
in_israw = 1;
@@ -3881,7 +4049,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_AND: /* && */
{
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@@ -3921,7 +4089,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB,
+ r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@@ -3933,16 +4101,28 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
cc = prev_cc;
}
- cc->not = neg;
- if (cc->not != 0 &&
+ if (neg != 0)
+ CCLASS_SET_NOT(cc);
+ else
+ CCLASS_CLEAR_NOT(cc);
+ if (IS_CCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
if (is_empty != 0)
BITSET_IS_EMPTY(cc->bs, is_empty);
- if (is_empty == 0)
- BITSET_SET_BIT(cc->bs, ONIG_NEWLINE);
+
+ if (is_empty == 0) {
+#define NEWLINE_CODE 0x0a
+
+ if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
+ if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
+ BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
+ else
+ add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
+ }
+ }
}
*src = p;
return 0;
@@ -3961,33 +4141,26 @@ static int
parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
{
+ int r, num;
+ int list_capture;
Node *target;
OnigOptionType option;
- int r, c, num;
- int list_capture;
+ OnigEncoding enc = env->enc;
+ OnigCodePoint c;
UChar* p = *src;
+ PFETCH_READY;
*np = NULL;
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
option = env->option;
- if (PPEEK == '?' &&
+ if (PPEEK_IS('?') &&
IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
PINC;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
switch (c) {
- case '#': /* (?#...) comment */
- while (1) {
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- PFETCH(c);
- if (c == ')') break;
- }
- *src = p;
- return 3; /* 3: comment */
- break;
-
case ':': /* (?:...) grouping only */
group:
r = fetch_token(tok, &p, end, env);
@@ -4129,7 +4302,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else if (c == ':') {
OnigOptionType prev = env->option;
- env->option = option;
+ env->option = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
@@ -4185,6 +4358,14 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return 0;
}
+static char* PopularQStr[] = {
+ "?", "*", "+", "??", "*?", "+?"
+};
+
+static char* ReduceQStr[] = {
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+};
+
static int
set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
{
@@ -4217,38 +4398,38 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
if (qn->by_number == 0 && qnt->by_number == 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
- if (IS_REPEAT_INFINITE(qn->upper)) {
- if (qn->lower == 0) { /* '*' */
- redundant:
- {
- char buf[WARN_BUFSIZE];
- if (onig_verb_warn != onig_null_warn) {
- onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- "redundant nested repeat operator");
- (*onig_verb_warn)(buf);
- }
- goto warn_exit;
- }
- }
- else if (qn->lower == 1) { /* '+' */
- /* (?:a?)+? only allowed. */
- if (qn->greedy || !(qnt->upper == 1 && qnt->greedy))
- goto redundant;
- }
- }
- else if (qn->upper == 1 && qn->lower == 0) {
- if (qn->greedy) { /* '?' */
- if (!(qnt->lower == 1 && qnt->greedy == 0)) /* not '+?' */
- goto redundant;
- }
- else { /* '??' */
- /* '(?:a+)?? only allowd. (?:a*)?? can be replaced to (?:a+)?? */
- if (!(qnt->greedy && qnt->lower == 1 &&
- IS_REPEAT_INFINITE(qnt->upper)))
- goto redundant;
- }
- }
+ int nestq_num, targetq_num;
+ char buf[WARN_BUFSIZE];
+
+ nestq_num = popular_qualifier_num(qn);
+ targetq_num = popular_qualifier_num(qnt);
+
+ switch(ReduceTypeTable[targetq_num][nestq_num]) {
+ case RQ_ASIS:
+ break;
+
+ case RQ_DEL:
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ "redundant nested repeat operator");
+ (*onig_verb_warn)(buf);
+ }
+ goto warn_exit;
+ break;
+
+ default:
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ "nested repeat operator %s and %s was replaced with '%s'",
+ PopularQStr[targetq_num], PopularQStr[nestq_num],
+ ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
+ (*onig_verb_warn)(buf);
+ }
+ goto warn_exit;
+ break;
+ }
}
warn_exit:
@@ -4269,74 +4450,151 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
return 0;
}
-#ifdef USE_FOLD_MATCH
static int
-make_alt_node_from_fold_info(OnigEncFoldMatchInfo* info, Node** node)
+make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
+ CClassNode* cc, Node** root)
{
- int i;
- UChar *s, *end;
- Node *root, **ptail, *snode;
-
- ptail = &root;
- for (i = 0; i < info->target_num; i++) {
- s = info->target_str[i];
- end = s + info->target_byte_len[i];
- /* ex.
- U+00DF match "ss" and "SS, but not match "Ss".
- So, string nodes must be raw.
- */
- snode = node_new_str_raw(s, end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-
- *ptail = node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- ptail = &(NCONS(*ptail).right);
- }
- *ptail = NULL_NODE;
- *node = root;
- return 0;
-}
-
-static int
-make_fold_alt_node_from_cc(OnigEncoding enc, CClassNode* cc, Node** root)
-{
- int i, j, flen, len, ncode, n;
- UChar *s, *end, buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- OnigCodePoint* codes;
- Node **ptail, *snode;
- OnigEncFoldMatchInfo* info;
+ int r, i, j, k, clen, len, ncode, n;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node **ptail, *snode = NULL_NODE;
+ OnigCompAmbigCodes* ccs;
+ OnigCompAmbigCodeItem* ci;
+ OnigAmbigType amb;
+ n = 0;
*root = NULL_NODE;
ptail = root;
- ncode = ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc, &codes);
- n = 0;
- for (i = 0; i < ncode; i++) {
- if (onig_is_code_in_cc(enc, codes[i], cc)) {
- len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
- flen = ONIGENC_GET_FOLD_MATCH_INFO(enc, buf, buf + len, &info);
- if (flen > 0) { /* fold */
- for (j = 0; j < info->target_num; j++) {
- s = info->target_str[j];
- end = s + info->target_byte_len[j];
- if (onig_strncmp(s, buf, enc_len(enc, *s)) == 0)
- continue; /* ignore single char. */
- snode = node_new_str_raw(s, end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
+ if ((amb & ambig_flag) == 0) continue;
- *ptail = node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- ptail = &(NCONS(*ptail).right);
- n++;
- }
+ ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
+ for (i = 0; i < ncode; i++) {
+ if (onig_is_code_in_cc(enc, ccs[i].code, cc)) {
+ for (j = 0; j < ccs[i].n; j++) {
+ ci = &(ccs[i].items[j]);
+ if (ci->len > 1) { /* compound only */
+ if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
+
+ clen = ci->len;
+ for (k = 0; k < clen; k++) {
+ len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf);
+
+ if (k == 0) {
+ snode = node_new_str_raw(buf, buf + len);
+ CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+ }
+ else {
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r < 0) return r;
+ }
+ }
+
+ *ptail = node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
+ ptail = &(NCONS(*ptail).right);
+ n++;
+ }
+ }
}
}
}
return n;
}
-#endif
+
+
+#ifdef USE_SHARED_CCLASS_TABLE
+
+#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
+
+/* for ctype node hash table */
+
+typedef struct {
+ OnigEncoding enc;
+ int not;
+ int type;
+} type_cclass_key;
+
+static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
+{
+ if (x->type != y->type) return 1;
+ if (x->enc != y->enc) return 1;
+ if (x->not != y->not) return 1;
+ return 0;
+}
+
+static int type_cclass_hash(type_cclass_key* key)
+{
+ int i, val;
+ unsigned char *p;
+
+ val = 0;
+
+ p = (unsigned char* )&(key->enc);
+ for (i = 0; i < sizeof(key->enc); i++) {
+ val = val * 997 + (int )*p++;
+ }
+
+ p = (unsigned char* )(&key->type);
+ for (i = 0; i < sizeof(key->type); i++) {
+ val = val * 997 + (int )*p++;
+ }
+
+ val += key->not;
+ return val + (val >> 5);
+}
+
+static int type_cclass_key_free(st_data_t x)
+{
+ xfree((void* )x);
+ return 0;
+}
+
+static st_data_t type_cclass_key_clone(st_data_t x)
+{
+ type_cclass_key* new_key;
+ type_cclass_key* key = (type_cclass_key* )x;
+
+ new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+ *new_key = *key;
+ return (st_data_t )new_key;
+}
+
+static struct st_hash_type type_type_cclass_hash = {
+ type_cclass_cmp,
+ type_cclass_hash,
+ type_cclass_key_free,
+ type_cclass_key_clone
+};
+
+static st_table* OnigTypeCClassTable;
+
+
+static int
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
+{
+ if (IS_NOT_NULL(node)) {
+ CClassNode* cc = &(NCCLASS(node));
+ if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
+ xfree(node);
+ }
+ return ST_DELETE;
+}
+
+extern int
+onig_free_shared_cclass_table()
+{
+ if (IS_NOT_NULL(OnigTypeCClassTable)) {
+ onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+ }
+
+ return 0;
+}
+
+#endif /* USE_SHARED_CCLASS_TABLE */
+
static int
parse_exp(Node** np, OnigToken* tok, int term,
@@ -4346,7 +4604,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
Node* qn;
Node** targetp;
- start:
*np = NULL;
if (tok->type == term)
goto end_of_token;
@@ -4376,11 +4633,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
NEFFECT(*np).target = target;
return tok->type;
}
- else if (r == 3) { /* comment */
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- goto start;
- }
break;
case TK_SUBEXP_CLOSE:
@@ -4391,76 +4643,22 @@ parse_exp(Node** np, OnigToken* tok, int term,
else goto tk_byte;
break;
- case TK_BYTE:
+ case TK_STRING:
tk_byte:
{
- *np = node_new_str_char((UChar )tok->u.c);
+ *np = node_new_str(tok->backp, *src);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
while (1) {
- len = enc_len(env->enc, tok->u.c);
- if (len > 1) {
- r = onig_node_str_cat(*np, *src, *src + len - 1);
- if (r < 0) return r;
- *src += (len - 1);
- }
-
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- if (r != TK_BYTE) break;
+ if (r != TK_STRING) break;
- r = node_str_cat_char(*np, (UChar )tok->u.c);
+ r = onig_node_str_cat(*np, tok->backp, *src);
if (r < 0) return r;
}
- fold_entry:
-#ifdef USE_FOLD_MATCH
- if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
- int flen, ret;
- Node *root, **ptail, *work, *snode, *anode;
- UChar *p, *pprev;
- OnigEncFoldMatchInfo* fold_info;
- StrNode* sn = &(NSTRING(*np));
-
- ptail = &root;
- pprev = sn->s;
- for (p = sn->s; p < sn->end; ) {
- flen = ONIGENC_GET_FOLD_MATCH_INFO(env->enc, p, sn->end, &fold_info);
- if (flen > 0) { /* fold */
- ret = make_alt_node_from_fold_info(fold_info, &anode);
- if (ret != 0) return ret;
- work = node_new_list(anode, NULL);
- CHECK_NULL_RETURN_VAL(work, ONIGERR_MEMORY);
-
- if (pprev < p) {
- snode = node_new_str(pprev, p);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- *ptail = node_new_list(snode, work);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- }
- else {
- *ptail = work;
- }
- ptail = &(NCONS(work).right);
- p += flen;
- pprev = p;
- }
- else
- p += enc_len(env->enc, *p);
- }
- *ptail = NULL_NODE;
- if (IS_NOT_NULL(root)) {
- if (pprev < sn->end) {
- snode = node_new_str(pprev, sn->end);
- CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
- *ptail = node_new_list(snode, NULL_NODE);
- CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
- }
- onig_node_free(*np);
- *np = root;
- }
- }
-#endif
+ string_end:
targetp = np;
goto repeat;
}
@@ -4469,22 +4667,19 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_RAW_BYTE:
tk_raw_byte:
{
- int expect_len;
-
*np = node_new_str_raw_char((UChar )tok->u.c);
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- expect_len = enc_len(env->enc, tok->u.c);
len = 1;
while (1) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
if (r != TK_RAW_BYTE) {
#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- if (len >= expect_len) {
+ if (len >= enc_len(env->enc, NSTRING(*np).s)) {
NSTRING_CLEAR_RAW(*np);
}
#endif
- goto fold_entry;
+ goto string_end;
}
r = node_str_cat_char(*np, (UChar )tok->u.c);
@@ -4510,9 +4705,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_QUOTE_OPEN:
{
- OnigCodePoint end_op[] = { (OnigCodePoint )MC_ESC, (OnigCodePoint )'E' };
+ OnigCodePoint end_op[2];
UChar *qstart, *qend, *nextp;
+ end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
+ end_op[1] = (OnigCodePoint )'E';
qstart = *src;
qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
if (IS_NULL(qend)) {
@@ -4537,17 +4734,69 @@ parse_exp(Node** np, OnigToken* tok, int term,
case CTYPE_NOT_WHITE_SPACE:
case CTYPE_DIGIT:
case CTYPE_NOT_DIGIT:
+ case CTYPE_XDIGIT:
+ case CTYPE_NOT_XDIGIT:
{
CClassNode* cc;
int ctype, not;
- ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
+#ifdef USE_SHARED_CCLASS_TABLE
+ OnigCodePoint *sbr, *mbr;
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- cc = &(NCCLASS(*np));
- add_ctype_to_cc(cc, ctype, 0, env);
- if (not != 0) CCLASS_SET_NOT(cc);
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+ if (r == 0 &&
+ ONIGENC_CODE_RANGE_NUM(mbr)
+ >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
+ type_cclass_key key;
+ type_cclass_key* new_key;
+
+ key.enc = env->enc;
+ key.not = not;
+ key.type = ctype;
+
+ THREAD_ATOMIC_START;
+
+ if (IS_NULL(OnigTypeCClassTable)) {
+ OnigTypeCClassTable
+ = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
+ if (IS_NULL(OnigTypeCClassTable)) {
+ THREAD_ATOMIC_END;
+ return ONIGERR_MEMORY;
+ }
+ }
+ else {
+ if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
+ (st_data_t* )np)) {
+ THREAD_ATOMIC_END;
+ break;
+ }
+ }
+
+ *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+ if (IS_NULL(*np)) {
+ THREAD_ATOMIC_END;
+ return ONIGERR_MEMORY;
+ }
+
+ CCLASS_SET_SHARE(&(NCCLASS(*np)));
+ new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+ onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
+ (st_data_t )*np);
+
+ THREAD_ATOMIC_END;
+ }
+ else {
+#endif
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬);
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ cc = &(NCCLASS(*np));
+ add_ctype_to_cc(cc, ctype, 0, env);
+ if (not != 0) CCLASS_SET_NOT(cc);
+#ifdef USE_SHARED_CCLASS_TABLE
+ }
+#endif
}
break;
@@ -4564,27 +4813,66 @@ parse_exp(Node** np, OnigToken* tok, int term,
break;
case TK_CC_OPEN:
- r = parse_char_class(np, tok, src, end, env);
- if (r != 0) return r;
+ {
+ CClassNode* cc;
-#ifdef USE_FOLD_MATCH
- if (IS_IGNORECASE(env->option) && ONIGENC_IS_FOLD_MATCH(env->enc)) {
- int res;
- Node *alt_root, *work;
- CClassNode* cc = &(NCCLASS(*np));
+ r = parse_char_class(np, tok, src, end, env);
+ if (r != 0) return r;
- res = make_fold_alt_node_from_cc(env->enc, cc, &alt_root);
- if (res < 0) return res;
- if (res > 0) {
- work = node_new_alt(*np, alt_root);
- if (IS_NULL(work)) {
- onig_node_free(alt_root);
- return ONIGERR_MEMORY;
- }
- *np = work;
+ cc = &(NCCLASS(*np));
+
+ if (IS_IGNORECASE(env->option)) {
+ int i, n, in_cc;
+ OnigPairAmbigCodes* ccs;
+ BitSetRef bs = cc->bs;
+ OnigAmbigType amb;
+
+ for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
+ if ((amb & env->ambig_flag) == 0) continue;
+
+ n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
+ for (i = 0; i < n; i++) {
+ in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
+
+ if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
+ (in_cc == 0 && IS_CCLASS_NOT(cc))) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
+ ccs[i].from >= SINGLE_BYTE_SIZE) {
+ /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
+ add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
+ }
+ else {
+ if (BITSET_AT(bs, ccs[i].from)) {
+ /* /(?i:[^A-C])/.match("a") ==> fail. */
+ BITSET_SET_BIT(bs, ccs[i].to);
+ }
+ if (BITSET_AT(bs, ccs[i].to)) {
+ BITSET_SET_BIT(bs, ccs[i].from);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (IS_IGNORECASE(env->option) &&
+ (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ int res;
+ Node *alt_root, *work;
+
+ res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc,
+ cc, &alt_root);
+ if (res < 0) return res;
+ if (res > 0) {
+ work = node_new_alt(*np, alt_root);
+ if (IS_NULL(work)) {
+ onig_node_free(alt_root);
+ return ONIGERR_MEMORY;
+ }
+ *np = work;
+ }
}
}
-#endif
break;
case TK_ANYCHAR:
@@ -4630,7 +4918,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
*np = node_new_empty();
}
else {
- *src = tok->backp;
goto tk_byte;
}
break;
@@ -4781,7 +5068,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
}
extern int
-onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg,
ScanEnv* env)
{
int r;
@@ -4793,15 +5080,16 @@ onig_parse_make_tree(Node** root, UChar* pattern, UChar* end, regex_t* reg,
scan_env_clear(env);
env->option = reg->options;
+ env->ambig_flag = reg->ambig_flag;
env->enc = reg->enc;
env->syntax = reg->syntax;
- env->pattern = pattern;
- env->pattern_end = end;
+ env->pattern = (UChar* )pattern;
+ env->pattern_end = (UChar* )end;
env->reg = reg;
*root = NULL;
- p = pattern;
- r = parse_regexp(root, &p, end, env);
+ p = (UChar* )pattern;
+ r = parse_regexp(root, &p, (UChar* )end, env);
reg->num_mem = env->num_mem;
return r;
}
diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h
index b2726becbde..1a4ac7dea24 100644
--- a/ext/mbstring/oniguruma/regparse.h
+++ b/ext/mbstring/oniguruma/regparse.h
@@ -1,12 +1,33 @@
-/**********************************************************************
-
- regparse.h - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
-**********************************************************************/
#ifndef REGPARSE_H
#define REGPARSE_H
+/**********************************************************************
+ regparse.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#include "regint.h"
@@ -43,7 +64,8 @@
#define CTYPE_NOT_WHITE_SPACE (1<<3)
#define CTYPE_DIGIT (1<<4)
#define CTYPE_NOT_DIGIT (1<<5)
-
+#define CTYPE_XDIGIT (1<<6)
+#define CTYPE_NOT_XDIGIT (1<<7)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@@ -52,29 +74,27 @@
#define EFFECT_OPTION (1<<1)
#define EFFECT_STOP_BACKTRACK (1<<2)
-#define REPEAT_INFINITE -1
-#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
-
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 7
#define NSTR_RAW (1<<0) /* by backslashed number */
-#define NSTR_CASE_AMBIG (1<<1)
+#define NSTR_AMBIG (1<<1)
+#define NSTR_AMBIG_REDUCE (1<<2)
-#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
-#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
-#define NSTRING_IS_CASE_AMBIG(node) \
- (((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
+#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
+#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
+#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+#define NSTRING_IS_AMBIG_REDUCE(node) \
+ (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
-#define CCLASS_SET_NOT(cc) (cc)->not = 1
-
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
@@ -89,13 +109,17 @@ typedef struct {
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
+/* move to regint.h */
+#if 0
typedef struct {
- int not;
+ int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
+#endif
typedef struct {
+ int state;
struct _Node* target;
int lower;
int upper;
@@ -108,19 +132,19 @@ typedef struct {
} QualifierNode;
/* status bits */
-#define NST_MIN_FIXED (1<<0)
-#define NST_MAX_FIXED (1<<1)
-#define NST_CLEN_FIXED (1<<2)
-#define NST_MARK1 (1<<3)
-#define NST_MARK2 (1<<4)
-#define NST_MEM_BACKREFED (1<<5)
-#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
-
-#define NST_RECURSION (1<<7)
-#define NST_CALLED (1<<8)
-#define NST_ADDR_FIXED (1<<9)
-#define NST_NAMED_GROUP (1<<10)
-#define NST_NAME_REF (1<<11)
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
+#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@@ -133,13 +157,15 @@ typedef struct {
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
-#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
+#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
+ (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
typedef struct {
int state;
@@ -224,9 +250,10 @@ typedef struct _Node {
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
- OnigOptionType option;
- OnigEncoding enc;
- OnigSyntaxType* syntax;
+ OnigOptionType option;
+ OnigAmbigType ambig_flag;
+ OnigEncoding enc;
+ OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
@@ -254,19 +281,31 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+
+#ifdef USE_NAMED_GROUP
+typedef struct {
+ int new_val;
+} GroupNumRemap;
+
+extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
+#endif
+
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
-extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
+extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
-extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
+extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
-extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
+extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
+extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
+extern Node* onig_node_new_list P_((Node* left, Node* right));
+extern void onig_node_str_clear P_((Node* node));
extern int onig_free_node_list();
extern int onig_names_free P_((regex_t* reg));
-extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
+extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
diff --git a/ext/mbstring/oniguruma/regposerr.c b/ext/mbstring/oniguruma/regposerr.c
index 533f813c0c4..e54b5c4089e 100644
--- a/ext/mbstring/oniguruma/regposerr.c
+++ b/ext/mbstring/oniguruma/regposerr.c
@@ -1,10 +1,32 @@
/**********************************************************************
-
regposerr.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include "config.h"
#include "onigposix.h"
@@ -58,7 +80,7 @@ regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size)
s = tbuf;
}
- len = strlen(s) + 1;
+ len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */
if (buf != NULL && size > 0) {
strncpy(buf, s, size - 1);
diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c
index 4cb30cc5653..34cbeb9a46f 100644
--- a/ext/mbstring/oniguruma/regposix.c
+++ b/ext/mbstring/oniguruma/regposix.c
@@ -1,10 +1,31 @@
/**********************************************************************
-
regposix.c - Oniguruma (regular expression library)
-
- Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
-
**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
#define regex_t onig_regex_t
#include "regint.h"
@@ -14,16 +35,17 @@
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
-#if 1
+/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
#define ENC_STRING_LEN(enc,s,len) do { \
- UChar* tmps = (UChar* )(s); \
- /* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \
- while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \
- len = tmps - (UChar* )(s); \
+ if (ONIGENC_MBC_MINLEN(enc) == 1) { \
+ UChar* tmps = (UChar* )(s); \
+ while (*tmps != 0) tmps++; \
+ len = tmps - (UChar* )(s); \
+ } \
+ else { \
+ len = onigenc_str_bytelen_null(enc, (UChar* )s); \
+ } \
} while(0)
-#else
-#define ENC_STRING_LEN(enc,s,len) len = strlen(s)
-#endif
typedef struct {
int onig_err;
@@ -50,7 +72,7 @@ onig2posix_error_code(int code)
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
{ ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
- { ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
+ { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
{ ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
@@ -91,6 +113,7 @@ onig2posix_error_code(int code)
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
+ { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
};
@@ -145,24 +168,37 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
{
int r, i, len;
UChar* end;
+ regmatch_t* pm;
OnigOptionType options;
options = ONIG_OPTION_POSIX_REGION;
if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
- if ((reg->comp_options & REG_NOSUB) != 0) {
- pmatch = (regmatch_t* )NULL;
+ if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
+ pm = (regmatch_t* )NULL;
nmatch = 0;
}
+ else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
+ pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
+ * (ONIG_C(reg)->num_mem + 1));
+ if (pm == NULL)
+ return REG_ESPACE;
+ }
+ else {
+ pm = pmatch;
+ }
- ENC_STRING_LEN(ONIG_C(reg)->code,str,len);
+ ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pmatch, options);
if (r >= 0) {
r = 0; /* Match */
+ if (pm != pmatch && pm != NULL) {
+ xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
+ }
}
else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
@@ -173,6 +209,9 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
r = onig2posix_error_code(r);
}
+ if (pm != pmatch && pm != NULL)
+ xfree(pm);
+
return r;
}
@@ -201,6 +240,13 @@ reg_set_encoding(int mb_code)
case REG_POSIX_ENCODING_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
+ case REG_POSIX_ENCODING_UTF16_BE:
+ enc = ONIG_ENCODING_UTF16_BE;
+ break;
+ case REG_POSIX_ENCODING_UTF16_LE:
+ enc = ONIG_ENCODING_UTF16_LE;
+ break;
+
default:
return ;
break;
@@ -211,18 +257,18 @@ reg_set_encoding(int mb_code)
extern int
reg_name_to_group_numbers(regex_t* reg,
- unsigned char* name, unsigned char* name_end, int** nums)
+ const unsigned char* name, const unsigned char* name_end, int** nums)
{
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
}
typedef struct {
- int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*);
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
} i_wrap;
-static int i_wrapper(unsigned char* name, unsigned char* name_end,
+static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
int ng, int* gs,
onig_regex_t* reg, void* arg)
{
@@ -233,8 +279,8 @@ static int i_wrapper(unsigned char* name, unsigned char* name_end,
extern int
reg_foreach_name(regex_t* reg,
- int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*),
- void* arg)
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
+ void* arg)
{
i_wrap warg;
diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c
new file mode 100644
index 00000000000..a0f36b8c33d
--- /dev/null
+++ b/ext/mbstring/oniguruma/regsyntax.c
@@ -0,0 +1,207 @@
+/**********************************************************************
+ regsyntax.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+OnigSyntaxType OnigSyntaxPosixBasic = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL )
+ , 0
+ , 0
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+
+OnigSyntaxType OnigSyntaxPosixExtended = {
+ ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
+ ONIG_SYN_OP_BRACE_INTERVAL |
+ ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
+ , 0
+ , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
+ ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
+ ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
+ ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+ , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+};
+
+OnigSyntaxType OnigSyntaxEmacs = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
+ ONIG_SYN_OP_ESC_BRACE_INTERVAL |
+ ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
+ ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
+ ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
+ , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
+ , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ , ONIG_OPTION_NONE
+};
+
+OnigSyntaxType OnigSyntaxGrep = {
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+ ONIG_SYN_OP_ESC_VBAR_ALT |
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
+ ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
+ ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
+ ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
+ , 0
+ , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
+ , ONIG_OPTION_NONE
+};
+
+OnigSyntaxType OnigSyntaxGnuRegex = {
+ SYN_GNU_REGEX_OP
+ , 0
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_NONE
+};
+
+OnigSyntaxType OnigSyntaxJava = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
+ ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
+ , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
+ , ONIG_OPTION_SINGLELINE
+};
+
+OnigSyntaxType OnigSyntaxPerl = {
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
+ , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
+ ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
+ , SYN_GNU_REGEX_BV
+ , ONIG_OPTION_SINGLELINE
+};
+
+
+extern int
+onig_set_default_syntax(OnigSyntaxType* syntax)
+{
+ if (IS_NULL(syntax))
+ syntax = ONIG_SYNTAX_RUBY;
+
+ OnigDefaultSyntax = syntax;
+ return 0;
+}
+
+extern void
+onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
+{
+ *to = *from;
+}
+
+extern void
+onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
+{
+ syntax->op = op;
+}
+
+extern void
+onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
+{
+ syntax->op2 = op2;
+}
+
+extern void
+onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
+{
+ syntax->behavior = behavior;
+}
+
+extern void
+onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
+{
+ syntax->options = options;
+}
+
+extern unsigned int
+onig_get_syntax_op(OnigSyntaxType* syntax)
+{
+ return syntax->op;
+}
+
+extern unsigned int
+onig_get_syntax_op2(OnigSyntaxType* syntax)
+{
+ return syntax->op2;
+}
+
+extern unsigned int
+onig_get_syntax_behavior(OnigSyntaxType* syntax)
+{
+ return syntax->behavior;
+}
+
+extern OnigOptionType
+onig_get_syntax_options(OnigSyntaxType* syntax)
+{
+ return syntax->options;
+}
+
+#ifdef USE_VARIABLE_META_CHARS
+extern int onig_set_meta_char(OnigEncoding enc,
+ unsigned int what, OnigCodePoint code)
+{
+ switch (what) {
+ case ONIG_META_CHAR_ESCAPE:
+ enc->meta_char_table.esc = code;
+ break;
+ case ONIG_META_CHAR_ANYCHAR:
+ enc->meta_char_table.anychar = code;
+ break;
+ case ONIG_META_CHAR_ANYTIME:
+ enc->meta_char_table.anytime = code;
+ break;
+ case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
+ enc->meta_char_table.zero_or_one_time = code;
+ break;
+ case ONIG_META_CHAR_ONE_OR_MORE_TIME:
+ enc->meta_char_table.one_or_more_time = code;
+ break;
+ case ONIG_META_CHAR_ANYCHAR_ANYTIME:
+ enc->meta_char_table.anychar_anytime = code;
+ break;
+ default:
+ return ONIGERR_INVALID_ARGUMENT;
+ break;
+ }
+ return 0;
+}
+#endif /* USE_VARIABLE_META_CHARS */
diff --git a/ext/mbstring/oniguruma/regtrav.c b/ext/mbstring/oniguruma/regtrav.c
new file mode 100644
index 00000000000..58a17f58b34
--- /dev/null
+++ b/ext/mbstring/oniguruma/regtrav.c
@@ -0,0 +1,76 @@
+/**********************************************************************
+ regtrav.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2004 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+#ifdef USE_CAPTURE_HISTORY
+
+static int
+capture_tree_traverse(OnigCaptureTreeNode* node, int at,
+ int(*callback_func)(int,int,int,int,int,void*),
+ int level, void* arg)
+{
+ int r, i;
+
+ if (node == (OnigCaptureTreeNode* )0)
+ return 0;
+
+ if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
+ r = (*callback_func)(node->group, node->beg, node->end,
+ level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
+ if (r != 0) return r;
+ }
+
+ for (i = 0; i < node->num_childs; i++) {
+ r = capture_tree_traverse(node->childs[i], at,
+ callback_func, level + 1, arg);
+ if (r != 0) return r;
+ }
+
+ if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) {
+ r = (*callback_func)(node->group, node->beg, node->end,
+ level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
+ if (r != 0) return r;
+ }
+
+ return 0;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
+extern int
+onig_capture_tree_traverse(OnigRegion* region, int at,
+ int(*callback_func)(int,int,int,int,int,void*), void* arg)
+{
+#ifdef USE_CAPTURE_HISTORY
+ return capture_tree_traverse(region->history_root, at,
+ callback_func, 0, arg);
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
+}
diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c
new file mode 100644
index 00000000000..5f15c10e652
--- /dev/null
+++ b/ext/mbstring/oniguruma/regversion.c
@@ -0,0 +1,55 @@
+/**********************************************************************
+ regversion.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2005 K.Kosako
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "oniguruma.h"
+#include
+
+extern const char*
+onig_version(void)
+{
+ static char s[12];
+
+ sprintf(s, "%d.%d.%d",
+ ONIGURUMA_VERSION_MAJOR,
+ ONIGURUMA_VERSION_MINOR,
+ ONIGURUMA_VERSION_TEENY);
+ return s;
+}
+
+extern const char*
+onig_copyright(void)
+{
+ static char s[58];
+
+ sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
+ ONIGURUMA_VERSION_MAJOR,
+ ONIGURUMA_VERSION_MINOR,
+ ONIGURUMA_VERSION_TEENY);
+ return s;
+}
diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c
new file mode 100644
index 00000000000..65c2cc58bd7
--- /dev/null
+++ b/ext/mbstring/oniguruma/st.c
@@ -0,0 +1,717 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+
+#include "config.h"
+#include
+#include
+#include
+
+#ifdef _WIN32
+#include
+#endif
+
+#ifdef NOT_RUBY
+#include "regint.h"
+#else
+#ifdef RUBY_PLATFORM
+#define xmalloc ruby_xmalloc
+#define xcalloc ruby_xcalloc
+#define xrealloc ruby_xrealloc
+#define xfree ruby_xfree
+
+void *xmalloc(long);
+void *xcalloc(long, long);
+void *xrealloc(void *, long);
+void xfree(void *);
+#endif
+#endif
+
+#include "st.h"
+
+typedef struct st_table_entry st_table_entry;
+
+struct st_table_entry {
+ unsigned int hash;
+ st_data_t key;
+ st_data_t record;
+ st_table_entry *next;
+};
+
+#define ST_DEFAULT_MAX_DENSITY 5
+#define ST_DEFAULT_INIT_TABLE_SIZE 11
+
+ /*
+ * DEFAULT_MAX_DENSITY is the default for the largest we allow the
+ * average number of items per bin before increasing the number of
+ * bins
+ *
+ * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
+ * allocated initially
+ *
+ */
+
+static int numcmp(long, long);
+static int numhash(long);
+static struct st_hash_type type_numhash = {
+ numcmp,
+ numhash,
+ st_nothing_key_free,
+ st_nothing_key_clone
+};
+
+/* extern int strcmp(const char *, const char *); */
+static int strhash(const char *);
+static struct st_hash_type type_strhash = {
+ strcmp,
+ strhash,
+ st_nothing_key_free,
+ st_nothing_key_clone
+};
+
+static int strend_cmp(st_strend_key*, st_strend_key*);
+static int strend_hash(st_strend_key*);
+static int strend_key_free(st_data_t key);
+static st_data_t strend_key_clone(st_data_t x);
+
+static struct st_hash_type type_strend_hash = {
+ strend_cmp,
+ strend_hash,
+ strend_key_free,
+ strend_key_clone
+};
+
+static void rehash(st_table *);
+
+#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
+#define Calloc(n,s) (char*)xcalloc((n),(s))
+
+#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
+
+#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
+#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
+
+/*
+ * MINSIZE is the minimum size of a dictionary.
+ */
+
+#define MINSIZE 8
+
+/*
+Table of prime numbers 2^n+a, 2<=n<=30.
+*/
+static long primes[] = {
+ 8 + 3,
+ 16 + 3,
+ 32 + 5,
+ 64 + 3,
+ 128 + 3,
+ 256 + 27,
+ 512 + 9,
+ 1024 + 9,
+ 2048 + 5,
+ 4096 + 3,
+ 8192 + 27,
+ 16384 + 43,
+ 32768 + 3,
+ 65536 + 45,
+ 131072 + 29,
+ 262144 + 3,
+ 524288 + 21,
+ 1048576 + 7,
+ 2097152 + 17,
+ 4194304 + 15,
+ 8388608 + 9,
+ 16777216 + 43,
+ 33554432 + 35,
+ 67108864 + 15,
+ 134217728 + 29,
+ 268435456 + 3,
+ 536870912 + 11,
+ 1073741824 + 85,
+ 0
+};
+
+static int
+new_size(size)
+ int size;
+{
+ int i;
+
+#if 0
+ for (i=3; i<31; i++) {
+ if ((1< size) return 1< size) return primes[i];
+ }
+ /* Ran out of polynomials */
+ return -1; /* should raise exception */
+#endif
+}
+
+#ifdef HASH_LOG
+static int collision = 0;
+static int init_st = 0;
+
+static void
+stat_col()
+{
+ FILE *f = fopen("/tmp/col", "w");
+ fprintf(f, "collision: %d\n", collision);
+ fclose(f);
+}
+#endif
+
+st_table*
+st_init_table_with_size(type, size)
+ struct st_hash_type *type;
+ int size;
+{
+ st_table *tbl;
+
+#ifdef HASH_LOG
+ if (init_st == 0) {
+ init_st = 1;
+ atexit(stat_col);
+ }
+#endif
+
+ size = new_size(size); /* round up to prime number */
+
+ tbl = alloc(st_table);
+ tbl->type = type;
+ tbl->num_entries = 0;
+ tbl->num_bins = size;
+ tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+
+ return tbl;
+}
+
+st_table*
+st_init_table(type)
+ struct st_hash_type *type;
+{
+ return st_init_table_with_size(type, 0);
+}
+
+st_table*
+st_init_numtable(void)
+{
+ return st_init_table(&type_numhash);
+}
+
+st_table*
+st_init_numtable_with_size(size)
+ int size;
+{
+ return st_init_table_with_size(&type_numhash, size);
+}
+
+st_table*
+st_init_strtable(void)
+{
+ return st_init_table(&type_strhash);
+}
+
+st_table*
+st_init_strtable_with_size(size)
+ int size;
+{
+ return st_init_table_with_size(&type_strhash, size);
+}
+
+st_table*
+st_init_strend_table_with_size(size)
+ int size;
+{
+ return st_init_table_with_size(&type_strend_hash, size);
+}
+
+void
+st_free_table(table)
+ st_table *table;
+{
+ register st_table_entry *ptr, *next;
+ int i;
+
+ for(i = 0; i < table->num_bins; i++) {
+ ptr = table->bins[i];
+ while (ptr != 0) {
+ next = ptr->next;
+ table->type->key_free(ptr->key);
+ free(ptr);
+ ptr = next;
+ }
+ }
+ free(table->bins);
+ free(table);
+}
+
+#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
+((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
+
+#ifdef HASH_LOG
+#define COLLISION collision++
+#else
+#define COLLISION
+#endif
+
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
+ bin_pos = hash_val%(table)->num_bins;\
+ ptr = (table)->bins[bin_pos];\
+ if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
+ COLLISION;\
+ while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
+ ptr = ptr->next;\
+ }\
+ ptr = ptr->next;\
+ }\
+} while (0)
+
+int
+st_lookup(table, key, value)
+ st_table *table;
+ register st_data_t key;
+ st_data_t *value;
+{
+ unsigned int hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
+
+ if (ptr == 0) {
+ return 0;
+ }
+ else {
+ if (value != 0) *value = ptr->record;
+ return 1;
+ }
+}
+
+int
+st_lookup_strend(table, str_key, end_key, value)
+ st_table *table;
+ const unsigned char* str_key;
+ const unsigned char* end_key;
+ st_data_t *value;
+{
+ st_strend_key key;
+
+ key.s = (unsigned char* )str_key;
+ key.end = (unsigned char* )end_key;
+
+ return st_lookup(table, (st_data_t )(&key), value);
+}
+
+#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
+do {\
+ st_table_entry *entry;\
+ if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
+ rehash(table);\
+ bin_pos = hash_val % table->num_bins;\
+ }\
+ \
+ entry = alloc(st_table_entry);\
+ \
+ entry->hash = hash_val;\
+ entry->key = key;\
+ entry->record = value;\
+ entry->next = table->bins[bin_pos];\
+ table->bins[bin_pos] = entry;\
+ table->num_entries++;\
+} while (0)
+
+int
+st_insert(table, key, value)
+ register st_table *table;
+ register st_data_t key;
+ st_data_t value;
+{
+ unsigned int hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
+
+ if (ptr == 0) {
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
+ return 0;
+ }
+ else {
+ ptr->record = value;
+ return 1;
+ }
+}
+
+int
+st_insert_strend(table, str_key, end_key, value)
+ st_table *table;
+ const unsigned char* str_key;
+ const unsigned char* end_key;
+ st_data_t value;
+{
+ st_strend_key* key;
+
+ key = alloc(st_strend_key);
+ key->s = (unsigned char* )str_key;
+ key->end = (unsigned char* )end_key;
+
+ return st_insert(table, (st_data_t )key, value);
+}
+
+void
+st_add_direct(table, key, value)
+ st_table *table;
+ st_data_t key;
+ st_data_t value;
+{
+ unsigned int hash_val, bin_pos;
+
+ hash_val = do_hash(key, table);
+ bin_pos = hash_val % table->num_bins;
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
+}
+
+void
+st_add_direct_strend(table, str_key, end_key, value)
+ st_table *table;
+ const unsigned char* str_key;
+ const unsigned char* end_key;
+ st_data_t value;
+{
+ st_strend_key* key;
+
+ key = alloc(st_strend_key);
+ key->s = (unsigned char* )str_key;
+ key->end = (unsigned char* )end_key;
+ st_add_direct(table, (st_data_t )key, value);
+}
+
+static void
+rehash(table)
+ register st_table *table;
+{
+ register st_table_entry *ptr, *next, **new_bins;
+ int i, old_num_bins = table->num_bins, new_num_bins;
+ unsigned int hash_val;
+
+ new_num_bins = new_size(old_num_bins+1);
+ new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
+
+ for(i = 0; i < old_num_bins; i++) {
+ ptr = table->bins[i];
+ while (ptr != 0) {
+ next = ptr->next;
+ hash_val = ptr->hash % new_num_bins;
+ ptr->next = new_bins[hash_val];
+ new_bins[hash_val] = ptr;
+ ptr = next;
+ }
+ }
+ free(table->bins);
+ table->num_bins = new_num_bins;
+ table->bins = new_bins;
+}
+
+st_table*
+st_copy(old_table)
+ st_table *old_table;
+{
+ st_table *new_table;
+ st_table_entry *ptr, *entry;
+ int i, num_bins = old_table->num_bins;
+
+ new_table = alloc(st_table);
+ if (new_table == 0) {
+ return 0;
+ }
+
+ *new_table = *old_table;
+ new_table->bins = (st_table_entry**)
+ Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+
+ if (new_table->bins == 0) {
+ free(new_table);
+ return 0;
+ }
+
+ for(i = 0; i < num_bins; i++) {
+ new_table->bins[i] = 0;
+ ptr = old_table->bins[i];
+ while (ptr != 0) {
+ entry = alloc(st_table_entry);
+ if (entry == 0) {
+ free(new_table->bins);
+ free(new_table);
+ return 0;
+ }
+ *entry = *ptr;
+ entry->key = old_table->type->key_clone(ptr->key);
+ entry->next = new_table->bins[i];
+ new_table->bins[i] = entry;
+ ptr = ptr->next;
+ }
+ }
+ return new_table;
+}
+
+int
+st_delete(table, key, value)
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
+{
+ unsigned int hash_val;
+ st_table_entry *tmp;
+ register st_table_entry *ptr;
+
+ hash_val = do_hash_bin(*key, table);
+ ptr = table->bins[hash_val];
+
+ if (ptr == 0) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
+ if (EQUAL(table, *key, ptr->key)) {
+ table->bins[hash_val] = ptr->next;
+ table->num_entries--;
+ if (value != 0) *value = ptr->record;
+ *key = ptr->key;
+ free(ptr);
+ return 1;
+ }
+
+ for(; ptr->next != 0; ptr = ptr->next) {
+ if (EQUAL(table, ptr->next->key, *key)) {
+ tmp = ptr->next;
+ ptr->next = ptr->next->next;
+ table->num_entries--;
+ if (value != 0) *value = tmp->record;
+ *key = tmp->key;
+ free(tmp);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int
+st_delete_safe(table, key, value, never)
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
+ st_data_t never;
+{
+ unsigned int hash_val;
+ register st_table_entry *ptr;
+
+ hash_val = do_hash_bin(*key, table);
+ ptr = table->bins[hash_val];
+
+ if (ptr == 0) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
+ for(; ptr != 0; ptr = ptr->next) {
+ if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
+ table->num_entries--;
+ *key = ptr->key;
+ if (value != 0) *value = ptr->record;
+ ptr->key = ptr->record = never;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+delete_never(key, value, never)
+ st_data_t key, value, never;
+{
+ if (value == never) return ST_DELETE;
+ return ST_CONTINUE;
+}
+
+void
+st_cleanup_safe(table, never)
+ st_table *table;
+ st_data_t never;
+{
+ int num_entries = table->num_entries;
+
+ st_foreach(table, delete_never, never);
+ table->num_entries = num_entries;
+}
+
+void
+st_foreach(table, func, arg)
+ st_table *table;
+ int (*func)();
+ st_data_t arg;
+{
+ st_table_entry *ptr, *last, *tmp;
+ enum st_retval retval;
+ int i;
+
+ for(i = 0; i < table->num_bins; i++) {
+ last = 0;
+ for(ptr = table->bins[i]; ptr != 0;) {
+ retval = (*func)(ptr->key, ptr->record, arg, 0);
+ switch (retval) {
+ case ST_CHECK: /* check if hash is modified during iteration */
+ tmp = 0;
+ if (i < table->num_bins) {
+ for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
+ if (tmp == ptr) break;
+ }
+ }
+ if (!tmp) {
+ /* call func with error notice */
+ retval = (*func)(0, 0, arg, 1);
+ return;
+ }
+ /* fall through */
+ case ST_CONTINUE:
+ last = ptr;
+ ptr = ptr->next;
+ break;
+ case ST_STOP:
+ return;
+ case ST_DELETE:
+ tmp = ptr;
+ if (last == 0) {
+ table->bins[i] = ptr->next;
+ }
+ else {
+ last->next = ptr->next;
+ }
+ ptr = ptr->next;
+ table->type->key_free(tmp->key);
+ free(tmp);
+ table->num_entries--;
+ }
+ }
+ }
+}
+
+static int
+strhash(string)
+ register const char *string;
+{
+ register int c;
+
+#ifdef HASH_ELFHASH
+ register unsigned int h = 0, g;
+
+ while ((c = *string++) != '\0') {
+ h = ( h << 4 ) + c;
+ if ( g = h & 0xF0000000 )
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+#elif HASH_PERL
+ register int val = 0;
+
+ while ((c = *string++) != '\0') {
+ val += c;
+ val += (val << 10);
+ val ^= (val >> 6);
+ }
+ val += (val << 3);
+ val ^= (val >> 11);
+
+ return val + (val << 15);
+#else
+ register int val = 0;
+
+ while ((c = *string++) != '\0') {
+ val = val*997 + c;
+ }
+
+ return val + (val>>5);
+#endif
+}
+
+static int
+numcmp(x, y)
+ long x, y;
+{
+ return x != y;
+}
+
+static int
+numhash(n)
+ long n;
+{
+ return n;
+}
+
+extern int
+st_nothing_key_free(st_data_t key) { return 0; }
+
+extern st_data_t
+st_nothing_key_clone(st_data_t x) { return x; }
+
+static int strend_cmp(st_strend_key* x, st_strend_key* y)
+{
+ unsigned char *p, *q;
+ int c;
+
+ if ((x->end - x->s) != (y->end - y->s))
+ return 1;
+
+ p = x->s;
+ q = y->s;
+ while (p < x->end) {
+ c = (int )*p - (int )*q;
+ if (c != 0) return c;
+
+ p++; q++;
+ }
+
+ return 0;
+}
+
+static int strend_hash(st_strend_key* x)
+{
+ int val;
+ unsigned char *p;
+
+ val = 0;
+ p = x->s;
+ while (p < x->end) {
+ val = val * 997 + (int )*p++;
+ }
+
+ return val + (val >> 5);
+}
+
+static int strend_key_free(st_data_t x)
+{
+ xfree((void* )x);
+ return 0;
+}
+
+static st_data_t strend_key_clone(st_data_t x)
+{
+ st_strend_key* new_key;
+ st_strend_key* key = (st_strend_key* )x;
+
+ new_key = alloc(st_strend_key);
+ *new_key = *key;
+ return (st_data_t )new_key;
+}
diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h
new file mode 100644
index 00000000000..c5cc4e625e0
--- /dev/null
+++ b/ext/mbstring/oniguruma/st.h
@@ -0,0 +1,77 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* @(#) st.h 5.1 89/12/14 */
+
+#ifndef ST_INCLUDED
+
+#define ST_INCLUDED
+
+typedef unsigned long st_data_t;
+#define ST_DATA_T_DEFINED
+
+typedef struct st_table st_table;
+
+struct st_hash_type {
+ int (*compare)();
+ int (*hash)();
+ int (*key_free)();
+ st_data_t (*key_clone)();
+};
+
+struct st_table {
+ struct st_hash_type *type;
+ int num_bins;
+ int num_entries;
+ struct st_table_entry **bins;
+};
+
+typedef struct {
+ unsigned char* s;
+ unsigned char* end;
+} st_strend_key;
+
+#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
+
+enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
+
+#ifndef _
+# define _(args) args
+#endif
+#ifndef ANYARGS
+# ifdef __cplusplus
+# define ANYARGS ...
+# else
+# define ANYARGS
+# endif
+#endif
+
+st_table *st_init_table _((struct st_hash_type *));
+st_table *st_init_table_with_size _((struct st_hash_type *, int));
+st_table *st_init_numtable _((void));
+st_table *st_init_numtable_with_size _((int));
+st_table *st_init_strtable _((void));
+st_table *st_init_strtable_with_size _((int));
+st_table *st_init_strend_table_with_size _((int));
+int st_delete _((st_table *, st_data_t *, st_data_t *));
+int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
+int st_insert _((st_table *, st_data_t, st_data_t));
+int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
+int st_lookup _((st_table *, st_data_t, st_data_t *));
+int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));
+void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
+void st_add_direct _((st_table *, st_data_t, st_data_t));
+void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
+void st_free_table _((st_table *));
+void st_cleanup_safe _((st_table *, st_data_t));
+st_table *st_copy _((st_table *));
+
+extern st_data_t st_nothing_key_clone _((st_data_t key));
+extern int st_nothing_key_free _((st_data_t key));
+
+#define ST_NUMCMP ((int (*)()) 0)
+#define ST_NUMHASH ((int (*)()) -2)
+
+#define st_numcmp ST_NUMCMP
+#define st_numhash ST_NUMHASH
+
+#endif /* ST_INCLUDED */
diff --git a/ext/mbstring/oniguruma/testc.c b/ext/mbstring/oniguruma/testc.c
deleted file mode 100644
index e4d197e21db..00000000000
--- a/ext/mbstring/oniguruma/testc.c
+++ /dev/null
@@ -1,833 +0,0 @@
-/*
- * This program was generated by testconv.rb.
- */
-#include
-
-#ifdef POSIX_TEST
-#include "onigposix.h"
-#else
-#include "oniguruma.h"
-#endif
-
-static int nsucc = 0;
-static int nfail = 0;
-static int nerror = 0;
-
-static FILE* err_file;
-
-#ifndef POSIX_TEST
-static OnigRegion* region;
-#endif
-
-static void xx(char* pattern, char* str, int from, int to, int mem, int not)
-{
- int r;
-
-#ifdef POSIX_TEST
- regex_t reg;
- char buf[200];
- regmatch_t pmatch[20];
-
- r = regcomp(®, pattern, REG_EXTENDED | REG_NEWLINE);
- if (r) {
- regerror(r, ®, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- r = regexec(®, str, reg.re_nsub + 1, pmatch, 0);
- if (r != 0 && r != REG_NOMATCH) {
- regerror(r, ®, buf, sizeof(buf));
- fprintf(err_file, "ERROR: %s\n", buf);
- nerror++;
- return ;
- }
-
- if (r == REG_NOMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
- nfail++;
- }
- }
- }
- regfree(®);
-
-#else
- regex_t* reg;
- OnigErrorInfo einfo;
-
- r = onig_new(®, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
- ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
- if (r) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(s, r, &einfo);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- r = onig_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
- (UChar* )str, (UChar* )(str + strlen(str)),
- region, ONIG_OPTION_NONE);
- if (r < ONIG_MISMATCH) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str(s, r);
- fprintf(err_file, "ERROR: %s\n", s);
- nerror++;
- return ;
- }
-
- if (r == ONIG_MISMATCH) {
- if (not) {
- fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- }
- else {
- if (not) {
- fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
- nfail++;
- }
- else {
- if (region->beg[mem] == from && region->end[mem] == to) {
- fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
- nsucc++;
- }
- else {
- fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, region->beg[mem], region->end[mem]);
- nfail++;
- }
- }
- }
- onig_free(reg);
-#endif
-}
-
-static void x2(char* pattern, char* str, int from, int to)
-{
- xx(pattern, str, from, to, 0, 0);
-}
-
-static void x3(char* pattern, char* str, int from, int to, int mem)
-{
- xx(pattern, str, from, to, mem, 0);
-}
-
-static void n(char* pattern, char* str)
-{
- xx(pattern, str, 0, 0, 0, 1);
-}
-
-extern int main(int argc, char* argv[])
-{
- err_file = stdout;
-
-#ifdef POSIX_TEST
- reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
-#else
- region = onig_region_new();
-#endif
-
- x2("", "", 0, 0);
- x2("^", "", 0, 0);
- x2("$", "", 0, 0);
- x2("\\G", "", 0, 0);
- x2("\\A", "", 0, 0);
- x2("\\Z", "", 0, 0);
- x2("\\z", "", 0, 0);
- x2("^$", "", 0, 0);
- x2("\\ca", "\001", 0, 1);
- x2("\\C-b", "\002", 0, 1);
- x2("\\M-Z", "\xDA", 0, 1);
- x2("", "a", 0, 0);
- x2("a", "a", 0, 1);
- x2("aa", "aa", 0, 2);
- x2("aaa", "aaa", 0, 3);
- x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
- x2("ab", "ab", 0, 2);
- x2("b", "ab", 1, 2);
- x2("bc", "abc", 1, 3);
- x2("\\17", "\017", 0, 1);
- x2("\\x1f", "\x1f", 0, 1);
- x2("\\xFE", "\xfe", 0, 1);
- x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
- x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
- x2(".", "a", 0, 1);
- n(".", "");
- x2("..", "ab", 0, 2);
- x2("\\w", "e", 0, 1);
- n("\\W", "e");
- x2("\\s", " ", 0, 1);
- x2("\\S", "b", 0, 1);
- x2("\\d", "4", 0, 1);
- n("\\D", "4");
- x2("\\b", "z ", 0, 0);
- x2("\\b", " z", 1, 1);
- x2("\\B", "zz ", 1, 1);
- x2("\\B", "z ", 2, 2);
- x2("\\B", " z", 0, 0);
- x2("[ab]", "b", 0, 1);
- n("[ab]", "c");
- x2("[a-z]", "t", 0, 1);
- n("[^a]", "a");
- x2("[^a]", "\n", 0, 1);
- x2("[]]", "]", 0, 1);
- n("[^]]", "]");
- x2("[\\^]+", "0^^1", 1, 3);
- x2("[b-]", "b", 0, 1);
- x2("[b-]", "-", 0, 1);
- x2("[\\w]", "z", 0, 1);
- n("[\\w]", " ");
- x2("[\\W]", "b$", 1, 2);
- x2("[\\d]", "5", 0, 1);
- n("[\\d]", "e");
- x2("[\\D]", "t", 0, 1);
- n("[\\D]", "3");
- x2("[\\s]", " ", 0, 1);
- n("[\\s]", "a");
- x2("[\\S]", "b", 0, 1);
- n("[\\S]", " ");
- x2("[\\w\\d]", "2", 0, 1);
- n("[\\w\\d]", " ");
- x2("[[:upper:]]", "B", 0, 1);
- x2("[*[:xdigit:]+]", "+", 0, 1);
- x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
- x2("[*[:xdigit:]+]", "-@^+", 3, 4);
- n("[[:upper]]", "A");
- x2("[[:upper]]", ":", 0, 1);
- x2("[\\044-\\047]", "\046", 0, 1);
- x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
- x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
- n("[\\x6A-\\x6D]", "\x6E");
- n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
- x2("[\\[]", "[", 0, 1);
- x2("[\\]]", "]", 0, 1);
- x2("[&]", "&", 0, 1);
- x2("[[ab]]", "b", 0, 1);
- x2("[[ab]c]", "c", 0, 1);
- n("[[^a]]", "a");
- n("[^[a]]", "a");
- x2("[[ab]&&bc]", "b", 0, 1);
- n("[[ab]&&bc]", "a");
- n("[[ab]&&bc]", "c");
- x2("[a-z&&b-y&&c-x]", "w", 0, 1);
- n("[^a-z&&b-y&&c-x]", "w");
- x2("[[^a&&a]&&a-z]", "b", 0, 1);
- n("[[^a&&a]&&a-z]", "a");
- x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
- n("[[^a-z&&bcdef]&&[^c-g]]", "c");
- x2("[^[^abc]&&[^cde]]", "c", 0, 1);
- x2("[^[^abc]&&[^cde]]", "e", 0, 1);
- n("[^[^abc]&&[^cde]]", "f");
- x2("[a-&&-a]", "-", 0, 1);
- n("[a-&&-a]", "&");
- n("\\wabc", " abc");
- x2("a\\Wbc", "a bc", 0, 4);
- x2("a.b.c", "aabbc", 0, 5);
- x2(".\\wb\\W..c", "abb bcc", 0, 7);
- x2("\\s\\wzzz", " zzzz", 0, 5);
- x2("aa.b", "aabb", 0, 4);
- n(".a", "ab");
- x2(".a", "aa", 0, 2);
- x2("^a", "a", 0, 1);
- x2("^a$", "a", 0, 1);
- x2("^\\w$", "a", 0, 1);
- n("^\\w$", " ");
- x2("^\\wab$", "zab", 0, 3);
- x2("^\\wabcdef$", "zabcdef", 0, 7);
- x2("^\\w...def$", "zabcdef", 0, 7);
- x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
- x2("\\A\\Z", "", 0, 0);
- x2("\\Axyz", "xyz", 0, 3);
- x2("xyz\\Z", "xyz", 0, 3);
- x2("xyz\\z", "xyz", 0, 3);
- x2("\\Gaz", "az", 0, 2);
- n("\\Gz", "bza");
- n("az\\G", "az");
- n("az\\A", "az");
- n("a\\Az", "az");
- x2("\\^\\$", "^$", 0, 2);
- x2("^x?y", "xy", 0, 2);
- x2("^(x?y)", "xy", 0, 2);
- x2("\\w", "_", 0, 1);
- n("\\W", "_");
- x2("(?=z)z", "z", 0, 1);
- n("(?=z).", "a");
- x2("(?!z)a", "a", 0, 1);
- n("(?!z)a", "z");
- x2("(?i:a)", "a", 0, 1);
- x2("(?i:a)", "A", 0, 1);
- x2("(?i:A)", "a", 0, 1);
- n("(?i:A)", "b");
- x2("(?i:[A-Z])", "a", 0, 1);
- x2("(?i:[f-m])", "H", 0, 1);
- x2("(?i:[f-m])", "h", 0, 1);
- n("(?i:[f-m])", "e");
- x2("(?i:[A-c])", "D", 0, 1);
- x2("(?i:[!-k])", "Z", 0, 1);
- x2("(?i:[!-k])", "7", 0, 1);
- x2("(?i:[T-}])", "b", 0, 1);
- x2("(?i:[T-}])", "{", 0, 1);
- x2("(?i:\\?a)", "?A", 0, 2);
- x2("(?i:\\*A)", "*a", 0, 2);
- n(".", "\n");
- x2("(?m:.)", "\n", 0, 1);
- x2("(?m:a.)", "a\n", 0, 2);
- x2("(?m:.b)", "a\nb", 1, 3);
- n("(?i)(?-i)a", "A");
- n("(?i)(?-i:a)", "A");
- x2("a?", "", 0, 0);
- x2("a?", "b", 0, 0);
- x2("a?", "a", 0, 1);
- x2("a*", "", 0, 0);
- x2("a*", "a", 0, 1);
- x2("a*", "aaa", 0, 3);
- x2("a*", "baaaa", 0, 0);
- n("a+", "");
- x2("a+", "a", 0, 1);
- x2("a+", "aaaa", 0, 4);
- x2("a+", "aabbb", 0, 2);
- x2("a+", "baaaa", 1, 5);
- x2(".?", "", 0, 0);
- x2(".?", "f", 0, 1);
- x2(".?", "\n", 0, 0);
- x2(".*", "", 0, 0);
- x2(".*", "abcde", 0, 5);
- x2(".+", "z", 0, 1);
- x2(".+", "zdswer\n", 0, 6);
- x2("a|b", "a", 0, 1);
- x2("a|b", "b", 0, 1);
- x2("|a", "a", 0, 0);
- x2("(|a)", "a", 0, 0);
- x2("ab|bc", "ab", 0, 2);
- x2("ab|bc", "bc", 0, 2);
- x2("z(?:ab|bc)", "zbc", 0, 3);
- x2("a(?:ab|bc)c", "aabc", 0, 4);
- x2("ab|(?:ac|az)", "az", 0, 2);
- x2("a|b|c", "dc", 1, 2);
- x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
- n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
- x2("a|^z", "ba", 1, 2);
- x2("a|^z", "za", 0, 1);
- x2("a|\\Gz", "bza", 2, 3);
- x2("a|\\Gz", "za", 0, 1);
- x2("a|\\Az", "bza", 2, 3);
- x2("a|\\Az", "za", 0, 1);
- x2("a|b\\Z", "ba", 1, 2);
- x2("a|b\\Z", "b", 0, 1);
- x2("a|b\\z", "ba", 1, 2);
- x2("a|b\\z", "b", 0, 1);
- x2("\\w|\\s", " ", 0, 1);
- n("\\w|\\w", " ");
- x2("\\w|%", "%", 0, 1);
- x2("\\w|[&$]", "&", 0, 1);
- x2("[b-d]|[^e-z]", "a", 0, 1);
- x2("(?:a|[c-f])|bz", "dz", 0, 1);
- x2("(?:a|[c-f])|bz", "bz", 0, 2);
- x2("abc|(?=zz)..f", "zzf", 0, 3);
- x2("abc|(?!zz)..f", "abf", 0, 3);
- x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
- n("(?>a|abd)c", "abdc");
- x2("(?>abd|a)c", "abdc", 0, 4);
- x2("a?|b", "a", 0, 1);
- x2("a?|b", "b", 0, 0);
- x2("a?|b", "", 0, 0);
- x2("a*|b", "aa", 0, 2);
- x2("a*|b*", "ba", 0, 0);
- x2("a*|b*", "ab", 0, 1);
- x2("a+|b*", "", 0, 0);
- x2("a+|b*", "bbb", 0, 3);
- x2("a+|b*", "abbb", 0, 1);
- n("a+|b+", "");
- x2("(a|b)?", "b", 0, 1);
- x2("(a|b)*", "ba", 0, 2);
- x2("(a|b)+", "bab", 0, 3);
- x2("(ab|ca)+", "caabbc", 0, 4);
- x2("(ab|ca)+", "aabca", 1, 5);
- x2("(ab|ca)+", "abzca", 0, 2);
- x2("(a|bab)+", "ababa", 0, 5);
- x2("(a|bab)+", "ba", 1, 2);
- x2("(a|bab)+", "baaaba", 1, 4);
- x2("(?:a|b)(?:a|b)", "ab", 0, 2);
- x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
- x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
- x2("(?:a+|b+){2}", "aaabbb", 0, 6);
- x2("h{0,}", "hhhh", 0, 4);
- x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
- n("ax{2}*a", "0axxxa1");
- n("a.{0,2}a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXa0");
- n("a.{0,2}?a", "0aXXXXa0");
- x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
- n("(?:a+|\\Ab*)cc", "abcc");
- x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
- x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
- x2("a|(?i)c", "C", 0, 1);
- x2("(?i)c|a", "C", 0, 1);
- x2("(?i)c|a", "A", 0, 1);
- x2("(?i:c)|a", "C", 0, 1);
- n("(?i:c)|a", "A");
- x2("[abc]?", "abc", 0, 1);
- x2("[abc]*", "abc", 0, 3);
- x2("[^abc]*", "abc", 0, 0);
- n("[^abc]+", "abc");
- x2("a??", "aaa", 0, 0);
- x2("ba??b", "bab", 0, 3);
- x2("a*?", "aaa", 0, 0);
- x2("ba*?", "baa", 0, 1);
- x2("ba*?b", "baab", 0, 4);
- x2("a+?", "aaa", 0, 1);
- x2("ba+?", "baa", 0, 2);
- x2("ba+?b", "baab", 0, 4);
- x2("(?:a?)??", "a", 0, 0);
- x2("(?:a??)?", "a", 0, 0);
- x2("(?:a?)+?", "aaa", 0, 1);
- x2("(?:a+)??", "aaa", 0, 0);
- x2("(?:a+)??b", "aaab", 0, 4);
- x2("(?:ab)?{2}", "", 0, 0);
- x2("(?:ab)?{2}", "ababa", 0, 4);
- x2("(?:ab)*{0}", "ababa", 0, 0);
- x2("(?:ab){3,}", "abababab", 0, 8);
- n("(?:ab){3,}", "abab");
- x2("(?:ab){2,4}", "ababab", 0, 6);
- x2("(?:ab){2,4}", "ababababab", 0, 8);
- x2("(?:ab){2,4}?", "ababababab", 0, 4);
- x2("(?:ab){,}", "ab{,}", 0, 5);
- x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
- x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
- x2("(d+)([^abc]z)", "dddz", 0, 4);
- x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
- x2("(\\w+)(\\wz)", "dddz", 0, 4);
- x3("(a)", "a", 0, 1, 1);
- x3("(ab)", "ab", 0, 2, 1);
- x2("((ab))", "ab", 0, 2);
- x3("((ab))", "ab", 0, 2, 1);
- x3("((ab))", "ab", 0, 2, 2);
- x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
- x3("(ab)(cd)", "abcd", 0, 2, 1);
- x3("(ab)(cd)", "abcd", 2, 4, 2);
- x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
- x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
- x2("(^a)", "a", 0, 1);
- x3("(a)|(a)", "ba", 1, 2, 1);
- x3("(^a)|(a)", "ba", 1, 2, 2);
- x3("(a?)", "aaa", 0, 1, 1);
- x3("(a*)", "aaa", 0, 3, 1);
- x3("(a*)", "", 0, 0, 1);
- x3("(a+)", "aaaaaaa", 0, 7, 1);
- x3("(a+|b*)", "bbbaa", 0, 3, 1);
- x3("(a+|b?)", "bbbaa", 0, 1, 1);
- x3("(abc)?", "abc", 0, 3, 1);
- x3("(abc)*", "abc", 0, 3, 1);
- x3("(abc)+", "abc", 0, 3, 1);
- x3("(xyz|abc)+", "abc", 0, 3, 1);
- x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
- x3("((?i:abc))", "AbC", 0, 3, 1);
- x2("(abc)(?i:\\1)", "abcABC", 0, 6);
- x3("((?m:a.c))", "a\nc", 0, 3, 1);
- x3("((?=az)a)", "azb", 0, 1, 1);
- x3("abc|(.abd)", "zabd", 0, 4, 1);
- x2("(?:abc)|(ABC)", "abc", 0, 3);
- x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
- x3("a*(.)", "aaaaz", 4, 5, 1);
- x3("a*?(.)", "aaaaz", 0, 1, 1);
- x3("a*?(c)", "aaaac", 4, 5, 1);
- x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
- x3("(\\Abb)cc", "bbcc", 0, 2, 1);
- n("(\\Abb)cc", "zbbcc");
- x3("(^bb)cc", "bbcc", 0, 2, 1);
- n("(^bb)cc", "zbbcc");
- x3("cc(bb$)", "ccbb", 2, 4, 1);
- n("cc(bb$)", "ccbbb");
- n("(\\1)", "");
- n("\\1(a)", "aa");
- n("(a(b)\\1)\\2+", "ababb");
- n("(?:(?:\\1|z)(a))+$", "zaa");
- x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
- x2("(a)(?=\\1)", "aa", 0, 1);
- n("(a)$|\\1", "az");
- x2("(a)\\1", "aa", 0, 2);
- n("(a)\\1", "ab");
- x2("(a?)\\1", "aa", 0, 2);
- x2("(a??)\\1", "aa", 0, 0);
- x2("(a*)\\1", "aaaaa", 0, 4);
- x3("(a*)\\1", "aaaaa", 0, 2, 1);
- x2("a(b*)\\1", "abbbb", 0, 5);
- x2("a(b*)\\1", "ab", 0, 1);
- x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
- x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
- x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
- x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
- x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
- x2("([a-d])\\1", "cc", 0, 2);
- x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
- n("(\\w\\d\\s)\\1", "f5 f5");
- x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
- x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
- x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
- x2("(^a)\\1", "aa", 0, 2);
- n("(^a)\\1", "baa");
- n("(a$)\\1", "aa");
- n("(ab\\Z)\\1", "ab");
- x2("(a*\\Z)\\1", "a", 1, 1);
- x2(".(a*\\Z)\\1", "ba", 1, 2);
- x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
- x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
- x2("((?i:az))\\1", "AzAz", 0, 4);
- n("((?i:az))\\1", "Azaz");
- x2("(?<=a)b", "ab", 1, 2);
- n("(?<=a)b", "bb");
- x2("(?<=a|b)b", "bb", 1, 2);
- x2("(?<=a|bc)b", "bcb", 2, 3);
- x2("(?<=a|bc)b", "ab", 1, 2);
- x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
- x2("(a)\\g<1>", "aa", 0, 2);
- x2("(?a)", "a", 0, 1);
- x2("(?ab)\\g", "abab", 0, 4);
- x2("(?.zv.)\\k", "azvbazvb", 0, 8);
- x2("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3);
- x2("(?|a\\g)+", "", 0, 0);
- x2("(?|\\(\\g\\))+$", "()(())", 0, 6);
- x3("\\g(?.){0}", "X", 0, 1, 1);
- x2("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3);
- x2("\\A(?(a\\g)|)\\z", "aaaa", 0, 4);
- x2("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8);
- x2("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18);
- x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
- x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
- x2("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2);
- x2("(?:(?)|(?efg))\\k", "", 0, 0);
- x2("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9);
- n("(?:(?abc)|(?efg))\\k", "abcefg");
- x2("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10);
- x3("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
- x3("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16);
- x2("(?a|\\(\\g\\))", "a", 0, 1);
- x2("(?a|\\(\\g\\))", "((((((a))))))", 0, 13);
- x3("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1);
- x2("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9);
- x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
- x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
- x2("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7);
- x2("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4);
- x2("(?(a|b\\gc){3,5})", "baaaaca", 1, 5);
- x2("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10);
- x2("()*\\1", "", 0, 0);
- x2("(?:()|())*\\1\\2", "", 0, 0);
- x3("(?:\\1a|())*", "a", 0, 0, 1);
- x2("x((.)*)*x", "0x1x2x3", 1, 6);
- x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
- x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
- x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
- x2("", "¤¢", 0, 0);
- x2("¤¢", "¤¢", 0, 2);
- n("¤¤", "¤¢");
- x2("¤¦¤¦", "¤¦¤¦", 0, 4);
- x2("¤¢¤¤¤¦", "¤¢¤¤¤¦", 0, 6);
- x2("¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", "¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", 0, 70);
- x2("¤¢", "¤¤¤¢", 2, 4);
- x2("¤¤¤¦", "¤¢¤¤¤¦", 2, 6);
- x2("\\xca\\xb8", "\xca\xb8", 0, 2);
- x2(".", "¤¢", 0, 2);
- x2("..", "¤«¤", 0, 4);
- x2("\\w", "¤ª", 0, 2);
- n("\\W", "¤¢");
- x2("[\\W]", "¤¦$", 2, 3);
- x2("\\S", "¤½", 0, 2);
- x2("\\S", "´Á", 0, 2);
- x2("\\b", "µ¤ ", 0, 0);
- x2("\\b", " ¤Û", 1, 1);
- x2("\\B", "¤»¤½ ", 2, 2);
- x2("\\B", "¤¦ ", 3, 3);
- x2("\\B", " ¤¤", 0, 0);
- x2("[¤¿¤Á]", "¤Á", 0, 2);
- n("[¤Ê¤Ë]", "¤Ì");
- x2("[¤¦-¤ª]", "¤¨", 0, 2);
- n("[^¤±]", "¤±");
- x2("[\\w]", "¤Í", 0, 2);
- n("[\\d]", "¤Õ");
- x2("[\\D]", "¤Ï", 0, 2);
- n("[\\s]", "¤¯");
- x2("[\\S]", "¤Ø", 0, 2);
- x2("[\\w\\d]", "¤è", 0, 2);
- x2("[\\w\\d]", " ¤è", 3, 5);
- n("\\wµ´¼Ö", " µ´¼Ö");
- x2("µ´\\W¼Ö", "µ´ ¼Ö", 0, 5);
- x2("¤¢.¤¤.¤¦", "¤¢¤¢¤¤¤¤¤¦", 0, 10);
- x2(".\\w¤¦\\W..¤¾", "¤¨¤¦¤¦ ¤¦¤¾¤¾", 0, 13);
- x2("\\s\\w¤³¤³¤³", " ¤³¤³¤³¤³", 0, 9);
- x2("¤¢¤¢.¤±", "¤¢¤¢¤±¤±", 0, 8);
- n(".¤¤", "¤¤¤¨");
- x2(".¤ª", "¤ª¤ª", 0, 4);
- x2("^¤¢", "¤¢", 0, 2);
- x2("^¤à$", "¤à", 0, 2);
- x2("^\\w$", "¤Ë", 0, 2);
- x2("^\\w¤«¤¤¯¤±¤³$", "z¤«¤¤¯¤±¤³", 0, 11);
- x2("^\\w...¤¦¤¨¤ª$", "z¤¢¤¤¤¦¤¦¤¨¤ª", 0, 13);
- x2("\\w\\w\\s\\W¤ª¤ª¤ª\\d", "a¤ª ¤ª¤ª¤ª4", 0, 12);
- x2("\\A¤¿¤Á¤Ä", "¤¿¤Á¤Ä", 0, 6);
- x2("¤à¤á¤â\\Z", "¤à¤á¤â", 0, 6);
- x2("¤«¤¤¯\\z", "¤«¤¤¯", 0, 6);
- x2("¤«¤¤¯\\Z", "¤«¤¤¯\n", 0, 6);
- x2("\\G¤Ý¤Ô", "¤Ý¤Ô", 0, 4);
- n("\\G¤¨", "¤¦¤¨¤ª");
- n("¤È¤Æ\\G", "¤È¤Æ");
- n("¤Þ¤ß\\A", "¤Þ¤ß");
- n("¤Þ\\A¤ß", "¤Þ¤ß");
- x2("(?=¤»)¤»", "¤»", 0, 2);
- n("(?=¤¦).", "¤¤");
- x2("(?!¤¦)¤«", "¤«", 0, 2);
- n("(?!¤È)¤¢", "¤È");
- x2("(?i:¤¢)", "¤¢", 0, 2);
- x2("(?i:¤Ö¤Ù)", "¤Ö¤Ù", 0, 4);
- n("(?i:¤¤)", "¤¦");
- x2("(?m:¤è.)", "¤è\n", 0, 3);
- x2("(?m:.¤á)", "¤Þ\n¤á", 2, 5);
- x2("¤¢?", "", 0, 0);
- x2("ÊÑ?", "²½", 0, 0);
- x2("ÊÑ?", "ÊÑ", 0, 2);
- x2("ÎÌ*", "", 0, 0);
- x2("ÎÌ*", "ÎÌ", 0, 2);
- x2("»Ò*", "»Ò»Ò»Ò", 0, 6);
- x2("ÇÏ*", "¼¯ÇÏÇÏÇÏÇÏ", 0, 0);
- n("»³+", "");
- x2("²Ï+", "²Ï", 0, 2);
- x2("»þ+", "»þ»þ»þ»þ", 0, 8);
- x2("¤¨+", "¤¨¤¨¤¦¤¦¤¦", 0, 4);
- x2("¤¦+", "¤ª¤¦¤¦¤¦¤¦", 2, 10);
- x2(".?", "¤¿", 0, 2);
- x2(".*", "¤Ñ¤Ô¤×¤Ú", 0, 8);
- x2(".+", "¤í", 0, 2);
- x2(".+", "¤¤¤¦¤¨¤«\n", 0, 8);
- x2("¤¢|¤¤", "¤¢", 0, 2);
- x2("¤¢|¤¤", "¤¤", 0, 2);
- x2("¤¢¤¤|¤¤¤¦", "¤¢¤¤", 0, 4);
- x2("¤¢¤¤|¤¤¤¦", "¤¤¤¦", 0, 4);
- x2("¤ò(?:¤«¤|¤¤¯)", "¤ò¤«¤", 0, 6);
- x2("¤ò(?:¤«¤|¤¤¯)¤±", "¤ò¤¤¯¤±", 0, 8);
- x2("¤¢¤¤|(?:¤¢¤¦|¤¢¤ò)", "¤¢¤ò", 0, 4);
- x2("¤¢|¤¤|¤¦", "¤¨¤¦", 2, 4);
- x2("¤¢|¤¤|¤¦¤¨|¤ª¤«¤|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤·¤¹¤»", 0, 6);
- n("¤¢|¤¤|¤¦¤¨|¤ª¤«¤|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤¹¤»");
- x2("¤¢|^¤ï", "¤Ö¤¢", 2, 4);
- x2("¤¢|^¤ò", "¤ò¤¢", 0, 2);
- x2("µ´|\\G¼Ö", "¤±¼Öµ´", 4, 6);
- x2("µ´|\\G¼Ö", "¼Öµ´", 0, 2);
- x2("µ´|\\A¼Ö", "b¼Öµ´", 3, 5);
- x2("µ´|\\A¼Ö", "¼Ö", 0, 2);
- x2("µ´|¼Ö\\Z", "¼Öµ´", 2, 4);
- x2("µ´|¼Ö\\Z", "¼Ö", 0, 2);
- x2("µ´|¼Ö\\Z", "¼Ö\n", 0, 2);
- x2("µ´|¼Ö\\z", "¼Öµ´", 2, 4);
- x2("µ´|¼Ö\\z", "¼Ö", 0, 2);
- x2("\\w|\\s", "¤ª", 0, 2);
- x2("\\w|%", "%¤ª", 0, 1);
- x2("\\w|[&$]", "¤¦&", 0, 2);
- x2("[¤¤-¤±]", "¤¦", 0, 2);
- x2("[¤¤-¤±]|[^¤«-¤³]", "¤¢", 0, 2);
- x2("[¤¤-¤±]|[^¤«-¤³]", "¤«", 0, 2);
- x2("[^¤¢]", "\n", 0, 1);
- x2("(?:¤¢|[¤¦-¤])|¤¤¤ò", "¤¦¤ò", 0, 2);
- x2("(?:¤¢|[¤¦-¤])|¤¤¤ò", "¤¤¤ò", 0, 4);
- x2("¤¢¤¤¤¦|(?=¤±¤±)..¤Û", "¤±¤±¤Û", 0, 6);
- x2("¤¢¤¤¤¦|(?!¤±¤±)..¤Û", "¤¢¤¤¤Û", 0, 6);
- x2("(?=¤ò¤¢)..¤¢|(?=¤ò¤ò)..¤¢", "¤ò¤ò¤¢", 0, 6);
- x2("(?<=¤¢|¤¤¤¦)¤¤", "¤¤¤¦¤¤", 4, 6);
- n("(?>¤¢|¤¢¤¤¤¨)¤¦", "¤¢¤¤¤¨¤¦");
- x2("(?>¤¢¤¤¤¨|¤¢)¤¦", "¤¢¤¤¤¨¤¦", 0, 8);
- x2("¤¢?|¤¤", "¤¢", 0, 2);
- x2("¤¢?|¤¤", "¤¤", 0, 0);
- x2("¤¢?|¤¤", "", 0, 0);
- x2("¤¢*|¤¤", "¤¢¤¢", 0, 4);
- x2("¤¢*|¤¤*", "¤¤¤¢", 0, 0);
- x2("¤¢*|¤¤*", "¤¢¤¤", 0, 2);
- x2("[a¤¢]*|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 3);
- x2("¤¢+|¤¤*", "", 0, 0);
- x2("¤¢+|¤¤*", "¤¤¤¤¤¤", 0, 6);
- x2("¤¢+|¤¤*", "¤¢¤¤¤¤¤¤", 0, 2);
- x2("¤¢+|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 0);
- n("¤¢+|¤¤+", "");
- x2("(¤¢|¤¤)?", "¤¤", 0, 2);
- x2("(¤¢|¤¤)*", "¤¤¤¢", 0, 4);
- x2("(¤¢|¤¤)+", "¤¤¤¢¤¤", 0, 6);
- x2("(¤¢¤¤|¤¦¤¢)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 0, 8);
- x2("(¤¢¤¤|¤¦¤¨)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 4, 12);
- x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¢¤¤¤¦¤¢", 2, 10);
- x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¤¤ò¤¦¤¢", 0, 4);
- x2("(¤¢¤¤|¤¦¤¢)+", "$$zzzz¤¢¤¤¤ò¤¦¤¢", 6, 10);
- x2("(¤¢|¤¤¤¢¤¤)+", "¤¢¤¤¤¢¤¤¤¢", 0, 10);
- x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢", 2, 4);
- x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢¤¢¤¢¤¤¤¢", 2, 8);
- x2("(?:¤¢|¤¤)(?:¤¢|¤¤)", "¤¢¤¤", 0, 4);
- x2("(?:¤¢*|¤¤*)(?:¤¢*|¤¤*)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 6);
- x2("(?:¤¢*|¤¤*)(?:¤¢+|¤¤+)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
- x2("(?:¤¢+|¤¤+){2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
- x2("(?:¤¢+|¤¤+){1,2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
- x2("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¦¤¦", 0, 4);
- n("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¢¤¤¤¦¤¦");
- x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¢¤¤¤¦", 12, 16);
- x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¤¤¦", 0, 14);
- x2("¤¦{0,}", "¤¦¤¦¤¦¤¦", 0, 8);
- x2("¤¢|(?i)c", "C", 0, 1);
- x2("(?i)c|¤¢", "C", 0, 1);
- x2("(?i:¤¢)|a", "a", 0, 1);
- n("(?i:¤¢)|a", "A");
- x2("[¤¢¤¤¤¦]?", "¤¢¤¤¤¦", 0, 2);
- x2("[¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 6);
- x2("[^¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 0);
- n("[^¤¢¤¤¤¦]+", "¤¢¤¤¤¦");
- x2("¤¢??", "¤¢¤¢¤¢", 0, 0);
- x2("¤¤¤¢??¤¤", "¤¤¤¢¤¤", 0, 6);
- x2("¤¢*?", "¤¢¤¢¤¢", 0, 0);
- x2("¤¤¤¢*?", "¤¤¤¢¤¢", 0, 2);
- x2("¤¤¤¢*?¤¤", "¤¤¤¢¤¢¤¤", 0, 8);
- x2("¤¢+?", "¤¢¤¢¤¢", 0, 2);
- x2("¤¤¤¢+?", "¤¤¤¢¤¢", 0, 4);
- x2("¤¤¤¢+?¤¤", "¤¤¤¢¤¢¤¤", 0, 8);
- x2("(?:Å·?)??", "Å·", 0, 0);
- x2("(?:Å·??)?", "Å·", 0, 0);
- x2("(?:Ì´?)+?", "Ì´Ì´Ì´", 0, 2);
- x2("(?:É÷+)??", "É÷É÷É÷", 0, 0);
- x2("(?:Àã+)??Áú", "ÀãÀãÀãÁú", 0, 8);
- x2("(?:¤¢¤¤)?{2}", "", 0, 0);
- x2("(?:µ´¼Ö)?{2}", "µ´¼Öµ´¼Öµ´", 0, 8);
- x2("(?:µ´¼Ö)*{0}", "µ´¼Öµ´¼Öµ´", 0, 0);
- x2("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16);
- n("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Ö");
- x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Ö", 0, 12);
- x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16);
- x2("(?:µ´¼Ö){2,4}?", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 8);
- x2("(?:µ´¼Ö){,}", "µ´¼Ö{,}", 0, 7);
- x2("(?:¤«¤¤¯)+?{2}", "¤«¤¤¯¤«¤¤¯¤«¤¤¯", 0, 12);
- x3("(²Ð)", "²Ð", 0, 2, 1);
- x3("(²Ð¿å)", "²Ð¿å", 0, 4, 1);
- x2("((»þ´Ö))", "»þ´Ö", 0, 4);
- x3("((É÷¿å))", "É÷¿å", 0, 4, 1);
- x3("((ºòÆü))", "ºòÆü", 0, 4, 2);
- x3("((((((((((((((((((((ÎÌ»Ò))))))))))))))))))))", "ÎÌ»Ò", 0, 4, 20);
- x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 0, 4, 1);
- x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 4, 8, 2);
- x3("()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤¤¯¤±¤³", "¤¢¤¤¤¦¤¨¤ª¤«¤¤¯¤±¤³", 6, 12, 3);
- x3("(()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤¤¯¤±¤³)", "¤¢¤¤¤¦¤¨¤ª¤«¤¤¯¤±¤³", 6, 12, 4);
- x3(".*(¥Õ¥©)¥ó¡¦¥Þ(¥ó()¥·¥å¥¿)¥¤¥ó", "¥Õ¥©¥ó¡¦¥Þ¥ó¥·¥å¥¿¥¤¥ó", 10, 18, 2);
- x2("(^¤¢)", "¤¢", 0, 2);
- x3("(¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 1);
- x3("(^¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 2);
- x3("(¤¢?)", "¤¢¤¢¤¢", 0, 2, 1);
- x3("(¤Þ*)", "¤Þ¤Þ¤Þ", 0, 6, 1);
- x3("(¤È*)", "", 0, 0, 1);
- x3("(¤ë+)", "¤ë¤ë¤ë¤ë¤ë¤ë¤ë", 0, 14, 1);
- x3("(¤Õ+|¤Ø*)", "¤Õ¤Õ¤Õ¤Ø¤Ø", 0, 6, 1);
- x3("(¤¢+|¤¤?)", "¤¤¤¤¤¤¤¢¤¢", 0, 2, 1);
- x3("(¤¢¤¤¤¦)?", "¤¢¤¤¤¦", 0, 6, 1);
- x3("(¤¢¤¤¤¦)*", "¤¢¤¤¤¦", 0, 6, 1);
- x3("(¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1);
- x3("(¤µ¤·¤¹|¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1);
- x3("([¤Ê¤Ë¤Ì][¤«¤¤¯]|¤«¤¤¯)+", "¤«¤¤¯", 0, 6, 1);
- x3("((?i:¤¢¤¤¤¦))", "¤¢¤¤¤¦", 0, 6, 1);
- x3("((?m:¤¢.¤¦))", "¤¢\n¤¦", 0, 5, 1);
- x3("((?=¤¢¤ó)¤¢)", "¤¢¤ó¤¤", 0, 2, 1);
- x3("¤¢¤¤¤¦|(.¤¢¤¤¤¨)", "¤ó¤¢¤¤¤¨", 0, 8, 1);
- x3("¤¢*(.)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1);
- x3("¤¢*?(.)", "¤¢¤¢¤¢¤¢¤ó", 0, 2, 1);
- x3("¤¢*?(¤ó)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1);
- x3("[¤¤¤¦¤¨]¤¢*(.)", "¤¨¤¢¤¢¤¢¤¢¤ó", 10, 12, 1);
- x3("(\\A¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1);
- n("(\\A¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦");
- x3("(^¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1);
- n("(^¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦");
- x3("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë", 4, 8, 1);
- n("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë¤ë");
- x2("(̵)\\1", "̵̵", 0, 4);
- n("(̵)\\1", "̵Éð");
- x2("(¶õ?)\\1", "¶õ¶õ", 0, 4);
- x2("(¶õ??)\\1", "¶õ¶õ", 0, 0);
- x2("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 8);
- x3("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 4, 1);
- x2("¤¢(¤¤*)\\1", "¤¢¤¤¤¤¤¤¤¤", 0, 10);
- x2("¤¢(¤¤*)\\1", "¤¢¤¤", 0, 2);
- x2("(¤¢*)(¤¤*)\\1\\2", "¤¢¤¢¤¢¤¤¤¤¤¢¤¢¤¢¤¤¤¤", 0, 20);
- x2("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 0, 14);
- x3("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 6, 10, 2);
- x2("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 16);
- x3("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 6, 7);
- x2("(¤Ï)(¤Ò)(¤Õ)\\2\\1\\3", "¤Ï¤Ò¤Õ¤Ò¤Ï¤Õ", 0, 12);
- x2("([¤-¤±])\\1", "¤¯¤¯", 0, 4);
- x2("(\\w\\d\\s)\\1", "¤¢5 ¤¢5 ", 0, 8);
- n("(\\w\\d\\s)\\1", "¤¢5 ¤¢5");
- x2("(ï¡©|[¤¢-¤¦]{3})\\1", "ï¡©", 0, 8);
- x2("...(ï¡©|[¤¢-¤¦]{3})\\1", "¤¢a¤¢Ã¯¡©Ã¯¡©", 0, 13);
- x2("(ï¡©|[¤¢-¤¦]{3})\\1", "¤¦¤¤¤¦¤¦¤¤¤¦", 0, 12);
- x2("(^¤³)\\1", "¤³¤³", 0, 4);
- n("(^¤à)\\1", "¤á¤à¤à");
- n("(¤¢$)\\1", "¤¢¤¢");
- n("(¤¢¤¤\\Z)\\1", "¤¢¤¤");
- x2("(¤¢*\\Z)\\1", "¤¢", 2, 2);
- x2(".(¤¢*\\Z)\\1", "¤¤¤¢", 2, 4);
- x3("(.(¤ä¤¤¤æ)\\2)", "z¤ä¤¤¤æ¤ä¤¤¤æ", 0, 13, 1);
- x3("(.(..\\d.)\\2)", "¤¢12341234", 0, 10, 1);
- x2("((?i:¤¢v¤º))\\1", "¤¢v¤º¤¢v¤º", 0, 10);
- x2("(?<¶ò¤«>ÊÑ|\\(\\g<¶ò¤«>\\))", "((((((ÊÑ))))))", 0, 14);
- x2("\\A(?:\\g<°¤_1>|\\g<±¾_2>|\\z½ªÎ» (?<°¤_1>´Ñ|¼«\\g<±¾_2>¼«)(?<±¾_2>ºß|Ê\\g<°¤_1>Ê))$", "Ê¼«Ê¼«ºß¼«Ê¼«Ê", 0, 26);
- x2("[[¤Ò¤Õ]]", "¤Õ", 0, 2);
- x2("[[¤¤¤ª¤¦]¤«]", "¤«", 0, 2);
- n("[[^¤¢]]", "¤¢");
- n("[^[¤¢]]", "¤¢");
- x2("[^[^¤¢]]", "¤¢", 0, 2);
- x2("[[¤«¤¤¯]&&¤¤¯]", "¤¯", 0, 2);
- n("[[¤«¤¤¯]&&¤¤¯]", "¤«");
- n("[[¤«¤¤¯]&&¤¤¯]", "¤±");
- x2("[¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ", 0, 2);
- n("[^¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ");
- x2("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¤", 0, 2);
- n("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¢");
- x2("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤", 0, 2);
- n("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤¤");
- x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¦", 0, 2);
- x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¨", 0, 2);
- n("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤«");
- x2("[¤¢-&&-¤¢]", "-", 0, 1);
- x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]q-w]", "¤¨", 0, 2);
- x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "f", 0, 1);
- x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "g", 0, 1);
- n("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "2");
- fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d\n",
- nsucc, nfail, nerror);
-
-#ifndef POSIX_TEST
- onig_region_free(region, 1);
- onig_end();
-#endif
-
- return 0;
-}
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c
index fdf9195b5fa..b3f812fb48d 100644
--- a/ext/mbstring/php_mbregex.c
+++ b/ext/mbstring/php_mbregex.c
@@ -124,6 +124,22 @@ php_mb_regex_enc_name_map_t enc_name_map[] ={
"UTF-8\0UTF8\0",
ONIG_ENCODING_UTF8
},
+ {
+ "UTF-16\0UTF-16BE\0",
+ ONIG_ENCODING_UTF16_BE
+ },
+ {
+ "UTF-16LE\0",
+ ONIG_ENCODING_UTF16_LE
+ },
+ {
+ "UCS-4\0UTF-32\0UTF-32BE\0",
+ ONIG_ENCODING_UTF32_BE
+ },
+ {
+ "UCS-4LE\0UTF-32LE\0",
+ ONIG_ENCODING_UTF32_LE
+ },
{
"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
ONIG_ENCODING_SJIS
diff --git a/ext/mbstring/tests/bug31911.phpt b/ext/mbstring/tests/bug31911.phpt
new file mode 100644
index 00000000000..eb6438d4e7a
--- /dev/null
+++ b/ext/mbstring/tests/bug31911.phpt
@@ -0,0 +1,11 @@
+--TEST--
+Bug #31911 (mb_decode_mimeheader() is case-sensitive to hex escapes)
+--FILE--
+
+--EXPECT--
+Works: ???
+Fails: ???