- Update libmbfl (fixes bug #30549 and #31911).

- Update oniguruma to 3.7.0
This commit is contained in:
Moriyoshi Koizumi 2005-02-20 22:18:09 +00:00
parent 8913ddc295
commit 5b5e012bc2
87 changed files with 19129 additions and 5757 deletions

View File

@ -96,6 +96,12 @@ int main() { return foo(10, "", 3.14); }
oniguruma/reggnu.c
oniguruma/regparse.c
oniguruma/regenc.c
oniguruma/regext.c
oniguruma/regsyntax.c
oniguruma/regtrav.c
oniguruma/regversion.c
oniguruma/st.c
oniguruma/enc/unicode.c
oniguruma/enc/ascii.c
oniguruma/enc/utf8.c
oniguruma/enc/euc_jp.c
@ -120,6 +126,10 @@ int main() { return foo(10, "", 3.14); }
oniguruma/enc/koi8.c
oniguruma/enc/koi8_r.c
oniguruma/enc/big5.c
oniguruma/enc/utf16_be.c
oniguruma/enc/utf16_le.c
oniguruma/enc/utf32_be.c
oniguruma/enc/utf32_le.c
])
fi
])
@ -164,6 +174,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_iso8859_13.c
libmbfl/filters/mbfilter_iso8859_14.c
libmbfl/filters/mbfilter_iso8859_15.c
libmbfl/filters/mbfilter_iso8859_16.c
libmbfl/filters/mbfilter_iso8859_2.c
libmbfl/filters/mbfilter_iso8859_3.c
libmbfl/filters/mbfilter_iso8859_4.c
@ -261,3 +272,5 @@ if test "$PHP_MBSTRING" != "no"; then
PHP_MBSTRING_SETUP_LIBMBFL
PHP_MBSTRING_EXTENSION
fi
# vim600: sts=2 sw=2 et

View File

@ -24,13 +24,14 @@ if (PHP_MBSTRING == "yes") {
mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \
mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \
mbfilter_iso8859_1.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c \
mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_2.c \
mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c \
mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c \
mbfilter_iso8859_9.c mbfilter_jis.c mbfilter_koi8r.c mbfilter_qprint.c \
mbfilter_sjis.c mbfilter_ucs2.c mbfilter_ucs4.c mbfilter_uhc.c \
mbfilter_utf16.c mbfilter_utf32.c mbfilter_utf7.c mbfilter_utf7imap.c \
mbfilter_utf8.c mbfilter_uuencode.c", "mbstring");
mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c \
mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
mbfilter_uuencode.c", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
@ -51,13 +52,15 @@ if (PHP_MBSTRING == "yes") {
AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h');
AC_DEFINE('HAVE_MBREGEX', 1);
ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \
regenc.c regexec.c reggnu.c regparse.c regposerr.c", "mbstring");
regenc.c regexec.c reggnu.c regparse.c regposerr.c \
regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring");
ADD_SOURCES("ext/mbstring/oniguruma/enc", "ascii.c big5.c \
euc_jp.c euc_kr.c euc_tw.c iso8859_1.c iso8859_2.c \
iso8859_3.c iso8859_4.c iso8859_5.c iso8859_6.c \
iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \
iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \
koi8.c koi8_r.c sjis.c utf8.c", "mbstring");
koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \
utf32_be.c utf32_le.c", "mbstring");
ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring");
}
}

View File

@ -1,5 +1,104 @@
EXTRA_DIST=Makefile.bcc32
EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h 
unicode_table_iso8859_15.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_uhc.h
libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h 
unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_uhc.h
# Each single-byte filter source depends on the generated Unicode table
# header for its own ISO-8859 part.
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
mbfilter_iso8859_3.c: unicode_table_iso8859_3.h
mbfilter_iso8859_4.c: unicode_table_iso8859_4.h
mbfilter_iso8859_5.c: unicode_table_iso8859_5.h
mbfilter_iso8859_6.c: unicode_table_iso8859_6.h
mbfilter_iso8859_7.c: unicode_table_iso8859_7.h
mbfilter_iso8859_8.c: unicode_table_iso8859_8.h
mbfilter_iso8859_9.c: unicode_table_iso8859_9.h
mbfilter_iso8859_10.c: unicode_table_iso8859_10.h
mbfilter_iso8859_11.c: unicode_table_iso8859_11.h
mbfilter_iso8859_13.c: unicode_table_iso8859_13.h
mbfilter_iso8859_14.c: unicode_table_iso8859_14.h
mbfilter_iso8859_15.c: unicode_table_iso8859_15.h
mbfilter_iso8859_16.c: unicode_table_iso8859_16.h
8859-1.TXT 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT 8859-6.TXT \
8859-7.TXT 8859-8.TXT 8859-9.TXT 8859-10.TXT 8859-11.TXT 8859-13.TXT \
8859-14.TXT 8859-15.TXT 8859-16.TXT:
$(FETCH_VIA_FTP) ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/$@
unicode_table_iso8859_1.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_1_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_1_H -f mk_sb_tbl.awk 8859-1.TXT > $@
unicode_table_iso8859_2.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_2_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_2_H -f mk_sb_tbl.awk 8859-2.TXT > $@
unicode_table_iso8859_3.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_3_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_3_H -f mk_sb_tbl.awk 8859-3.TXT > $@
unicode_table_iso8859_4.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_4_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_4_H -f mk_sb_tbl.awk 8859-4.TXT > $@
unicode_table_iso8859_5.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_5_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_5_H -f mk_sb_tbl.awk 8859-5.TXT > $@
unicode_table_iso8859_6.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_6_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_6_H -f mk_sb_tbl.awk 8859-6.TXT > $@
unicode_table_iso8859_7.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_7_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_7_H -f mk_sb_tbl.awk 8859-7.TXT > $@
unicode_table_iso8859_8.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_8_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_8_H -f mk_sb_tbl.awk 8859-8.TXT > $@
unicode_table_iso8859_9.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_9_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_9_H -f mk_sb_tbl.awk 8859-9.TXT > $@
unicode_table_iso8859_10.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_10_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_10_H -f mk_sb_tbl.awk 8859-10.TXT > $@
unicode_table_iso8859_11.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_11_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_11_H -f mk_sb_tbl.awk 8859-11.TXT > $@
unicode_table_iso8859_13.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_13_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_13_H -f mk_sb_tbl.awk 8859-13.TXT > $@
unicode_table_iso8859_14.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_14_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_14_H -f mk_sb_tbl.awk 8859-14.TXT > $@
unicode_table_iso8859_15.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_15_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_15_H -f mk_sb_tbl.awk 8859-15.TXT > $@
# Generate the ISO-8859-16 table header from 8859-16.TXT; the guard name
# follows the UNICODE_TABLE_ISO8859_<n>_H pattern used by the sibling rules.
unicode_table_iso8859_16.h: mk_sb_tbl.awk
$(AWK) -v TABLE_NAME=iso8859_16_ucs_table \
-v IFNDEF_NAME=UNICODE_TABLE_ISO8859_16_H -f mk_sb_tbl.awk 8859-16.TXT > $@
unidata: 8859-1.TXT 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT 8859-6.TXT \
8859-7.TXT 8859-8.TXT 8859-9.TXT 8859-10.TXT 8859-11.TXT 8859-13.TXT \
8859-14.TXT 8859-15.TXT 8859-16.TXT
.PHONY: unidata

View File

@ -1,6 +1,6 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I../mbfl
OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj
OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj
all: $(OBJS)

View File

@ -0,0 +1,136 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_16.h"
#include "unicode_table_iso8859_16.h"
/* Alternative names for ISO-8859-16; the list is terminated by NULL. */
static const char *mbfl_encoding_8859_16_aliases[] = {"ISO_8859-16", NULL};
/*
 * Encoding descriptor for ISO-8859-16.
 * NOTE(review): the initializer is positional; the two identical strings are
 * presumably the encoding name and its MIME name, and the NULL slot an
 * optional per-encoding table -- confirm against the mbfl_encoding
 * declaration.
 */
const mbfl_encoding mbfl_encoding_8859_16 = {
mbfl_no_encoding_8859_16,
"ISO-8859-16",
"ISO-8859-16",
(const char *(*)[])&mbfl_encoding_8859_16_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identify-filter table for ISO-8859-16: uses the common constructor and
 * destructor together with mbfl_filt_ident_true as the filter function.
 * NOTE(review): mbfl_filt_ident_true presumably accepts every input byte --
 * confirm against its definition.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_16 = {
mbfl_no_encoding_8859_16,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Conversion-filter table wiring mbfl_filt_conv_8859_16_wchar to the common
 * constructor, destructor and flush routines.
 * NOTE(review): the first two entries are presumably the source and
 * destination encoding ids, in that order -- confirm against
 * struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_8859_16_wchar = {
mbfl_no_encoding_8859_16,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_16_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion-filter table wiring mbfl_filt_conv_wchar_8859_16 to the common
 * constructor, destructor and flush routines.
 * NOTE(review): the first two entries are presumably the source and
 * destination encoding ids, in that order -- confirm against
 * struct mbfl_convert_vtbl.
 */
const struct mbfl_convert_vtbl vtbl_wchar_8859_16 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_16,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_16,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement`; when its value is negative, make the enclosing
 * function return -1. Only usable inside functions returning int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-16 => wchar
 *
 * Values 0x00..0x9f are passed through unchanged, 0xa0..0xff are looked up
 * in iso8859_16_ucs_table, and anything outside 0x00..0xff is masked with
 * MBFL_WCSGROUP_MASK and tagged with MBFL_WCSGROUP_THROUGH.
 * Returns c, or -1 when the output function reports a negative value.
 */
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter)
{
	int wc = c;

	if (c < 0 || c >= 0x100) {
		/* not a single-byte value */
		wc &= MBFL_WCSGROUP_MASK;
		wc |= MBFL_WCSGROUP_THROUGH;
	} else if (c >= 0xa0) {
		wc = iso8859_16_ucs_table[c - 0xa0];
		if (wc <= 0) {
			/* no table entry: keep the byte, tagged with the 8859-16 plane */
			wc = c;
			wc &= MBFL_WCSPLANE_MASK;
			wc |= MBFL_WCSPLANE_8859_16;
		}
	}

	if ((*filter->output_function)(wc, filter->data) < 0) {
		return -1;
	}

	return c;
}
/*
 * wchar => ISO-8859-16
 *
 * Values 0x00..0x9f are emitted unchanged. Other values are searched for in
 * iso8859_16_ucs_table (from the last entry down to the first) and emitted
 * as 0xa0 + index when found. A value carrying the MBFL_WCSPLANE_8859_16 tag
 * is emitted with the tag stripped. Anything else goes to
 * mbfl_filt_conv_illegal_output unless illegal_mode is
 * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
 * Returns c, or -1 when an output call reports a negative value.
 */
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_16_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_16) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out >= 0) {
		CK((*filter->output_function)(out, filter->data));
	} else if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return c;
}

View File

@ -0,0 +1,23 @@
/*
* COPYRIGHT NOTICE
*
* This file is a portion of "streamable kanji code filter and converter"
* library, which is distributed under GNU Lesser General Public License
* version 2.1.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_16_H
#define MBFL_MBFILTER_ISO8859_16_H
#include "mbfilter.h"
/* Encoding descriptor and filter tables defined in mbfilter_iso8859_16.c. */
extern const mbfl_encoding mbfl_encoding_8859_16;
extern const struct mbfl_identify_vtbl vtbl_identify_8859_16;
extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16;
/*
 * Per-character conversion functions (ISO-8859-16 => wchar and back).
 * Both return c, or -1 when an output call reports a negative value.
 */
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_16_H */

View File

@ -155,11 +155,30 @@ int mbfl_filt_conv_qprintenc_flush(mbfl_convert_filter *filter)
*/
int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
{
int n;
int n, m;
static int hex2code_map[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
switch (filter->status) {
case 1:
if ((c >= 0x30 && c <= 0x39) || (c >= 0x41 && c <= 0x46)) { /* 0 - 9 or A - F */
if (hex2code_map[c & 0xff] >= 0) {
filter->cache = c;
filter->status = 2;
} else if (c == 0x0d) { /* soft line feed */
@ -173,21 +192,13 @@ int mbfl_filt_conv_qprintdec(int c, mbfl_convert_filter *filter)
}
break;
case 2:
n = filter->cache;
if (n >= 0x30 && n <= 0x39) { /* '0' - '9' */
n -= 48; /* 48 = '0' */
} else {
n -= 55; /* 55 = 'A' - 10 */
}
n <<= 4;
if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
n += (c - 48);
} else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F' */
n += (c - 55);
} else {
m = hex2code_map[c & 0xff];
if (m < 0) {
CK((*filter->output_function)(0x3d, filter->data)); /* '=' */
CK((*filter->output_function)(filter->cache, filter->data));
n = c;
} else {
n = hex2code_map[filter->cache] << 4 | m;
}
CK((*filter->output_function)(n, filter->data));
filter->status = 0;

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_10_H
#define UNICODE_TABLE_ISO8859_10_H
static const unsigned short iso8859_10_ucs_table[] = {
0x00A0,0x0104,0x0112,0x0122,0x0124,0x0128,0x0136,0x00A7,
0x013B,0x0110,0x0160,0x0166,0x017D,0x00AD,0x016A,0x014A,
0x00B0,0x0105,0x0113,0x0123,0x012B,0x0129,0x0137,0x00B7,
0x013C,0x0111,0x0161,0x0167,0x017E,0x2015,0x016B,0x014B,
0x0100,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x012E,
0x010C,0x00C9,0x0118,0x00CB,0x0116,0x00CD,0x00CE,0x00CF,
0x00D0,0x0145,0x014C,0x00D3,0x00D4,0x00D5,0x00D6,0x0168,
0x00D8,0x0172,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
0x0101,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x012F,
0x010D,0x00E9,0x0119,0x00EB,0x0117,0x00ED,0x00EE,0x00EF,
0x00F0,0x0146,0x014D,0x00F3,0x00F4,0x00F5,0x00F6,0x0169,
0x00F8,0x0173,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x0138
static const unsigned int iso8859_10_ucs_table[] = {
0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138
};
#endif /* UNICODE_TABLE_ISO8859_10_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_13_H
#define UNICODE_TABLE_ISO8859_13_H
static const unsigned short iso8859_13_ucs_table[] = {
0x00A0,0x201D,0x00A2,0x00A3,0x00A4,0x201E,0x00A6,0x00A7,
0x00D8,0x00A9,0x0156,0x00AB,0x00AC,0x00AD,0x00AE,0x00C6,
0x00B0,0x00B1,0x00B2,0x00B3,0x201C,0x00B5,0x00B6,0x00B7,
0x00F8,0x00B9,0x0157,0x00BB,0x00BC,0x00BD,0x00BE,0x00E6,
0x0104,0x012E,0x0100,0x0106,0x00C4,0x00C5,0x0118,0x0112,
0x010C,0x00C9,0x0179,0x0116,0x0122,0x0136,0x012A,0x013B,
0x0160,0x0143,0x0145,0x00D3,0x014C,0x00D5,0x00D6,0x00D7,
0x0172,0x0141,0x015A,0x016A,0x00DC,0x017B,0x017D,0x00DF,
0x0105,0x012F,0x0101,0x0107,0x00E4,0x00E5,0x0119,0x0113,
0x010D,0x00E9,0x017A,0x0117,0x0123,0x0137,0x012B,0x013C,
0x0161,0x0144,0x0146,0x00F3,0x014D,0x00F5,0x00F6,0x00F7,
0x0173,0x0142,0x015B,0x016B,0x00FC,0x017C,0x017E,0x2019
static const unsigned int iso8859_13_ucs_table[] = {
0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019
};
#endif /* UNICODE_TABLE_ISO8859_13_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_14_H
#define UNICODE_TABLE_ISO8859_14_H
static const unsigned short iso8859_14_ucs_table[] = {
0x00A0,0x1E02,0x1E03,0x00A3,0x010A,0x010B,0x1E0A,0x00A7,
0x1E80,0x00A9,0x1E82,0x1E0B,0x1EF2,0x00AD,0x00AE,0x0178,
0x1E1E,0x1E1F,0x0120,0x0121,0x1E40,0x1E41,0x00B6,0x1E56,
0x1E81,0x1E57,0x1E83,0x1E60,0x1EF3,0x1E84,0x1E85,0x1E61,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x0174,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x1E6A,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x0176,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x0175,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x1E6B,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x0177,0x00FF
static const unsigned int iso8859_14_ucs_table[] = {
0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff
};
#endif /* UNICODE_TABLE_ISO8859_14_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_15_H
#define UNICODE_TABLE_ISO8859_15_H
static const unsigned short iso8859_15_ucs_table[] = {
0x00A0,0x00A1,0x00A2,0x00A3,0x20AC,0x00A5,0x0160,0x00A7,
0x0161,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
0x00B0,0x00B1,0x00B2,0x00B3,0x017D,0x00B5,0x00B6,0x00B7,
0x017E,0x00B9,0x00BA,0x00BB,0x0152,0x0153,0x0178,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
static const unsigned int iso8859_15_ucs_table[] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
};
#endif /* UNICODE_TABLE_ISO8859_15_H */

View File

@ -0,0 +1,17 @@
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_16_H
#define UNICODE_TABLE_ISO8859_16_H

/*
 * UCS code points for the ISO-8859-16 bytes 0xa0..0xff (96 entries);
 * index with (byte - 0xa0).
 */
static const unsigned int iso8859_16_ucs_table[] = {
 0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
 0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
 0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
 0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
 0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
 0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
 0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
 0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
 0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
 0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff
};

#endif /* UNICODE_TABLE_ISO8859_16_H */

View File

@ -1,45 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_2_H
#define UNICODE_TABLE_ISO8859_2_H
static const unsigned short iso8859_2_ucs_table[] = {
0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
/*
 * Unicode code point table named for ISO-8859-2: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 */
static const unsigned int iso8859_2_ucs_table[] = {
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
};
#endif /* UNICODE_TABLE_ISO8859_2_H */

View File

@ -1,43 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_3_H
#define UNICODE_TABLE_ISO8859_3_H
static const unsigned short iso8859_3_ucs_table[] = {
0x00A0,0x0126,0x02D8,0x00A3,0x00A4,0x0000,0x0124,0x00A7,
0x00A8,0x0130,0x015E,0x011E,0x0134,0x00AD,0x0000,0x017B,
0x00B0,0x0127,0x00B2,0x00B3,0x00B4,0x00B5,0x0125,0x00B7,
0x00B8,0x0131,0x015F,0x011F,0x0135,0x00BD,0x0000,0x017C,
0x00C0,0x00C1,0x00C2,0x0000,0x00C4,0x010A,0x0108,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x0120,0x00D6,0x00D7,
0x011C,0x00D9,0x00DA,0x00DB,0x00DC,0x016C,0x015C,0x00DF,
0x00E0,0x00E1,0x00E2,0x0000,0x00E4,0x010B,0x0109,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x0121,0x00F6,0x00F7,
0x011D,0x00F9,0x00FA,0x00FB,0x00FC,0x016D,0x015D,0x02D9
/*
 * Unicode code point table named for ISO-8859-3: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 * The 0x0000 entries presumably mark positions with no assigned
 * character -- TODO confirm how the filter treats them.
 */
static const unsigned int iso8859_3_ucs_table[] = {
0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9
};
#endif /* UNICODE_TABLE_ISO8859_3_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_4_H
#define UNICODE_TABLE_ISO8859_4_H
static const unsigned short iso8859_4_ucs_table[] = {
0x00A0,0x0104,0x0138,0x0156,0x00A4,0x0128,0x013B,0x00A7,
0x00A8,0x0160,0x0112,0x0122,0x0166,0x00AD,0x017D,0x00AF,
0x00B0,0x0105,0x02DB,0x0157,0x00B4,0x0129,0x013C,0x02C7,
0x00B8,0x0161,0x0113,0x0123,0x0167,0x014A,0x017E,0x014B,
0x0100,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x012E,
0x010C,0x00C9,0x0118,0x00CB,0x0116,0x00CD,0x00CE,0x012A,
0x0110,0x0145,0x014C,0x0136,0x00D4,0x00D5,0x00D6,0x00D7,
0x00D8,0x0172,0x00DA,0x00DB,0x00DC,0x0168,0x016A,0x00DF,
0x0101,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x012F,
0x010D,0x00E9,0x0119,0x00EB,0x0117,0x00ED,0x00EE,0x012B,
0x0111,0x0146,0x014D,0x0137,0x00F4,0x00F5,0x00F6,0x00F7,
0x00F8,0x0173,0x00FA,0x00FB,0x00FC,0x0169,0x016B,0x02D9
/*
 * Unicode code point table named for ISO-8859-4: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 */
static const unsigned int iso8859_4_ucs_table[] = {
0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9
};
#endif /* UNICODE_TABLE_ISO8859_4_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_5_H
#define UNICODE_TABLE_ISO8859_5_H
static const unsigned short iso8859_5_ucs_table[] = {
0x00A0,0x0401,0x0402,0x0403,0x0404,0x0405,0x0406,0x0407,
0x0408,0x0409,0x040A,0x040B,0x040C,0x00AD,0x040E,0x040F,
0x0410,0x0411,0x0412,0x0413,0x0414,0x0415,0x0416,0x0417,
0x0418,0x0419,0x041A,0x041B,0x041C,0x041D,0x041E,0x041F,
0x0420,0x0421,0x0422,0x0423,0x0424,0x0425,0x0426,0x0427,
0x0428,0x0429,0x042A,0x042B,0x042C,0x042D,0x042E,0x042F,
0x0430,0x0431,0x0432,0x0433,0x0434,0x0435,0x0436,0x0437,
0x0438,0x0439,0x043A,0x043B,0x043C,0x043D,0x043E,0x043F,
0x0440,0x0441,0x0442,0x0443,0x0444,0x0445,0x0446,0x0447,
0x0448,0x0449,0x044A,0x044B,0x044C,0x044D,0x044E,0x044F,
0x2116,0x0451,0x0452,0x0453,0x0454,0x0455,0x0456,0x0457,
0x0458,0x0459,0x045A,0x045B,0x045C,0x00A7,0x045E,0x045F
/*
 * Unicode code point table named for ISO-8859-5: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 */
static const unsigned int iso8859_5_ucs_table[] = {
0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f
};
#endif /* UNICODE_TABLE_ISO8859_5_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_6_H
#define UNICODE_TABLE_ISO8859_6_H
static const unsigned short iso8859_6_ucs_table[] = {
0x00A0,0x0000,0x0000,0x0000,0x00A4,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x060C,0x00AD,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x061B,0x0000,0x0000,0x0000,0x061F,
0x0000,0x0621,0x0622,0x0623,0x0624,0x0625,0x0626,0x0627,
0x0628,0x0629,0x062A,0x062B,0x062C,0x062D,0x062E,0x062F,
0x0630,0x0631,0x0632,0x0633,0x0634,0x0635,0x0636,0x0637,
0x0638,0x0639,0x063A,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0640,0x0641,0x0642,0x0643,0x0644,0x0645,0x0646,0x0647,
0x0648,0x0649,0x064A,0x064B,0x064C,0x064D,0x064E,0x064F,
0x0650,0x0651,0x0652,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x060C,0x00AD,0x0000,0x0000
/*
 * Unicode code point table named for ISO-8859-6: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 * The 0x0000 entries presumably mark positions with no assigned
 * character -- TODO confirm how the filter treats them.
 */
static const unsigned int iso8859_6_ucs_table[] = {
0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
#endif /* UNICODE_TABLE_ISO8859_6_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_7_H
#define UNICODE_TABLE_ISO8859_7_H
static const unsigned short iso8859_7_ucs_table[] = {
0x00A0,0x2018,0x2019,0x00A3,0x0000,0x0000,0x00A6,0x00A7,
0x00A8,0x00A9,0x0000,0x00AB,0x00AC,0x00AD,0x0000,0x2015,
0x00B0,0x00B1,0x00B2,0x00B3,0x0384,0x0385,0x0386,0x00B7,
0x0388,0x0389,0x038A,0x00BB,0x038C,0x00BD,0x038E,0x038F,
0x0390,0x0391,0x0392,0x0393,0x0394,0x0395,0x0396,0x0397,
0x0398,0x0399,0x039A,0x039B,0x039C,0x039D,0x039E,0x039F,
0x03A0,0x03A1,0x0000,0x03A3,0x03A4,0x03A5,0x03A6,0x03A7,
0x03A8,0x03A9,0x03AA,0x03AB,0x03AC,0x03AD,0x03AE,0x03AF,
0x03B0,0x03B1,0x03B2,0x03B3,0x03B4,0x03B5,0x03B6,0x03B7,
0x03B8,0x03B9,0x03BA,0x03BB,0x03BC,0x03BD,0x03BE,0x03BF,
0x03C0,0x03C1,0x03C2,0x03C3,0x03C4,0x03C5,0x03C6,0x03C7,
0x03C8,0x03C9,0x03CA,0x03CB,0x03CC,0x03CD,0x03CE,0x0000
/*
 * Unicode code point table named for ISO-8859-7: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 * The 0x0000 entries presumably mark positions with no assigned
 * character -- TODO confirm how the filter treats them.
 */
static const unsigned int iso8859_7_ucs_table[] = {
0x00a0, 0x2018, 0x2019, 0x00a3, 0x20ac, 0x20af, 0x00a6, 0x00a7,
0x00a8, 0x00a9, 0x037a, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000
};
#endif /* UNICODE_TABLE_ISO8859_7_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_8_H
#define UNICODE_TABLE_ISO8859_8_H
static const unsigned short iso8859_8_ucs_table[] = {
0x00A0,0x0000,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
0x00A8,0x00A9,0x00D7,0x00AB,0x00AC,0x00AD,0x00AE,0x203E,
0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
0x00B8,0x00B9,0x00F7,0x00BB,0x00BC,0x00BD,0x00BE,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2017,
0x05D0,0x05D1,0x05D2,0x05D3,0x05D4,0x05D5,0x05D6,0x05D7,
0x05D8,0x05D9,0x05DA,0x05DB,0x05DC,0x05DD,0x05DE,0x05DF,
0x05E0,0x05E1,0x05E2,0x05E3,0x05E4,0x05E5,0x05E6,0x05E7,
0x05E8,0x05E9,0x05EA,0x0000,0x0000,0x0000,0x0000,0x0000
/*
 * Unicode code point table named for ISO-8859-8: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 * The 0x0000 entries presumably mark positions with no assigned
 * character -- TODO confirm how the filter treats them.
 */
static const unsigned int iso8859_8_ucs_table[] = {
0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000
};
#endif /* UNICODE_TABLE_ISO8859_8_H */

View File

@ -1,44 +1,17 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The authors of this file: PHP3 internationalization team
* You can contact the primary author   <sgk@happysize.co.jp>.
*
*/
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLE_ISO8859_9_H
#define UNICODE_TABLE_ISO8859_9_H
static const unsigned short iso8859_9_ucs_table[] = {
0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x011E,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0130,0x015E,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x011F,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x0131,0x015F,0x00FF
/*
 * Unicode code point table named for ISO-8859-9: 96 entries, eight per
 * row.  The first entry is 0x00a0, so entry i presumably corresponds to
 * byte 0xA0 + i -- confirm against the filter that indexes this table.
 */
static const unsigned int iso8859_9_ucs_table[] = {
0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff
};
#endif /* UNICODE_TABLE_ISO8859_9_H */

View File

@ -199,6 +199,10 @@ SOURCE=.\filters\mbfilter_iso8859_15.c
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_iso8859_16.c
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_iso8859_2.c
# End Source File
# Begin Source File
@ -500,6 +504,10 @@ SOURCE=.\filters\mbfilter_iso8859_15.h
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_iso8859_16.h
# End Source File
# Begin Source File
SOURCE=.\filters\mbfilter_iso8859_2.h
# End Source File
# Begin Source File
@ -708,6 +716,10 @@ SOURCE=.\filters\unicode_table_iso8859_15.h
# End Source File
# Begin Source File
SOURCE=.\filters\unicode_table_iso8859_16.h
# End Source File
# Begin Source File
SOURCE=.\filters\unicode_table_iso8859_2.h
# End Source File
# Begin Source File

View File

@ -0,0 +1,21 @@
Microsoft Visual Studio Solution File, Format Version 7.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
ConfigName.0 = Debug
ConfigName.1 = Release
EndGlobalSection
GlobalSection(ProjectDependencies) = postSolution
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,650 @@
<?xml version="1.0" encoding = "shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.00"
Name="libmbfl"
ProjectGUID="{B3636594-A785-4270-A765-8EAE922B5207}"
SccProjectName=""
SccLocalPath="">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="2"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="mbfl,."
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;MBFL_DLL_EXPORT;HAVE_CONFIG_H=1"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="2"
PrecompiledHeaderFile=".\Debug/mbfl.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
DebugInformationFormat="4"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="odbc32.lib odbccp32.lib"
OutputFile=".\Debug/mbfl.dll"
LinkIncremental="2"
SuppressStartupBanner="TRUE"
ModuleDefinitionFile=""
GenerateDebugInformation="TRUE"
ProgramDatabaseFile=".\Debug/mbfl.pdb"
ImportLibrary=".\Debug/mbfl.lib"/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="TRUE"
SuppressStartupBanner="TRUE"
TargetEnvironment="1"
TypeLibraryName=".\Debug/mbfl.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="2"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="mbfl,."
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;HAVE_CONFIG_H"
StringPooling="TRUE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="TRUE"
UsePrecompiledHeader="2"
PrecompiledHeaderFile=".\Release/mbfl.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="odbc32.lib odbccp32.lib"
OutputFile=".\Release/mbfl.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
ModuleDefinitionFile=""
ProgramDatabaseFile=".\Release/mbfl.pdb"
ImportLibrary=".\Release/mbfl.lib"/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="TRUE"
SuppressStartupBanner="TRUE"
TargetEnvironment="1"
TypeLibraryName=".\Release/mbfl.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
</Configurations>
<Files>
<Filter
Name="Source Files"
Filter="vc6">
<File
RelativePath=".\filters\html_entities.c">
</File>
<File
RelativePath=".\mbfl\mbfilter.c">
</File>
<File
RelativePath=".\filters\mbfilter_7bit.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_8bit.c">
</File>
<File
RelativePath=".\filters\mbfilter_ascii.c">
</File>
<File
RelativePath=".\filters\mbfilter_base64.c">
</File>
<File
RelativePath=".\filters\mbfilter_big5.c">
</File>
<File
RelativePath=".\filters\mbfilter_byte2.c">
</File>
<File
RelativePath=".\filters\mbfilter_byte4.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp1251.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp1252.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp866.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp932.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp936.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_cn.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp_win.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_kr.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_tw.c">
</File>
<File
RelativePath=".\filters\mbfilter_htmlent.c">
</File>
<File
RelativePath=".\filters\mbfilter_hz.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso2022_kr.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_1.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_10.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_13.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_14.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_15.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_16.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_2.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_3.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_4.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_5.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_6.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_7.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_8.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_9.c">
</File>
<File
RelativePath=".\filters\mbfilter_jis.c">
</File>
<File
RelativePath=".\filters\mbfilter_koi8r.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_pass.c">
</File>
<File
RelativePath=".\filters\mbfilter_qprint.c">
</File>
<File
RelativePath=".\filters\mbfilter_sjis.c">
</File>
<File
RelativePath=".\filters\mbfilter_ucs2.c">
</File>
<File
RelativePath=".\filters\mbfilter_ucs4.c">
</File>
<File
RelativePath=".\filters\mbfilter_uhc.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf16.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf32.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf7.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf7imap.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf8.c">
</File>
<File
RelativePath=".\filters\mbfilter_uuencode.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_wchar.c">
</File>
<File
RelativePath=".\mbfl\mbfl_allocators.c">
</File>
<File
RelativePath=".\mbfl\mbfl_convert.c">
</File>
<File
RelativePath=".\mbfl\mbfl_encoding.c">
</File>
<File
RelativePath=".\mbfl\mbfl_filter_output.c">
</File>
<File
RelativePath=".\mbfl\mbfl_ident.c">
</File>
<File
RelativePath=".\mbfl\mbfl_language.c">
</File>
<File
RelativePath=".\mbfl\mbfl_memory_device.c">
</File>
<File
RelativePath=".\mbfl\mbfl_string.c">
</File>
<File
RelativePath=".\nls\nls_de.c">
</File>
<File
RelativePath=".\nls\nls_en.c">
</File>
<File
RelativePath=".\nls\nls_ja.c">
</File>
<File
RelativePath=".\nls\nls_kr.c">
</File>
<File
RelativePath=".\nls\nls_neutral.c">
</File>
<File
RelativePath=".\nls\nls_ru.c">
</File>
<File
RelativePath=".\nls\nls_uni.c">
</File>
<File
RelativePath=".\nls\nls_zh.c">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl">
<File
RelativePath=".\config.h.vc6">
<FileConfiguration
Name="Debug|Win32">
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
"
Outputs="$(InputDir)\config.h"/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32">
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
"
Outputs="$(InputDir)\config.h"/>
</FileConfiguration>
</File>
<File
RelativePath=".\filters\cp932_table.h">
</File>
<File
RelativePath=".\filters\html_entities.h">
</File>
<File
RelativePath=".\mbfl\mbfilter.h">
</File>
<File
RelativePath=".\filters\mbfilter_7bit.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_8bit.h">
</File>
<File
RelativePath=".\filters\mbfilter_ascii.h">
</File>
<File
RelativePath=".\filters\mbfilter_base64.h">
</File>
<File
RelativePath=".\filters\mbfilter_big5.h">
</File>
<File
RelativePath=".\filters\mbfilter_byte2.h">
</File>
<File
RelativePath=".\filters\mbfilter_byte4.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp1251.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp1252.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp866.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp932.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp936.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_cn.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp_win.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_kr.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_tw.h">
</File>
<File
RelativePath=".\filters\mbfilter_htmlent.h">
</File>
<File
RelativePath=".\filters\mbfilter_hz.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso2022_kr.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_1.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_10.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_13.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_14.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_15.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_16.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_2.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_3.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_4.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_5.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_6.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_7.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_8.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_9.h">
</File>
<File
RelativePath=".\filters\mbfilter_jis.h">
</File>
<File
RelativePath=".\filters\mbfilter_koi8r.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_pass.h">
</File>
<File
RelativePath=".\filters\mbfilter_qprint.h">
</File>
<File
RelativePath=".\filters\mbfilter_sjis.h">
</File>
<File
RelativePath=".\filters\mbfilter_ucs2.h">
</File>
<File
RelativePath=".\filters\mbfilter_ucs4.h">
</File>
<File
RelativePath=".\filters\mbfilter_uhc.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf16.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf32.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf7.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf7imap.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf8.h">
</File>
<File
RelativePath=".\filters\mbfilter_uuencode.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_wchar.h">
</File>
<File
RelativePath=".\mbfl\mbfl_allocators.h">
</File>
<File
RelativePath=".\mbfl\mbfl_consts.h">
</File>
<File
RelativePath=".\mbfl\mbfl_convert.h">
</File>
<File
RelativePath=".\mbfl\mbfl_encoding.h">
</File>
<File
RelativePath=".\mbfl\mbfl_filter_output.h">
</File>
<File
RelativePath=".\mbfl\mbfl_ident.h">
</File>
<File
RelativePath=".\mbfl\mbfl_language.h">
</File>
<File
RelativePath=".\mbfl\mbfl_memory_device.h">
</File>
<File
RelativePath=".\mbfl\mbfl_string.h">
</File>
<File
RelativePath=".\nls\nls_de.h">
</File>
<File
RelativePath=".\nls\nls_en.h">
</File>
<File
RelativePath=".\nls\nls_ja.h">
</File>
<File
RelativePath=".\nls\nls_kr.h">
</File>
<File
RelativePath=".\nls\nls_neutral.h">
</File>
<File
RelativePath=".\nls\nls_ru.h">
</File>
<File
RelativePath=".\nls\nls_uni.h">
</File>
<File
RelativePath=".\nls\nls_zh.h">
</File>
<File
RelativePath=".\filters\unicode_prop.h">
</File>
<File
RelativePath=".\filters\unicode_table_big5.h">
</File>
<File
RelativePath=".\filters\unicode_table_cns11643.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp1251.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp1252.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp866.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp932_ext.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp936.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_10.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_13.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_14.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_15.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_16.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_2.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_3.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_4.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_5.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_6.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_7.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_8.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_9.h">
</File>
<File
RelativePath=".\filters\unicode_table_jis.h">
</File>
<File
RelativePath=".\filters\unicode_table_koi8r.h">
</File>
<File
RelativePath=".\filters\unicode_table_uhc.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
<File
RelativePath=".\mbfl.rc">
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -1345,7 +1345,6 @@ mbfl_strcut(
}
#include <stdio.h>
/*
* strwidth
*/
@ -1435,6 +1434,7 @@ collector_strimwidth(int c, void* data)
default:
if (pc->outchar >= pc->from) {
pc->outwidth += (is_fullwidth(c) ? 2: 1);
if (pc->outwidth > pc->width) {
if (pc->status == 0) {
pc->endpos = pc->device.pos;

View File

@ -74,6 +74,7 @@
#define MBFL_WCSPLANE_CP1251 0x70f70000
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_KOI8R 0x70f90000
#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
#define MBFL_WCSGROUP_WCHARMAX 0x78000000

View File

@ -80,6 +80,7 @@
#include "filters/mbfilter_iso8859_13.h"
#include "filters/mbfilter_iso8859_14.h"
#include "filters/mbfilter_iso8859_15.h"
#include "filters/mbfilter_iso8859_16.h"
#include "filters/mbfilter_base64.h"
#include "filters/mbfilter_qprint.h"
#include "filters/mbfilter_uuencode.h"
@ -163,6 +164,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_8859_13,
&mbfl_encoding_8859_14,
&mbfl_encoding_8859_15,
&mbfl_encoding_8859_16,
&mbfl_encoding_euc_cn,
&mbfl_encoding_cp936,
&mbfl_encoding_hz,
@ -295,7 +297,3 @@ mbfl_is_support_encoding(const char *name)
return 1;
}
}

View File

@ -97,6 +97,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866,
mbfl_no_encoding_koi8r,
mbfl_no_encoding_8859_16,
mbfl_no_encoding_charset_max
};

View File

@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby.
It follows the BSD license in the case of the one except for it.
/*-
* Copyright (c) 2002 K.Kosako <kosako@sofnec.co.jp>
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

View File

@ -1,18 +1,479 @@
History
2005/02/19: Version 3.7.0
2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
2005/02/19: [new] (thanks Minero Aoki)
add onig_region_set().
2005/02/19: [API] change onig_region_init() to extern.
2005/02/19: [dist] remove reggnu.c from MANIFEST-RUBY.
remove reggnu.c from make 19.
2005/02/19: [dist] update doc/API and doc/API.ja.
2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
2005/02/19: [impl] (thanks Alexey Zakhlestin)
change UChar* to const UChar* in oniguruma.h,
regenc.h and regparse.h.
2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
onigposix.h and st.h.
2005/02/12: [test] success in ruby 1.9.0 (2005-02-11) [i386-cygwin].
2005/02/12: [bug] (thanks nobu) [ruby-dev:25676]
type_cclass_hash() fix overrun.
2005/02/09: [test] success in ruby 1.9.0 (2005-02-09) [i686-linux].
2005/02/09: [spec] add RE_OPTION_FIND_NOT_EMPTY etc.. to oniggnu.h.
2005/02/09: [dist] remove hash.c.patch.
2005/02/07: [impl] remove re_mbctab, mbctab_ascii etc...
(USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY)
2005/02/04: Version 3.6.0
2005/02/04: [test] success in ruby 1.9.0 (2005-02-04) [i686-linux].
2005/02/01: [bug] add key_free() call to st_free_table().
2005/02/01: [new] add onig_get_default_ambig_flag() and
onig_set_default_ambig_flag().
2005/02/01: [dist] update MANIFEST-RUBY.
2005/01/31: [test] success in ruby 1.9.0 (2005-01-29) [i686-linux].
2005/01/31: [spec] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND
from ONIGENC_AMBIGUOUS_MATCH_DEFAULT.
2005/01/31: [dist] update Makefile.in (make 19).
2005/01/29: [memo] (thanks Kazuo Saito)
Oniguruma 3.5.4 was merged to Ruby 1.9.0.
2005/01/28: [impl] (thanks UK-taniyama)
add extern "C" { } directive to oniguruma.h, oniggnu.h
and onigposix.h for C++.
2005/01/25: [impl] remove nested function call for xxx_code_to_mbclen().
(euc_kr.c, euc_tw.c, big5.c)
2005/01/19: Version 3.5.4
2005/01/19: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
2005/01/19: [bug] (thanks Isao Sonobe)
callback function argument name_end of onig_foreach_name()
was wrong.
name key of name table should be null terminated for
character encoding length.
add strdup_with_null(), rename onig_strdup() to k_strdup().
use e->name_len in i_names().
2005/01/17: [impl] (thanks UK-taniyama)
add HAVE_SYS_TYPES_H to config.h.in.
2005/01/13: Version 3.5.3
2005/01/13: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
2005/01/13: [bug] ignore case match bug.
ex. /s+/iu.match("SSSSS") ==> [4..5]
fix OP_EXACT1_IC, OP_EXACTN_IC process.
2005/01/13: [bug] (thanks Isao Sonobe)
ignore case match bug.
ex. /is/iu.match("ss") fail.
fix str_lower_case_match() etc.
2005/01/05: Version 3.5.2
2005/01/05: [test] success in ruby 1.9.0 (2005-01-05) [i686-linux].
2005/01/05: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
2005/01/05: [bug] (thanks Isao Sonobe)
ignore case match bug.
ex. /s+/iu.match("sssss") ==> [4..5]
fix OP_EXACT1_IC, OP_EXACTN_IC process.
2005/01/05: [bug] (thanks Isao Sonobe)
group name table should be renumbered.
add onig_renumber_name_table().
2004/12/24: [dist] remove file onigcmpt200.h.
2004/12/17: Version 3.5.1
2004/12/17: [dist] add INSTALL-RUBY to archive.
2004/12/16: [test] success in ruby 1.9.0 (2004-12-16) [i686-linux].
2004/12/16: [dist] update hash.c.patch.
2004/12/15: [bug] (thanks matz)
char > 127 should be cast to unsigned char. (utf8.c)
2004/12/13: [impl] add HAVE_PROTOTYPES and HAVE_STDARG_PROTOTYPES definition
to oniguruma.h in the case __cplusplus.
2004/12/06: [dist] update doc/RE and doc/RE.ja.
2004/12/03: [impl] (thanks nobu)
st.h fix prototype for C++.
2004/12/03: Version 3.5.0
2004/12/02: [test] success in ruby 1.9.0 (2004-12-02) [i686-linux].
2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-mswin32].
2004/12/01: [dist] add make targets 19 and 19up to win32/Makefile.
2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i386-cygwin].
2004/12/01: [test] success in ruby 1.9.0 (2004-12-01) [i686-linux].
2004/12/01: [impl] double cast for escape warning in Cygwin.
(HashDataType* )((void* )(&e)) in regparse.c
2004/12/01: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
2004/12/01: [tune] change implementation of clear_opt_map_info().
(which was 10-16% cost in gprof result for my test program)
2004/12/01: [dist] remove regex.c from distribution files.
2004/11/30: [memo] remove targets 16 and 18 from Makefile.in.
2004/11/30: [test] success in ruby 1.9.0 (2004-11-30) [i686-linux].
2004/11/30: [inst] add "cp -p st.[ch] st.[ch].ruby_orig" to "make 19".
2004/11/30: [tune] map_position_value() return 20 if code is 0
and minimum enclen > 1.
2004/11/30: [test] success in ruby 1.9.0 (2004-11-29) [i686-linux].
2004/11/30: [impl] minor changes for multi-thread in regexec.c and regcomp.c.
2004/11/30: [impl] change THREAD_PASS_LIMIT_COUNT value from 10 to 8.
2004/11/30: [impl] add THREAD_ATOMIC_XXX to FreeNodeList access in regparse.c
2004/11/29: [impl] add USE_MULTI_THREAD_SYSTEM.
2004/11/29: [memo] add hash.c.patch to CVS.
2004/11/29: [dist] change mail address to 'sndgk393 AT ...'
2004/11/29: [dist] add -s option (silent mode) to test.rb.
2004/11/29: [tune] change THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS value
from 20 to 8.
2004/11/29: [inst] add make target "19up".
2004/11/29: [dist] change Oniguruma Home Page URL.
2004/11/29: [impl] remove onig_is_in_code_range_array().
2004/11/29: [dist] fix doc/RE and RE.ja (character types).
2004/11/26: [dist] fix win32/Makefile.
2004/11/26: [dist] fix doc/RE and RE.ja (multibyte character types).
2004/11/26: [impl] add onig_free_shared_cclass_table().
2004/11/26: [impl] move definition USE_UNICODE_FULL_RANGE_CTYPE to regenc.h.
2004/11/26: [impl] add opcode OP_CCLASS_NODE.
2004/11/26: [impl] move definition of CClassNode to regint.h.
2004/11/26: [impl] add type PointerType in regint.h.
2004/11/25: [impl] remove ONIGENC_CTYPE_MOD_NOT.
2004/11/25: [impl] rename onig_node_new_cclass_by_codepoint_range to
node_new_cclass_by_codepoint_range.
2004/11/25: [impl] remove get_type_cc_node method from OnigEncodingType.
2004/11/25: [impl] move implementation of shared char-class from enc/*.c
to regparse.c.
2004/11/25: [dist] add hash.c.patch for Ruby 1.9 hash.c change.
2004/11/22: [impl] change utf8_get_type_node().
2004/11/22: [impl] add ONIGENC_CTYPE_MOD_NOT.
2004/11/22: [bug] (thanks MIYAMUKO Katsuyuki)
ruby make test fail in HP-UX B.11.23 ia64.
should use tok->u.code instead of tok->u.c in
the case of TK_CODE_POINT.
2004/11/19: [bug] (thanks Yoshida Masato)
invalid multibyte code causes segmentation fault.
ex. /[\xFF-\xFF]/u
2004/11/19: [bug] (thanks Yoshida Masato)
illegal check in char-class range in UTF-8.
ex. s = "[\xC2\xA0-\xC3\xBE]"
p(Regexp.new(s, nil, "u") =~ "\xC3\xBE")
2004/11/18: [impl] add onig_node_new_cclass_by_codepoint_range().
2004/11/18: [impl] remove OnigCodePointRange type. (use OnigCodePoint[].)
2004/11/17: [bug] (thanks nobu)
abort in "a".gsub(/a\Z/, "")
fix ONIGENC_STEP_BACK() argument in onig_search().
2004/11/16: [impl] add key2 member to st_table_entry in st.[ch].
change API of st for non-null terminated string key.
2004/11/16: [impl] add get_type_cc_node method to OnigEncodingType.
2004/11/15: [impl] add st.h and st.c from Ruby 1.9.
use st-hash always.
2004/11/12: [impl] change member 'not' of CClassNode to 'flags'.
add flags FLAG_CCLASS_NOT and FLAG_CCLASS_SHARE.
2004/11/12: [impl] add onig_is_in_code_range_array() to enc/unicode.c.
2004/11/12: [impl] fix CRWord in enc/unicode.c and MBWord in enc/utf8.c.
2004/11/11: [bug] fix enc/utf8.c.
size 0 array initializer was compile error in VC++.
2004/11/09: [inst] (thanks Hiroki YAGITA)
change installed file mode to 0644.
2004/11/09: [bug] (thanks UK-taniyama)
wrong definitions GET_RELADDR_INC(), GET_ABSADDR_INC()
etc... (NOT PLATFORM_UNALIGNED_WORD_ACCESS)
2004/11/09: [impl] type cast in regexec() to remove compile time warning.
(WIN32, regposix.c)
2004/11/08: [spec] fix Unicode character types.
0x00ad (soft hyphen) should be [:cntrl:] and [:space:] type.
[0x0009..0x000d], 0x0085 should be [:print:] type.
0x00ad should not be [:punct:] type.
2004/11/08: [inst] fix Makefile.in. (for make ctest/ptest/testcu)
2004/11/06: [impl] (thanks Kazuo Saito)
too many alternatives pattern causes core dump.
change implementation of onig_node_free().
2004/11/05: [spec] rename ONIGERR_END_PATTERN_AT_BACKSLASH to
ONIGERR_END_PATTERN_AT_ESCAPE.
2004/11/05: [impl] (thanks matz)
escape compile time warnings for x86-64 Linux.
StackIndex type int -> long
2004/11/05: [memo] (thanks Kazuo Saito)
Oniguruma 3.4.0 was merged to Ruby 1.9.0.
2004/10/30: Version 3.4.0
2004/10/30: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
2004/10/30: [new] add hexadecimal digit char type. (\h, \H)
syntax: ONIG_SYN_OP2_ESC_H_XDIGIT
2004/10/30: [bug] (thanks Guy Decoux)
reluctant infinite repeat bug.
ex. /^[a-z]{2,}?$/.match("aaa") fail.
fix OP_REPEAT_INC_NG process in match_at().
2004/10/18: Version 3.3.1
2004/10/18: [test] success in ruby 1.9.0 (2004-09-24) [i686-linux].
2004/10/18: [impl] (thanks Imai Yasumasa)
enclose #include <sys/types.h> by #ifndef __BORLANDC__.
2004/10/18: [bug] (thanks Imai Yasumasa)
memory access violation in select_opt_exact_info().
2004/09/25: [dist] fix doc/API and doc/API.ja.
2004/09/25: [bug] fix OP_SEMI_END_BUF process in match_at() for
the case USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
is not defined.
2004/09/17: Version 3.3.0
2004/09/17: [dist] add COPYING to program source files.
2004/09/17: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
2004/09/17: [bug] (thanks Isao Sonobe)
memory access violations in xxx_mbc_enc_len(),
and xxx_mbc_to_normalize() and
xxx_left_adjust_char_head().
add string range check in match_at() and onig_search().
2004/09/08: [dist] change mail address format.(kosako AT sofnec ...)
2004/09/04: Version 3.2.9
2004/09/04: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
2004/09/04: [bug] (thanks Bob Kerstetter and Richard Koch)
search fail in ignore case mode.
fix str_lower_case_match().
2004/09/04: [inst] (thanks Isao Sonobe)
clear sample directory in 'make clean'.
2004/09/04: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
meanings in XXXXX_mbc_to_normalize() and
XXXXX_is_mbc_ambiguous().
2004/08/28: [bug] fix ONIGENC_AMBIGUOUS_MATCH_COMPOUND/ASCII/NONASCII
meanings in iso_8859_XX_mbc_to_normalize() and
iso_8859_XX_is_mbc_ambiguous().
2004/08/24: Version 3.2.8
2004/08/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
2004/08/24: [spec] add ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY.
/a{n}?/ == /(?:a{n})?/
2004/08/24: [dist] fix doc/RE and doc/RE.ja.
2004/08/24: [bug] (thanks starfish)
memory leak in set_optimize_exact_info().
2004/08/21: Version 3.2.7
2004/08/21: [test] success in ruby 1.8.2 (2004-07-28) [i686-linux].
(1.8.2 preview2)
2004/08/21: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
2004/08/21: [bug] (thanks Isao Sonobe) (thanks kage)
memory access violation in bm_search_notrev().
(forgotten to merge from 2.X)
2004/07/24: Version 3.2.6
2004/07/24: [test] success in ruby 1.9.0 (2004-07-23) [i686-linux].
2004/07/24: [test] success in ruby 1.8.2 (2004-07-16) [i686-linux].
2004/07/24: [bug] fix warnings for regexec.c. (gcc 2.91.66)
2004/07/24: [memo] change version control system from Subversion
to CVS 1.11.17.
2004/07/20: [bug] (thanks Isao Sonobe)
illegal result in negative character class in ignore case
mode. fix pair-ambig-codes process in parse_exp().
ex. /[^a]/i.match("A")
2004/07/20: [bug] (thanks Isao Sonobe)
undefined bytecode error happens in UTF-16BE etc..
compile_length_cclass_node() was not consistent with
compile_cclass_node().
2004/07/01: Version 3.2.5
2004/07/01: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
2004/07/01: [new] add onig_get_syntax_{op,op2,behavior,options}.
2004/07/01: [bug] (thanks Isao Sonobe)
invalid result in onig_capture_tree_traverse().
fix make_capture_history_tree().
2004/06/29: Version 3.2.4
2004/06/29: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
2004/06/29: [new] (thanks Isao Sonobe)
add onig_number_of_captures().
2004/06/25: Version 3.2.3
2004/06/25: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
2004/06/25: [bug] (thanks Isao Sonobe)
invalid result in onig_capture_tree_traverse().
fix make_capture_history_tree().
2004/06/24: Version 3.2.2
2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/06/24: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/06/24: [test] success in ruby 1.8.2 (2004-06-23) [i686-linux].
2004/06/24: [new] (thanks Isao Sonobe)
add onig_number_of_capture_histories().
2004/06/24: [bug] (thanks Isao Sonobe)
invalid char position match in UTF-16 and UTF-32.
add onigenc_always_false_is_allowed_reverse_match().
2004/06/17: Version 3.2.1
2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/06/17: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/06/17: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
2004/06/17: [impl] should not use OP_REPEAT for (...)? even if target size
is long.
2004/06/17: [bug] (thanks nobu) [ruby-dev:23703]
should use STACK_AT() instead of stkp in OP_REPEAT_INC.
add IN_VAR_REPEAT flag in setup_tree().
2004/06/16: [impl] change select_opt_exact_info() to use ByteValTable[].
2004/06/16: [impl] change map_position_value() table values.
2004/06/14: [impl] (thanks John Carter)
RelAddrType, AbsAddrType and LengthType change
from short int to int type for the very long string match.
2004/06/14: [bug] (thanks Greg A. Woods)
fix the case where the nmatch argument of regexec() is smaller
than reg->num_mem + 1. (POSIX API)
2004/06/14: [spec] (thanks Greg A. Woods)
set pmatch to NULL if nmatch is 0 in regexec(). (POSIX API)
2004/06/10: Version 3.2.0
2004/06/10: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/06/10: [test] success in ruby 1.9.0 (2004-05-27) [i386-mswin32].
2004/06/10: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
2004/06/10: [dist] add README.ja.
2004/06/10: [new] add onig_copy_encoding().
2004/06/10: [API] add encoding argument to onig_set_meta_char().
add meta_char_table member to OnigEncodingType.
2004/06/08: [dist] add doc/API.ja.
2004/06/07: [API] add num_of_elements member to OnigCompileInfo.
2004/05/29: [memo] (thanks Kazuo Saito)
Oniguruma 3.1.0 was merged to Ruby 1.9.0.
2004/05/26: [impl] rename NST_SIMPLE_REPEAT to NST_STOP_BT_SIMPLE_REPEAT.
2004/05/26: [impl] doesn't need to check that target's simple repeat-ness
for EFFECT_MEMORY type node in setup_tree().
2004/05/25: Version 3.1.0
2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/05/25: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/05/25: [test] success in ruby 1.9.0 (2004-05-23) [i686-linux].
2004/05/25: [test] success in ruby 1.8.2 (2004-05-18) [i686-linux].
2004/05/25: [bug] (thanks Masahiro Sakai) [ruby-dev:23560]
ruby -ruri -ve 'URI::ABS_URI =~
"http://example.org/Andr\xC3\xA9"'
nested STK_REPEAT type stack can't backtrack repeat_stk[].
add OP_REPEAT_INC_SG and OP_REPEAT_INC_NG_SG.
2004/05/25: [new] support UTF-32LE. (ONIG_ENCODING_UTF32_LE)
2004/05/25: [new] support UTF-32BE. (ONIG_ENCODING_UTF32_BE)
2004/05/24: [impl] divide enc/utf16.c to utf16_be.c and utf16_le.c.
2004/05/24: [impl] add enc/unicode.c.
2004/05/24: [API] change calling sequences of onig_new_deluxe() and
onig_recompile_deluxe().
define OnigCompileInfo type.
2004/05/21: [impl] perform ensure process for rb_trap_exec() in match_at().
add onig_exec_trap() and CHECK_INTERRUPT_IN_MATCH_AT.
2004/05/21: [impl] add regex status check to onig_match().
2004/05/21: [new] add onig_get_capture_tree() and
onig_capture_tree_traverse().
2004/05/20: [spec] (thanks Isao Sonobe)
capture history returns capture data tree.
(see sample/listcap.c)
2004/05/19: [bug] (thanks Simon Strandgaard)
Control-C does not work in matching process on Ruby.
add calling of CHECK_INTERRUPT into match_at().
ex. /<(?:[^">]+|"[^"]*")+>/.match('<META http-equiv= \
"Content-Type content="text/html; charset=iso-8859-1">')
2004/05/19: [bug] (thanks Simon Strandgaard)
define virtual codepoint values for invalid encoding
byte 0xfe and 0xff in UTF-8.
ex. /\w+/u.match("%a\xffb\xfec%") ==> "a"
2004/05/19: [spec] (thanks Simon Strandgaard)
too big backref number should be treated as a sequence of
an octal char and number digits.
ex. /b\3777\c/.match("b\3777\c")
2004/05/17: [spec] rename encoding names "UTF-16 BE" and "UTF-16 LE"
to "UTF-16BE" and "UTF-16LE".
2004/05/17: [impl] move ismbchar() and mbclen() from oniguruma.h to oniggnu.h.
2004/05/17: [impl] rename onigenc_single_byte_is_allowed_reverse_match() to
onigenc_always_true_is_allowed_reverse_match().
2004/05/14: Version 3.0.0
2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/05/14: [test] success in ruby 1.9.0 (2004-05-14) [i686-linux].
2004/05/14: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
(* need to edit parse.y:
register int c; ---> int c; in yylex())
2004/05/14: [impl] add regext.c.
2004/05/14: [spec] KOI8 is not included in library archive by default setup.
2004/05/14: [impl] implementation changes are completed for all encoding files.
2004/05/12: [impl] add divide_ambig_string_node().
ambiguous string is divided and normalized before
optimization and compilation process.
2004/05/11: [dist] remove INSTALL-RUBY from distribution.
2004/04/28: [memo] (thanks Kazuo Saito)
Oniguruma 2.2.8 was merged to Ruby 1.9.0.
2004/04/26: [spec] change value DEFAULT_MATCH_STACK_LIMIT_SIZE = 0 : unlimited
2004/04/26: [new] add onig_get_match_stack_limit_size() and
onig_set_match_stack_limit_size().
2004/04/26: [bug] add error check to re.c.181.patch and re.c.168.patch.
2004/04/23: [impl] remove ctype_support_level from OnigEncodingType.
2004/04/22: [spec] allow the range from single byte char to multibyte char in
character class for implementation reason.
ex. /[a-\xbb\xcc]/ in EUC-JP encoding.
2004/04/21: [impl] remove max_enc_len_by_first_byte() from OnigEncodingType.
2004/04/20: [new] add onig_copyright().
2004/04/20: [impl] add regversion.c.
2004/04/15: [new] add onig_get_ambig_flag().
2004/04/14: [bug] (thanks Isao Sonobe)
undefined bytecode error happens if ONIG_OPTION_FIND_LONGEST
is set.
should finish matching process if find-condition
fails at OP_END in match_at().
2004/04/12: [impl] add ambig_flag to regex_t.
2004/04/09: [impl] move onig_set_meta_char() to regsyntax.c.
2004/04/09: [bug] (thanks HIROSE Masaaki) fix onig_version().
2004/04/08: [impl] add regsyntax.c.
2004/04/07: [new] support UTF-16 LE. (ONIG_ENCODING_UTF16_LE)
2004/04/05: [impl] add ONIGENC_CTYPE_NEWLINE.
2004/04/05: [memo] (thanks Kazuo Saito)
Oniguruma 2.2.6 was merged to Ruby 1.9.0.
2004/04/02: [memo] Version 2.2.6 was released.
2004/03/26: [new] support UTF-16 BE. (ONIG_ENCODING_UTF16_BE)
2004/03/25: [spec] support non 8-bit encodings.
2004/03/16: [memo] 2.X branch for 8-bit encodings only.
2004/03/16: Version 2.2.5
2004/03/16: [test] success in ruby 1.8.0 (2003-08-08) [i386-mswin32].
2004/03/16: [test] success in ruby 1.9.0 (2004-02-24) [i686-linux].
2004/03/16: [impl] add property name to error message of
ONIGERR_INVALID_CHAR_PROPERTY_NAME.
2004/03/16: [spec] allow prefix 'Is' for \p{...} in ONIG_SYNTAX_PERL.
add syntax op. ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS.
2004/03/15: [dist] add sample/syntax.c.
2004/03/15: [spec] support NOT op. in char property. \p{^...}, \P{^...}.
add syntax op. ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT.
2004/03/15: [spec] rename ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY to
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY.
2004/03/10: [impl] move ONIGERR_XXX from regenc.h to oniguruma.h,
rename ONIGERR_XXX to ONIGENCERR_XXX in regenc.h.
2004/03/08: [impl] (thanks eban)
replace defined(__CYGWIN__) to defined(__GNUC__).
2004/03/08: [bug] (thanks eban) [ruby-dev:23172]
need to separate initialization for bcc32.
2004/03/06: [memo] (thanks Kazuo Saito)
Oniguruma 2.2.4 was merged to Ruby 1.9.0.
2004/03/05: [API] change second argument type of onig_set_meta_char()
from unsigned int to OnigCodePoint.
2004/03/05: [dist] (thanks Kazuo Saito)
add MANIFEST-RUBY.
2004/03/04: Version 2.2.4
2004/03/04: [impl] (thanks Moriyoshi Koizumi)
fix many warning in Win32 VC++ with /W3 option.
fix many warnings in Win32 VC++ with /W3 option.
2004/03/02: Version 2.2.3
2004/03/02: [bug] (thanks Isao Sonobe)
return invalid capture region value if capture history
is used. (OP_MEMORY_END_PUSH_REC bug)
ex. /\\g<p>(?@<p>\\(\\g<s>\\)){0}(?<s>(?:\\g<p>)*|){0}/
ex. /\g<p>(?@<p>\(\g<s>\)){0}(?<s>(?:\g<p>)*|){0}/
.match("((())())")
2004/03/02: [impl] add :nodoc: to onig_stat_print() for RDoc.
2004/03/02: [impl] (thanks Kazuo Saito)
add :nodoc: to onig_stat_print() for RDoc.
2004/03/02: [impl] don't use ONIG_SOURCE_IS_WRAPPED.
2004/02/27: Version 2.2.2
@ -91,14 +552,14 @@ History
(0x80 - 0xff is not ASCII)
2004/01/23: [new] support ISO-8859-2. (ONIG_ENCODING_ISO_8859_2)
2004/01/23: [dist] add enc/isotable.c.
2004/01/22; [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
2004/01/22: [new] support EUC-TW. (ONIG_ENCODING_EUC_TW)
2004/01/22: [bug] definition of GET_ALIGNMENT_PAD_SIZE() and
ALIGNMENT_RIGHT() was wrong.
type casting should be unsigned int, not int.
2004/01/22: [impl] add defined(__x86_64) || defined(__x86_64__)
to unaligned word access condition. (AMD64 ?)
2004/01/21: [dist] rename enc/eucjp.c to enc/euc_jp.c.
2004/01/21; [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
2004/01/21: [new] support EUC-KR. (ONIG_ENCODING_EUC_KR)
2004/01/20: [test] success in ruby 1.8.0 (2003-08-08) [i386-cygwin].
2004/01/20: [dist] change Makefile.in.
2004/01/20: [spec] add \p{...}, \P{...} in char class.
@ -883,5 +1344,16 @@ History
[test: test]
[memo: memo]
--
<branch>
svn mkdir http://localhost/repos/branches -m ""
svn mkdir http://localhost/repos/branches/oniguruma -m ""
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only"
<create tag>
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
<show all tags>
cvs history -T
<add tag>
cvs rtag "VERSION_X_X_X" oniguruma

View File

@ -1,7 +1,8 @@
README 2004/02/25
README 2005/02/04
Oniguruma ---- (C) K.Kosako <kosako@sofnec.co.jp>
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
@ -11,45 +12,38 @@ for every regular expression object can be specified.
Supported character encodings:
ASCII, UTF-8,
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, KOI8, KOI8-R,
Shift_JIS, Big5, KOI8-R, KOI8 (*),
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
There are two ways of using of it in this program.
* Built-in regular expression engine of Ruby
* C library (supported APIs: GNU regex, POSIX, Oniguruma native)
* KOI8 is not included in library archive by default setup.
(need to edit Makefile if you want to use it.)
------------------------------------------------------------
Install
(A) Install into Ruby
See INSTALL-RUBY.
(character encodings: ASCII, UTF-8, EUC-JP, Shift_JIS)
(B) Install C library
(B-1) Unix and Cygwin platform
Case 1: Unix and Cygwin platform
1. ./configure
2. make
3. make install
(* uninstall: make uninstall)
library file: libonig.a
* test (ASCII/EUC-JP)
4. make ctest
test (ASCII/EUC-JP)
make ctest
uninstall
make uninstall
(B-2) Win32 platform (VC++)
Case 2: Win32 platform (VC++)
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
@ -77,6 +71,16 @@ Regular Expressions
See doc/RE (or doc/RE.ja for Japanese).
Usage
Include oniguruma.h in your program. (native API)
See doc/API for native API.
If you want to use the static link library (onig_s.lib) in Win32,
add the option -DONIG_EXTERN=extern to the C compiler.
Sample Programs
sample/simple.c example of the minimum (native API)
@ -86,14 +90,12 @@ Sample Programs
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
sample/syntax.c Perl and Java syntax test.
Source Files
oniguruma.h Oniguruma API header file. (public)
oniggnu.h GNU regex API header file. (public)
onigcmpt200.h Oniguruma API backward compatibility header file. (public)
(for 2.0.0 or more older version)
regenc.h character encodings framework header file.
regint.h internal definitions
@ -101,17 +103,31 @@ Source Files
regcomp.c compiling and optimization functions
regenc.c character encodings framework.
regerror.c error message function
regex.c source files wrapper for Ruby
regext.c extended API functions. (deluxe version API)
regexec.c search and match functions
regparse.c parsing functions.
regsyntax.c pattern syntax functions and built-in syntax definitions.
regtrav.c capture history tree data traverse functions.
regversion.c version info function.
st.h hash table functions header file
st.c hash table functions
oniggnu.h GNU regex API header file. (public)
reggnu.c GNU regex API functions
onigposix.h POSIX API header file. (public)
regposerr.c POSIX error message function.
regposix.c POSIX functions.
regposix.c POSIX API functions.
enc/mktable.c character type table generator.
enc/ascii.c ASCII encoding.
enc/euc_jp.c EUC-JP encoding.
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
enc/big5.c Big5 encoding.
enc/koi8.c KOI8 encoding.
enc/koi8_r.c KOI8-R encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
@ -128,18 +144,19 @@ Source Files
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
enc/iso8859_16.c ISO-8859-16 encoding.
(Latin-10 or South-Eastern European with Euro)
enc/utf8.c UTF-8 encoding.
enc/euc_jp.c EUC-JP encoding.
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
enc/koi8.c KOI8 encoding.
enc/koi8_r.c KOI8-R encoding.
enc/big5.c Big5 encoding.
enc/utf8.c UTF-8 encoding.
enc/utf16_be.c UTF-16BE encoding.
enc/utf16_le.c UTF-16LE encoding.
enc/utf32_be.c UTF-32BE encoding.
enc/utf32_le.c UTF-32LE encoding.
enc/unicode.c Unicode information data.
win32/Makefile Makefile for Win32 (VC++)
win32/config.h config.h for Win32
API differences with Japanized GNU regex(version 0.12) of Ruby
API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
+ re_compile_fastmap() is removed.
+ re_recompile_pattern() is added.
@ -148,18 +165,17 @@ API differences with Japanized GNU regex(version 0.12) of Ruby
ToDo
1 support 16-bit encodings. (UTF-16)
2 different encoding pattern with target.
(ex. ASCII/UTF-16, UTF-16 BE and UTF-16 LE)
3 add enc/name.c (onigenc_get_enc_by_name(name))
? transmission stopper. (return ONIG_STOP from match_at())
? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
? better access to hash table (st.c).
non null-terminated key version st_lookup().
? grep-like tool 'onigrep'.
? return parse tree of regexp pattern to application.
?? /a{n}?/ should be interpreted as /(?:a{n})?/.
?? \h hexadecimal digit char ([0-9a-fA-F]), \H not \h.
? ignore case in full code point range of Unicode.
? Unicode Property.
? ambig-flag Katakana <-> Hiragana.
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
? add ONIG_SYNTAX_ASIS.
?? \X (== \PM\pM*)
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
?? variable line separator.
?? transmission stopper. (return ONIG_STOP from match_at())
and I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

View File

@ -0,0 +1,177 @@
README.ja 2005/02/04
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
鬼車は正規表現ライブラリである。
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
文字エンコーディングを指定できることである。
サポートしている文字エンコーディング:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, KOI8-R, KOI8 (*),
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* KOI8はデフォルトのセットアップではライブラリの中に含まれない。
(必要であればMakefileを編集すること)
------------------------------------------------------------
インストール
ケース1: UnixとCygwin環境
1. ./configure
2. make
3. make install
ライブラリファイル: libonig.a
動作テスト (ASCII/EUC-JP)
make ctest
アンインストール
make uninstall
ケース2: Win32(VC++)環境
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* 動作テスト (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
ライセンス
このソフトウェアがRubyと一緒に使用または配布される場合には、
Rubyのライセンスに従う。
それ以外の場合には、BSDライセンスに従う。
正規表現
doc/RE.jaを参照
使用方法
使用するプログラムで、oniguruma.hをインクルードする(Native APIの場合)。
Native APIについては、doc/API.jaを参照。
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
使用例プログラム
sample/simple.c 最小例 (native API)
sample/names.c 名前付きグループコールバック使用例
sample/encode.c 幾つかの文字エンコーディング使用例
sample/listcap.c 捕獲履歴機能の使用例
sample/posix.c POSIX API使用例
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
sample/syntax.c PerlとJava文法のテスト
ソースファイル
oniguruma.h 鬼車APIヘッダ (公開)
regenc.h 文字エンコーディング枠組みヘッダ
regint.h 内部宣言
regparse.h regparse.cとregcomp.cのための内部宣言
regcomp.c コンパイル、最適化関数
regenc.c 文字エンコーディング枠組み
regerror.c エラーメッセージ関数
regext.c 拡張API関数
regexec.c 検索、照合関数
regparse.c 正規表現パターン解析関数
regsyntax.c 正規表現パターン文法関数、組込み文法定義
regtrav.c 捕獲履歴木巡回関数
regversion.c 版情報関数
st.h ハッシュテーブル関数宣言
st.c ハッシュテーブル関数
oniggnu.h GNU regex APIヘッダ (公開)
reggnu.c GNU regex API関数
onigposix.h POSIX APIヘッダ (公開)
regposerr.c POSIX APIエラーメッセージ関数
regposix.c POSIX API関数
enc/mktable.c 文字タイプテーブル生成プログラム
enc/ascii.c ASCII エンコーディング
enc/euc_jp.c EUC-JP エンコーディング
enc/euc_tw.c EUC-TW エンコーディング
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
enc/sjis.c Shift_JIS エンコーディング
enc/big5.c Big5 エンコーディング
enc/koi8.c KOI8 エンコーディング
enc/koi8_r.c KOI8-R エンコーディング
enc/iso8859_1.c ISO-8859-1 (Latin-1)
enc/iso8859_2.c ISO-8859-2 (Latin-2)
enc/iso8859_3.c ISO-8859-3 (Latin-3)
enc/iso8859_4.c ISO-8859-4 (Latin-4)
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
enc/iso8859_6.c ISO-8859-6 (Arabic)
enc/iso8859_7.c ISO-8859-7 (Greek)
enc/iso8859_8.c ISO-8859-8 (Hebrew)
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
enc/iso8859_11.c ISO-8859-11 (Thai)
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
enc/iso8859_16.c ISO-8859-16
(Latin-10 または South-Eastern European with Euro)
enc/utf8.c UTF-8 エンコーディング
enc/utf16_be.c UTF-16BE エンコーディング
enc/utf16_le.c UTF-16LE エンコーディング
enc/utf32_be.c UTF-32BE エンコーディング
enc/utf32_le.c UTF-32LE エンコーディング
enc/unicode.c Unicode情報
win32/Makefile Win32用 Makefile (for VC++)
win32/config.h Win32用 config.h
Ruby 1.8/1.6の日本語化GNU regexとのAPIの違い
+ re_compile_fastmap() は削除された。
+ re_recompile_pattern() が追加された。
+ re_alloc_pattern() が追加された。
残件
? Unicode全コードポイント領域での大文字小文字照合
? Unicodeプロパティ
? ambig-flag Katakana <-> Hiragana
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
? ONIG_SYNTAX_ASIS追加
?? \X (== \PM\pM*)
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
?? 改行文字(文字列)を変更できる
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
and I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>

View File

@ -49,6 +49,9 @@
/* Define if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H

View File

@ -1,14 +1,36 @@
/**********************************************************************
ascii.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
static int
ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
@ -17,38 +39,29 @@ ascii_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingASCII = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"US-ASCII", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* min byte length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_to_lower,
onigenc_ascii_mbc_is_case_ambig,
ascii_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
onigenc_ascii_mbc_to_normalize,
onigenc_ascii_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
ascii_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,14 +1,61 @@
/**********************************************************************
big5.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Per-byte length table for Big5, indexed by a byte value (0x00-0xff).
 * Entries are 1 for 0x00-0xa0 and 0xff, and 2 for 0xa1-0xfe.
 * Read by big5_mbc_enc_len() and by the BIG5_ISMB_FIRST() macro, which
 * treats any byte whose entry is greater than 1 as a multibyte lead byte.
 */
static int EncLen_BIG5[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Return the EncLen_BIG5 entry for the byte at p, i.e. the byte length
 * that table assigns to a Big5 character starting with that byte.
 * p must point at a readable byte; it is dereferenced unconditionally.
 */
static int
big5_mbc_enc_len(const UChar* p)
{
  const int len = EncLen_BIG5[p[0]];

  return len;
}
static OnigCodePoint
big5_mbc_to_code(UChar* p, UChar* end)
big5_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
}
@ -20,15 +67,23 @@ big5_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
big5_mbc_to_lower(UChar* p, UChar* lower)
big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_BIG5, p, lower);
return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
pp, end, lower);
}
static int
big5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
return onigenc_mb2_code_is_ctype(ONIG_ENCODING_BIG5, code, ctype);
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
}
/*
 * Character-type query for Big5: forwards code and ctype unchanged to
 * onigenc_mb2_is_code_ctype() with ONIG_ENCODING_BIG5 as the encoding and
 * returns whatever that helper reports.
 */
static int
big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  int result;

  result = onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
  return result;
}
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
@ -50,16 +105,16 @@ static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
#define BIG5_ISMB_FIRST(byte) (OnigEncodingBIG5.len_table[byte] > 1)
#define BIG5_ISMB_FIRST(byte) (EncLen_BIG5[byte] > 1)
#define BIG5_ISMB_TRAIL(byte) BIG5_CAN_BE_TRAIL_TABLE[(byte)]
static UChar*
big5_left_adjust_char_head(UChar* start, UChar* s)
big5_left_adjust_char_head(const UChar* start, const UChar* s)
{
UChar *p;
const UChar *p;
int len;
if (s <= start) return s;
if (s <= start) return (UChar* )s;
p = s;
if (BIG5_ISMB_TRAIL(*p)) {
@ -70,53 +125,44 @@ big5_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
len = enc_len(ONIG_ENCODING_BIG5, *p);
if (p + len > s) return p;
len = enc_len(ONIG_ENCODING_BIG5, p);
if (p + len > s) return (UChar* )p;
p += len;
return p + ((s - p) & ~1);
return (UChar* )(p + ((s - p) & ~1));
}
static int
big5_is_allowed_reverse_match(UChar* s, UChar* end)
big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
UChar c = *s;
const UChar c = *s;
return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
}
OnigEncodingType OnigEncodingBIG5 = {
big5_mbc_enc_len,
"Big5", /* name */
2, /* max enc length */
1, /* min enc length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
"Big5", /* name */
2, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
onigenc_mb2_code_to_mbclen,
big5_code_to_mbc,
big5_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
big5_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
big5_mbc_to_normalize,
big5_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
big5_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
big5_is_allowed_reverse_match
};

View File

@ -1,23 +1,69 @@
/**********************************************************************
euc_jp.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Non-zero when (c - 0xa1), converted to UChar, is greater than 0xfe - 0xa1.
 * Assuming UChar is an unsigned 8-bit type (TODO confirm in regenc.h), that
 * means c lies outside 0xa1..0xfe. Used by eucjp_left_adjust_char_head() to
 * scan backwards for the start of a character.
 */
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
/*
 * Per-byte length table for EUC-JP, indexed by a byte value (0x00-0xff).
 * Entries are 2 for 0x8e, 3 for 0x8f, 2 for 0xa1-0xfe, and 1 for every
 * other byte. Read by eucjp_mbc_enc_len().
 */
static int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Return the EncLen_EUCJP entry for the byte at p, i.e. the byte length
 * that table assigns to an EUC-JP character starting with that byte.
 * p must point at a readable byte; it is dereferenced unconditionally.
 */
static int
eucjp_mbc_enc_len(const UChar* p)
{
  const int len = EncLen_EUCJP[p[0]];

  return len;
}
static OnigCodePoint
eucjp_mbc_to_code(UChar* p, UChar* end)
eucjp_mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
c = *p++;
len = enc_len(ONIG_ENCODING_EUC_JP, c);
n = c;
len = enc_len(ONIG_ENCODING_EUC_JP, p);
n = (OnigCodePoint )*p++;
if (len == 1) return n;
for (i = 1; i < len; i++) {
@ -31,11 +77,13 @@ eucjp_mbc_to_code(UChar* p, UChar* end)
static int
eucjp_code_to_mbclen(OnigCodePoint code)
{
if ((code & 0xff0000) != 0) return 3;
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
else if ((code & 0xff0000) != 0) return 3;
else if ((code & 0xff00) != 0) return 2;
else return 1;
else return 0;
}
#if 0
static int
eucjp_code_to_mbc_first(OnigCodePoint code)
{
@ -43,27 +91,16 @@ eucjp_code_to_mbc_first(OnigCodePoint code)
if ((code & 0xff0000) != 0) {
first = (code >> 16) & 0xff;
/*
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 3)
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
*/
}
else if ((code & 0xff00) != 0) {
first = (code >> 8) & 0xff;
/*
if (enc_len(ONIG_ENCODING_EUC_JP, first) != 2)
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
*/
}
else {
/*
if (enc_len(ONIG_ENCODING_EUC_JP, code) != 1)
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
*/
return (int )code;
}
return first;
}
#endif
static int
eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
@ -75,44 +112,57 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
if (enc_len(ONIG_ENCODING_EUC_JP, buf[0]) != (p - buf))
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
static int
eucjp_mbc_to_lower(UChar* p, UChar* lower)
eucjp_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
int len;
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1;
}
else {
len = enc_len(ONIG_ENCODING_EUC_JP, *p);
len = enc_len(ONIG_ENCODING_EUC_JP, p);
if (lower != p) {
/* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
static int
eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end);
}
static int
eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
int first = eucjp_code_to_mbc_first(code);
return (enc_len(ONIG_ENCODING_EUC_JP, first) > 1 ? TRUE : FALSE);
}
else
return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@ -125,28 +175,28 @@ eucjp_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
static UChar*
eucjp_left_adjust_char_head(UChar* start, UChar* s)
eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
/* In this encoding,
mb-trail bytes don't mix with single bytes.
*/
UChar *p;
const UChar *p;
int len;
if (s <= start) return s;
if (s <= start) return (UChar* )s;
p = s;
while (!eucjp_islead(*p) && p > start) p--;
len = enc_len(ONIG_ENCODING_EUC_JP, *p);
if (p + len > s) return p;
len = enc_len(ONIG_ENCODING_EUC_JP, p);
if (p + len > s) return (UChar* )p;
p += len;
return p + ((s - p) & ~1);
return (UChar* )(p + ((s - p) & ~1));
}
static int
eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
UChar c = *s;
const UChar c = *s;
if (c <= 0x7e || c == 0x8e || c == 0x8f)
return TRUE;
else
@ -154,38 +204,29 @@ eucjp_is_allowed_reverse_match(UChar* s, UChar* end)
}
OnigEncodingType OnigEncodingEUC_JP = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
},
eucjp_mbc_enc_len,
"EUC-JP", /* name */
3, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
3, /* max enc length */
1, /* min enc length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
eucjp_mbc_to_code,
eucjp_code_to_mbclen,
eucjp_code_to_mbc,
eucjp_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
eucjp_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
eucjp_mbc_to_normalize,
eucjp_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
eucjp_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
eucjp_left_adjust_char_head,
eucjp_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
eucjp_is_allowed_reverse_match
};

View File

@ -1,14 +1,61 @@
/**********************************************************************
euc_kr.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Per-byte length table for EUC-KR, indexed by a byte value (0x00-0xff).
 * Entries are 1 for 0x00-0xa0 and 0xff, and 2 for 0xa1-0xfe.
 * Read by euckr_mbc_enc_len(), which both the EUC-KR and EUC-CN encoding
 * definitions in this file use as their length function.
 */
static int EncLen_EUCKR[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * Return the EncLen_EUCKR entry for the byte at p, i.e. the byte length
 * that table assigns to an EUC-KR character starting with that byte.
 * p must point at a readable byte; it is dereferenced unconditionally.
 */
static int
euckr_mbc_enc_len(const UChar* p)
{
  const int len = EncLen_EUCKR[p[0]];

  return len;
}
static OnigCodePoint
euckr_mbc_to_code(UChar* p, UChar* end)
euckr_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
}
@ -20,117 +67,107 @@ euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
}
static int
euckr_mbc_to_lower(UChar* p, UChar* lower)
euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_EUC_KR, p, lower);
return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag,
pp, end, lower);
}
static int
euckr_code_is_ctype(OnigCodePoint code, unsigned int ctype)
euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
return onigenc_mb2_code_is_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
}
/*
 * Character-type query for EUC-KR: forwards code and ctype unchanged to
 * onigenc_mb2_is_code_ctype() with ONIG_ENCODING_EUC_KR as the encoding
 * and returns whatever that helper reports.
 */
static int
euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  int result;

  result = onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
  return result;
}
/*
 * Non-zero when byte c is below 0xa1 or equal to 0xff -- the same bytes
 * whose EncLen_EUCKR entry is 1. Used by euckr_left_adjust_char_head() to
 * scan backwards for the start of a character.
 * Note: c may be evaluated twice, so pass an expression without side effects.
 */
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
static UChar*
euckr_left_adjust_char_head(UChar* start, UChar* s)
euckr_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
UChar *p;
const UChar *p;
int len;
if (s <= start) return s;
if (s <= start) return (UChar* )s;
p = s;
while (!euckr_islead(*p) && p > start) p--;
len = enc_len(ONIG_ENCODING_EUC_KR, *p);
if (p + len > s) return p;
len = enc_len(ONIG_ENCODING_EUC_KR, p);
if (p + len > s) return (UChar* )p;
p += len;
return p + ((s - p) & ~1);
return (UChar* )(p + ((s - p) & ~1));
}
static int
euckr_is_allowed_reverse_match(UChar* s, UChar* end)
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
UChar c = *s;
const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
OnigEncodingType OnigEncodingEUC_KR = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
},
euckr_mbc_enc_len,
"EUC-KR", /* name */
2, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
2, /* max enc length */
1, /* min enc length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
euckr_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
euckr_mbc_to_normalize,
euckr_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
euckr_is_allowed_reverse_match
};
/* Same as OnigEncodingEUC_KR except for the name */
OnigEncodingType OnigEncodingEUC_CN = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
},
euckr_mbc_enc_len,
"EUC-CN", /* name */
2, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
2, /* max enc length */
1, /* min enc length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
onigenc_mb2_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
euckr_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
euckr_mbc_to_normalize,
euckr_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
euckr_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
euckr_is_allowed_reverse_match
};

View File

@ -1,14 +1,61 @@
/**********************************************************************
euc_tw.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Encoded length in bytes of a character, indexed by the value of its first
 * byte (one row per 16 byte values).  Bytes 0xa1-0xfe map to 2, byte 0x8e
 * maps to 4, and every other byte maps to 1. */
static int EncLen_EUCTW[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/* Return the encoded byte length of the character that starts at p, taken
 * from EncLen_EUCTW using the first byte as the index. */
static int
euctw_mbc_enc_len(const UChar* p)
{
  const UChar lead_byte = *p;

  return EncLen_EUCTW[lead_byte];
}
/* Convert the character at p (bounded by end) to a code point by delegating
 * to onigenc_mbn_mbc_to_code() with the EUC-TW encoding.
 * NOTE(review): two signature lines follow (without and with const); this
 * looks like a diff rendering that shows the removed and the added line
 * together -- confirm which one is live. */
static OnigCodePoint
euctw_mbc_to_code(UChar* p, UChar* end)
euctw_mbc_to_code(const UChar* p, const UChar* end)
{
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
}
@ -20,79 +67,78 @@ euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
}
/* Normalize the character at *pp into the buffer `lower` by delegating to
 * the generic multi-byte helper with the EUC-TW encoding; the helper's
 * return value is passed through unchanged.
 * NOTE(review): both euctw_mbc_to_lower (with its single-line body) and
 * euctw_mbc_to_normalize appear below; this looks like the removed and the
 * added variant of a diff shown together -- confirm which one is live. */
static int
euctw_mbc_to_lower(UChar* p, UChar* lower)
euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
return onigenc_mbn_mbc_to_lower(ONIG_ENCODING_EUC_TW, p, lower);
return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
pp, end, lower);
}
/* Thin EUC-TW wrapper around a generic multi-byte helper; the helper's
 * result is returned unchanged.
 * NOTE(review): two different functions are interleaved here --
 * euctw_code_is_ctype (calling onigenc_mb4_code_is_ctype) and
 * euctw_is_mbc_ambiguous (calling onigenc_mbn_is_mbc_ambiguous).  This looks
 * like removed and added diff lines shown together -- confirm which pair of
 * signature/body lines is live. */
static int
euctw_code_is_ctype(OnigCodePoint code, unsigned int ctype)
euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
return onigenc_mb4_code_is_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end);
}
/* Report whether `code` belongs to character class `ctype`, as decided by
 * onigenc_mb4_is_code_ctype() for the EUC-TW encoding. */
static int
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  int in_class;

  in_class = onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
  return in_class;
}
/* True for byte values below 0xa1 other than 0x8e, and for 0xff.
 * NOTE(review): 0x8e is the byte EncLen_EUCTW gives length 4, yet it is
 * excluded here -- confirm this is intended. */
#define euctw_islead(c) (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff)
/* Move s back to the first byte of the character that contains it, never
 * going before start.
 *
 * The scan walks backwards from s until it reaches a byte accepted by
 * euctw_islead() or reaches start.  If the character beginning there spans
 * s, that position is returned.  Otherwise the character is skipped and the
 * remaining distance to s is rounded down to an even number of bytes.
 *
 * NOTE(review): several statements appear twice below (with and without
 * const / casts, and enc_len called with *p and with p); this looks like
 * removed and added diff lines shown together -- confirm which are live. */
static UChar*
euctw_left_adjust_char_head(UChar* start, UChar* s)
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
{
/* Assumed in this encoding,
mb-trail bytes don't mix with single bytes.
*/
UChar *p;
const UChar *p;
int len;
if (s <= start) return s;
if (s <= start) return (UChar* )s;
p = s;
while (!euctw_islead(*p) && p > start) p--;
len = enc_len(ONIG_ENCODING_EUC_TW, *p);
if (p + len > s) return p;
len = enc_len(ONIG_ENCODING_EUC_TW, p);
if (p + len > s) return (UChar* )p;
p += len;
/* (s - p) & ~1 clears the low bit, i.e. steps forward in 2-byte units. */
return p + ((s - p) & ~1);
return (UChar* )(p + ((s - p) & ~1));
}
/* Return TRUE when the byte at s is 0x7e or lower, FALSE otherwise.
 * `end` is not used.
 * NOTE(review): the signature and the declaration of c each appear twice
 * (without and with const); this looks like removed and added diff lines
 * shown together -- confirm which are live. */
static int
euctw_is_allowed_reverse_match(UChar* s, UChar* end)
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
UChar c = *s;
const UChar c = *s;
if (c <= 0x7e) return TRUE;
else return FALSE;
}
/* Encoding descriptor for EUC-TW: name, length limits, meta-character
 * settings and the euctw_* / onigenc_* callbacks implementing it.
 * NOTE(review): this initializer holds more entries than one descriptor
 * layout would take (an inline length table as well as euctw_mbc_enc_len,
 * both "max byte length" and "max enc length", and two final entries with no
 * comma between them).  It looks like removed and added diff lines shown
 * together -- confirm the live member list against OnigEncodingType. */
OnigEncodingType OnigEncodingEUC_TW = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
},
euctw_mbc_enc_len,
"EUC-TW", /* name */
4, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
4, /* max enc length */
1, /* min enc length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
euctw_mbc_to_code,
onigenc_mb4_code_to_mbclen,
euctw_code_to_mbc,
euctw_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
euctw_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
euctw_mbc_to_normalize,
euctw_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
euctw_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
euctw_is_allowed_reverse_match
};

View File

@ -1,112 +1,145 @@
/**********************************************************************
iso8859_1.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Entry of EncISO_8859_1_ToLowerCaseTable for byte value c. */
#define ENC_ISO_8859_1_TO_LOWER_CASE(c) EncISO_8859_1_ToLowerCaseTable[c]
/* Nonzero when the class mask stored in EncISO_8859_1_CtypeTable for byte
 * value `code` shares at least one bit with `ctype`.  Both arguments are
 * parenthesized so that a compound argument such as `A | B` is applied as a
 * whole instead of being split by operator precedence. */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[(code)] & (ctype)) != 0)
/* Byte-to-byte lower-casing map, indexed by byte value (octal literals,
 * eight per row).  0x41-0x5a ('A'-'Z') map to 0x61-0x7a, and 0xc0-0xde map
 * to the value 0x20 higher except 0xd7, which maps to itself.  Every other
 * byte, including 0xdf, maps to itself. */
static UChar EncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
/* Per-byte character-class bit masks for ISO-8859-1, indexed by byte value
 * and tested through ENC_IS_ISO_8859_1_CTYPE().
 * NOTE(review): 512 initializers are listed for a 256-entry array, in two
 * runs of 256 (the first ends without a trailing comma).  This looks like
 * the removed and the added table of a diff shown together -- confirm which
 * run is live. */
static unsigned short EncISO_8859_1_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
0x0050, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x00d0,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/* Write the normalized form of the character at *pp into *lower and advance
 * *pp past the input that was consumed.  Always returns 1.
 *
 * - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes left,
 *   "ss" (and "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is
 *   folded to the single byte 0xdf and two input bytes are consumed.
 * - Otherwise one byte is consumed; it is lower-cased when the flag matching
 *   its ASCII / non-ASCII status is set, and copied unchanged when not.
 *
 * NOTE(review): the old one-argument-pair signature and its single statement
 * (`*lower = ENC_ISO_8859_1_TO_LOWER_CASE(*p);`, which reads p before it is
 * declared) appear next to the new code; this looks like removed and added
 * diff lines shown together -- confirm which are live. */
static int
iso_8859_1_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_1_TO_LOWER_CASE(*p);
const UChar* p = *pp;
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/* Decide whether the character at *pp can match more than one spelling
 * under the ambiguity rules selected by `flag`, advancing *pp past the bytes
 * examined.
 *
 * - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (and "SS" when
 *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) consumes two bytes and
 *   returns TRUE; a single 0xdf byte consumes one byte and returns TRUE.
 * - Otherwise one byte is consumed, and the answer comes from the UPPER and
 *   LOWER bits of EncISO_8859_1_CtypeTable, but only when the flag matching
 *   the byte's ASCII / non-ASCII status is set; if not, the result is FALSE.
 *
 * NOTE(review): `(v | ONIGENC_CTYPE_LOWER) != 0` is true whenever
 * ONIGENC_CTYPE_LOWER is nonzero, which would make that branch unconditional
 * and the following `return (v != 0 ? TRUE : FALSE);` unreachable; `&` may
 * have been intended -- confirm.
 * NOTE(review): the old iso_8859_1_mbc_is_case_ambig signature and body
 * lines appear next to the new code (two declarations of v, two copies of
 * the LOWER test); this looks like removed and added diff lines shown
 * together -- confirm which are live. */
static int
iso_8859_1_mbc_is_case_ambig(UChar* p)
iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_1_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
return FALSE;
else
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
}
return (v != 0 ? TRUE : FALSE);
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_1_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_1_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
@ -115,38 +148,31 @@ iso_8859_1_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
/* Encoding descriptor for ISO-8859-1: name, length limits, supported
 * ambiguity flags, meta-character settings and the callbacks implementing
 * the encoding.
 * NOTE(review): this initializer holds more entries than one descriptor
 * layout would take (an inline length table as well as
 * onigenc_single_byte_mbc_enc_len, old and new callback names, and two
 * final entries with no comma between them).  It looks like removed and
 * added diff lines shown together -- confirm the live member list against
 * OnigEncodingType. */
OnigEncodingType OnigEncodingISO_8859_1 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-1", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_1_mbc_to_lower,
iso_8859_1_mbc_is_case_ambig,
iso_8859_1_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_1_mbc_to_normalize,
iso_8859_1_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_1_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_10.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Entry of EncISO_8859_10_ToLowerCaseTable for byte value c. */
#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
@ -47,69 +69,114 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
};
/* Per-byte character-class bit masks for ISO-8859-10, indexed by byte value
 * and tested through ENC_IS_ISO_8859_10_CTYPE().
 * NOTE(review): 512 initializers are listed for a 256-entry array, in two
 * runs of 256 (the first ends without a trailing comma).  This looks like
 * the removed and the added table of a diff shown together -- confirm which
 * run is live. */
static unsigned short EncISO_8859_10_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/* Write the normalized form of the character at *pp into *lower and advance
 * *pp past the input that was consumed.  Always returns 1.
 *
 * - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes left,
 *   "ss" (and "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is
 *   folded to the single byte 0xdf and two input bytes are consumed.
 * - Otherwise one byte is consumed; it is lower-cased through
 *   ENC_ISO_8859_10_TO_LOWER_CASE when the flag matching its ASCII /
 *   non-ASCII status is set, and copied unchanged when not.
 *
 * NOTE(review): the old iso_8859_10_mbc_to_lower signature and its single
 * statement (which reads p before it is declared) appear next to the new
 * code; this looks like removed and added diff lines shown together --
 * confirm which are live. */
static int
iso_8859_10_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
const UChar* p = *pp;
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/* Decide whether the character at *pp can match more than one spelling
 * under the ambiguity rules selected by `flag`, advancing *pp past the bytes
 * examined.
 *
 * - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (and "SS" when
 *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) consumes two bytes and
 *   returns TRUE; a single 0xdf byte consumes one byte and returns TRUE.
 * - Otherwise one byte is consumed, and the answer comes from the UPPER and
 *   LOWER bits of EncISO_8859_10_CtypeTable, but only when the flag matching
 *   the byte's ASCII / non-ASCII status is set; if not, the result is FALSE.
 *
 * NOTE(review): `(v | ONIGENC_CTYPE_LOWER) != 0` is true whenever
 * ONIGENC_CTYPE_LOWER is nonzero, which would make that branch unconditional
 * and the following `return (v != 0 ? TRUE : FALSE);` unreachable; `&` may
 * have been intended -- confirm.
 * NOTE(review): the old iso_8859_10_mbc_is_case_ambig signature and body
 * lines appear next to the new code (two declarations of v, two copies of
 * the LOWER test, a stray `else if (v != 0)`); this looks like removed and
 * added diff lines shown together -- confirm which are live. */
static int
iso_8859_10_mbc_is_case_ambig(UChar* p)
iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_10_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is lower case letter, but can't convert. */
if (*p == 0xdf)
return FALSE;
else
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
else if (v != 0) {
return TRUE;
}
}
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_10_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is lower case letter, but can't convert. */
if (*p == 0xdf)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_10_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
@ -117,39 +184,144 @@ iso_8859_10_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hand back the table of code-point pairs that are treated as equivalent
 * for ISO-8859-10 under a single ambiguity flag.
 *
 * flag  Compared for equality (not as a bit mask) against
 *       ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE and
 *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.
 * ccs   Receives a pointer to the selected table; left untouched when the
 *       function returns 0.
 *
 * Returns the number of pairs in the selected table: 52 for the shared
 * ASCII table, the size of the local table for the non-ASCII case, and 0
 * for any other flag value.
 */
static int
iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
                                     OnigPairAmbigCodes** ccs)
{
  /* Each pair is listed in both directions. */
  static OnigPairAmbigCodes non_ascii_pairs[] = {
    /* 0xa1-0xaf -> 0xb1-0xbf (0xa7 and 0xad have no pair) */
    { 0xa1, 0xb1 }, { 0xa2, 0xb2 }, { 0xa3, 0xb3 }, { 0xa4, 0xb4 },
    { 0xa5, 0xb5 }, { 0xa6, 0xb6 }, { 0xa8, 0xb8 }, { 0xa9, 0xb9 },
    { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc }, { 0xae, 0xbe },
    { 0xaf, 0xbf },

    /* 0xb1-0xbf -> 0xa1-0xaf */
    { 0xb1, 0xa1 }, { 0xb2, 0xa2 }, { 0xb3, 0xa3 }, { 0xb4, 0xa4 },
    { 0xb5, 0xa5 }, { 0xb6, 0xa6 }, { 0xb8, 0xa8 }, { 0xb9, 0xa9 },
    { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac }, { 0xbe, 0xae },
    { 0xbf, 0xaf },

    /* 0xc0-0xde -> 0xe0-0xfe */
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },

    /* 0xe0-0xfe -> 0xc0-0xde */
    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
  };
  int pair_count = 0;

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    pair_count = 52;
  }
  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = non_ascii_pairs;
    pair_count = sizeof(non_ascii_pairs) / sizeof(OnigPairAmbigCodes);
  }

  return pair_count;
}
/* Encoding descriptor for ISO-8859-10: name, length limits, supported
 * ambiguity flags, meta-character settings and the callbacks implementing
 * the encoding.
 * NOTE(review): this initializer holds more entries than one descriptor
 * layout would take (an inline length table as well as
 * onigenc_single_byte_mbc_enc_len, old and new callback names, and two
 * final entries with no comma between them).  It looks like removed and
 * added diff lines shown together -- confirm the live member list against
 * OnigEncodingType. */
OnigEncodingType OnigEncodingISO_8859_10 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-10", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_10_mbc_to_lower,
iso_8859_10_mbc_is_case_ambig,
iso_8859_10_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_10_mbc_to_normalize,
iso_8859_10_is_mbc_ambiguous,
iso_8859_10_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_10_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,52 +1,74 @@
/**********************************************************************
iso8859_11.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Non-zero when the ctype bitmask stored for byte value `code` has any of
 * the bits in `ctype` set.  `code` indexes EncISO_8859_11_CtypeTable
 * directly, so it must be below 256.  Both arguments are parenthesised so
 * that a compound argument such as `A | B` is masked as a whole instead of
 * being split by operator precedence.
 */
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
  ((EncISO_8859_11_CtypeTable[(code)] & (ctype)) != 0)
static unsigned short EncISO_8859_11_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
iso_8859_11_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
@ -55,38 +77,29 @@ iso_8859_11_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_11 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-11", /* name */
1, /* max enc length */
1, /* min enc length */
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
"ISO-8859-11", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_to_lower,
onigenc_ascii_mbc_is_case_ambig,
iso_8859_11_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
onigenc_ascii_mbc_to_normalize,
onigenc_ascii_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
iso_8859_11_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_13.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
@ -47,69 +69,114 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_13_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x00d0, 0x0050, 0x0050,
0x0a51, 0x0050, 0x0a51, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0a51,
0x0050, 0x0050, 0x0850, 0x0850, 0x00d0, 0x0871, 0x0050, 0x00d0,
0x0871, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x00d0
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0,
0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0
};
/*
 * iso_8859_13_mbc_to_normalize
 *
 * Writes the normalized (case-folded) form of the character at *pp into
 * *lower and advances *pp past the input it consumed.
 *
 *   flag  - bit set of ONIGENC_AMBIGUOUS_MATCH_* options selecting which
 *           folds are applied.
 *   pp    - in/out: current read position; moved forward by 1 byte, or by
 *           2 bytes when an "ss" pair is folded.
 *   end   - end of the input; the two-byte fold is only tried when
 *           end > p + 1.
 *   lower - out: receives exactly one byte.
 *
 * Always returns 1 (one byte is written to *lower on every path).
 *
 * NOTE(review): this span appears to show the pre-change function
 * (iso_8859_13_mbc_to_lower and its single assignment) together with its
 * replacement; read the two signatures and the first `*lower = ...` line
 * with that in mind.
 */
static int
iso_8859_13_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* Compound fold: "ss" (and "SS" when ASCII case folding is also enabled)
   collapses to the single byte 0xdf.  Mixed-case "Ss"/"sS" is not folded
   here. */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* Single byte: use the lower-case table only when the flag matching the
   byte's class (ASCII vs. non-ASCII) is set; otherwise copy it through. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* byte length of the normalized character stored in *lower */
}
/*
 * iso_8859_13_is_mbc_ambiguous
 *
 * Reports whether the character at *pp is affected by the folds enabled in
 * `flag`, and advances *pp past the input it examined.
 *
 *   flag - bit set of ONIGENC_AMBIGUOUS_MATCH_* options.
 *   pp   - in/out: read position; advanced by 2 when an "ss"/"SS" pair is
 *          recognised, otherwise by 1.
 *   end  - end of the input; the two-byte check needs end > p + 1.
 *
 * Returns TRUE or FALSE.
 *
 * NOTE(review): this span appears to show the pre-change function
 * (iso_8859_13_mbc_is_case_ambig, which declares `v` up front) together
 * with its replacement, so `v` is both used and declared more than once in
 * the listing.
 *
 * NOTE(review): both copies test `(v | ONIGENC_CTYPE_LOWER) != 0`.  If
 * ONIGENC_CTYPE_LOWER is a non-zero bit (its value is not visible here),
 * that expression is always true, every byte other than 0xdf/0xb5 is
 * reported as ambiguous, and `return (v != 0 ? TRUE : FALSE)` cannot be
 * reached.  `&` looks like the intended operator - confirm before changing.
 */
static int
iso_8859_13_mbc_is_case_ambig(UChar* p)
iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_13_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is a lower-case letter, but it cannot be converted. */
if (*p == 0xdf || *p == 0xb5)
return FALSE;
else
/* Compound folds: "ss" (or "SS" with ASCII case folding) and a lone 0xdf
   are both treated as ambiguous. */
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
else if (v != 0) {
return TRUE;
}
}
/* From here on exactly one byte is consumed; `p` still points at it. */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
/* Keep only the upper/lower bits of this byte's ctype word. */
int v = (EncISO_8859_13_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): `|` makes this condition always true for a non-zero
   ONIGENC_CTYPE_LOWER; `&` is presumably what was meant. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf and 0xb5 are lower-case letters, but they cannot be converted. */
if (*p == 0xdf || *p == 0xb5)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_13_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
@ -117,39 +184,114 @@ iso_8859_13_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * iso_8859_13_get_all_pair_ambig_codes
 *
 * Hands the caller the table of single-byte case pairs for ISO-8859-13.
 *
 *   flag - compared with ==, not tested as a bit mask: it must be exactly
 *          ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE or exactly
 *          ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE; any other value selects
 *          nothing.
 *   ccs  - out: set to the selected table; left untouched when 0 is
 *          returned.
 *
 * Returns the number of entries in *ccs, or 0 when no table was selected.
 *
 * The non-ASCII table lists every pair in both directions, ordered by the
 * first code.  It now also carries 0xa8/0xb8, 0xaa/0xba and 0xaf/0xbf:
 * EncISO_8859_13_CtypeTable gives 0xa8, 0xaa, 0xaf the same class word as
 * the upper-case letters 0xc0.. and 0xb8, 0xba, 0xbf the same word as the
 * lower-case letters 0xe0.., and ISO-8859-13 assigns them to the letter
 * pairs O-stroke, R-cedilla and AE.  Without these entries the pair list
 * disagreed with the ctype table for those letters.
 * NOTE(review): presumably the lower-case table maps the same three pairs;
 * confirm against EncISO_8859_13_ToLowerCaseTable.
 * 0xd7/0xf7 (non-letters), 0xdf and 0xb5 (lower case without a partner)
 * stay out of the table.
 */
static int
iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
                                     OnigPairAmbigCodes** ccs)
{
  static OnigPairAmbigCodes cc[] = {
    /* letters located in the 0xa0-0xbf range */
    { 0xa8, 0xb8 },
    { 0xaa, 0xba },
    { 0xaf, 0xbf },
    { 0xb8, 0xa8 },
    { 0xba, 0xaa },
    { 0xbf, 0xaf },

    /* upper case -> lower case */
    { 0xc0, 0xe0 },
    { 0xc1, 0xe1 },
    { 0xc2, 0xe2 },
    { 0xc3, 0xe3 },
    { 0xc4, 0xe4 },
    { 0xc5, 0xe5 },
    { 0xc6, 0xe6 },
    { 0xc7, 0xe7 },
    { 0xc8, 0xe8 },
    { 0xc9, 0xe9 },
    { 0xca, 0xea },
    { 0xcb, 0xeb },
    { 0xcc, 0xec },
    { 0xcd, 0xed },
    { 0xce, 0xee },
    { 0xcf, 0xef },

    { 0xd0, 0xf0 },
    { 0xd1, 0xf1 },
    { 0xd2, 0xf2 },
    { 0xd3, 0xf3 },
    { 0xd4, 0xf4 },
    { 0xd5, 0xf5 },
    { 0xd6, 0xf6 },
    { 0xd8, 0xf8 },
    { 0xd9, 0xf9 },
    { 0xda, 0xfa },
    { 0xdb, 0xfb },
    { 0xdc, 0xfc },
    { 0xdd, 0xfd },
    { 0xde, 0xfe },

    /* lower case -> upper case */
    { 0xe0, 0xc0 },
    { 0xe1, 0xc1 },
    { 0xe2, 0xc2 },
    { 0xe3, 0xc3 },
    { 0xe4, 0xc4 },
    { 0xe5, 0xc5 },
    { 0xe6, 0xc6 },
    { 0xe7, 0xc7 },
    { 0xe8, 0xc8 },
    { 0xe9, 0xc9 },
    { 0xea, 0xca },
    { 0xeb, 0xcb },
    { 0xec, 0xcc },
    { 0xed, 0xcd },
    { 0xee, 0xce },
    { 0xef, 0xcf },

    { 0xf0, 0xd0 },
    { 0xf1, 0xd1 },
    { 0xf2, 0xd2 },
    { 0xf3, 0xd3 },
    { 0xf4, 0xd4 },
    { 0xf5, 0xd5 },
    { 0xf6, 0xd6 },
    { 0xf8, 0xd8 },
    { 0xf9, 0xd9 },
    { 0xfa, 0xda },
    { 0xfb, 0xdb },
    { 0xfc, 0xdc },
    { 0xfd, 0xdd },
    { 0xfe, 0xde }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    /* Shared ASCII table; 52 is the entry count reported for it
       (presumably 26 letters in both directions - defined elsewhere). */
    *ccs = OnigAsciiPairAmbigCodes;
    return 52;
  }

  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
  }
  else
    return 0;
}
OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-13", /* name */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
"ISO-8859-13", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_13_mbc_to_lower,
iso_8859_13_mbc_is_case_ambig,
iso_8859_13_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_13_mbc_to_normalize,
iso_8859_13_is_mbc_ambiguous,
iso_8859_13_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_13_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_14.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
@ -47,69 +69,114 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_14_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0871, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x0050,
0x0a51, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x00d0, 0x0050, 0x0a51,
0x0a51, 0x0871, 0x0a51, 0x0871, 0x0a51, 0x0871, 0x0050, 0x0a51,
0x0871, 0x0871, 0x0871, 0x0a51, 0x0871, 0x0a51, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0,
0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2,
0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2,
0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/*
 * iso_8859_14_mbc_to_normalize
 *
 * Writes the normalized (case-folded) form of the character at *pp into
 * *lower and advances *pp past the input it consumed.
 *
 *   flag  - bit set of ONIGENC_AMBIGUOUS_MATCH_* options selecting which
 *           folds are applied.
 *   pp    - in/out: current read position; moved forward by 1 byte, or by
 *           2 bytes when an "ss" pair is folded.
 *   end   - end of the input; the two-byte fold is only tried when
 *           end > p + 1.
 *   lower - out: receives exactly one byte.
 *
 * Always returns 1 (one byte is written to *lower on every path).
 *
 * NOTE(review): this span appears to show the pre-change function
 * (iso_8859_14_mbc_to_lower and its single assignment) together with its
 * replacement; read the two signatures and the first `*lower = ...` line
 * with that in mind.
 */
static int
iso_8859_14_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* Compound fold: "ss" (and "SS" when ASCII case folding is also enabled)
   collapses to the single byte 0xdf.  Mixed-case "Ss"/"sS" is not folded
   here. */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* Single byte: use the lower-case table only when the flag matching the
   byte's class (ASCII vs. non-ASCII) is set; otherwise copy it through. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* byte length of the normalized character stored in *lower */
}
/*
 * iso_8859_14_is_mbc_ambiguous
 *
 * Reports whether the character at *pp is affected by the folds enabled in
 * `flag`, and advances *pp past the input it examined.
 *
 *   flag - bit set of ONIGENC_AMBIGUOUS_MATCH_* options.
 *   pp   - in/out: read position; advanced by 2 when an "ss"/"SS" pair is
 *          recognised, otherwise by 1.
 *   end  - end of the input; the two-byte check needs end > p + 1.
 *
 * Returns TRUE or FALSE.
 *
 * NOTE(review): this span appears to show the pre-change function
 * (iso_8859_14_mbc_is_case_ambig, which declares `v` up front) together
 * with its replacement, so `v` is both used and declared more than once in
 * the listing.
 *
 * NOTE(review): both copies test `(v | ONIGENC_CTYPE_LOWER) != 0`.  If
 * ONIGENC_CTYPE_LOWER is a non-zero bit (its value is not visible here),
 * that expression is always true, every byte other than 0xdf is reported
 * as ambiguous, and `return (v != 0 ? TRUE : FALSE)` cannot be reached.
 * `&` looks like the intended operator - confirm before changing.
 */
static int
iso_8859_14_mbc_is_case_ambig(UChar* p)
iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_14_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is a lower-case letter, but it cannot be converted. */
if (*p == 0xdf)
return FALSE;
else
/* Compound folds: "ss" (or "SS" with ASCII case folding) and a lone 0xdf
   are both treated as ambiguous. */
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
else if (v != 0) {
return TRUE;
}
}
/* From here on exactly one byte is consumed; `p` still points at it. */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
/* Keep only the upper/lower bits of this byte's ctype word. */
int v = (EncISO_8859_14_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): `|` makes this condition always true for a non-zero
   ONIGENC_CTYPE_LOWER; `&` is presumably what was meant. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is a lower-case letter, but it cannot be converted. */
if (*p == 0xdf)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_14_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
@ -117,39 +184,144 @@ iso_8859_14_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * iso_8859_14_get_all_pair_ambig_codes
 *
 * Gives the caller the table of single-byte case pairs for ISO-8859-14.
 *
 *   flag - compared for equality (not as a bit mask) against
 *          ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE and
 *          ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE.
 *   ccs  - out: receives the selected table; not written when the result
 *          is 0.
 *
 * Returns the entry count of the table stored in *ccs, or 0 if `flag` is
 * neither of the two values above.
 */
static int
iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
                                     OnigPairAmbigCodes** ccs)
{
  /* Every pair appears in both directions, ordered by the first code. */
  static OnigPairAmbigCodes nonascii_pairs[] = {
    /* letters scattered through 0xa1-0xbf */
    { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa4, 0xa5 }, { 0xa5, 0xa4 },
    { 0xa6, 0xab }, { 0xa8, 0xb8 }, { 0xaa, 0xba }, { 0xab, 0xa6 },
    { 0xac, 0xbc }, { 0xaf, 0xff },
    { 0xb0, 0xb1 }, { 0xb1, 0xb0 }, { 0xb2, 0xb3 }, { 0xb3, 0xb2 },
    { 0xb4, 0xb5 }, { 0xb5, 0xb4 }, { 0xb7, 0xb9 }, { 0xb8, 0xa8 },
    { 0xb9, 0xb7 }, { 0xba, 0xaa }, { 0xbb, 0xbf }, { 0xbc, 0xac },
    { 0xbd, 0xbe }, { 0xbe, 0xbd }, { 0xbf, 0xbb },

    /* 0xc0-0xde -> 0xe0-0xfe */
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },

    /* 0xe0-0xfe -> 0xc0-0xde */
    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde },

    /* partner of 0xaf */
    { 0xff, 0xaf }
  };

  /* ASCII request: hand back the shared ASCII table and its entry count. */
  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    return 52;
  }

  /* Anything that is not the non-ASCII request selects no table. */
  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
    return 0;

  *ccs = nonascii_pairs;
  return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
}
OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-14", /* name */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
"ISO-8859-14", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_14_mbc_to_lower,
iso_8859_14_mbc_is_case_ambig,
iso_8859_14_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_14_mbc_to_normalize,
iso_8859_14_is_mbc_ambiguous,
iso_8859_14_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_14_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,15 +1,33 @@
/**********************************************************************
iso8859_15.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* reference
http://en.wikipedia.org/wiki/ISO_8859-15
*/
#include "regenc.h"
#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
@ -51,65 +69,114 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_15_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0a51, 0x0050,
0x0871, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
0x0050, 0x0050, 0x0850, 0x0850, 0x0a51, 0x0871, 0x0050, 0x00d0,
0x0871, 0x0850, 0x0871, 0x00d0, 0x0a51, 0x0871, 0x0a51, 0x00d0,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0,
0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0,
0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/*
 * NOTE(review): this span looks like a flattened diff hunk -- the signature
 * and first body line of iso_8859_15_mbc_to_lower() appear next to the lines
 * of iso_8859_15_mbc_to_normalize() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_15_mbc_to_normalize: store the normalized form of the character at
 * *pp in *lower and advance *pp past the input that was consumed.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes before
 *     `end`, "ss" (or "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also
 *     set) is stored as the single byte 0xdf and *pp advances by two.
 *   - Otherwise one byte is consumed. It goes through the lower-case table
 *     only when the flag for its class (ASCII / non-ASCII) is set; if not, it
 *     is copied unchanged.
 * Every path returns 1.
 */
static int
iso_8859_15_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* compound case: two input bytes collapse to one output byte (0xdf) */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* single byte: fold only if the flag for this byte's class is enabled */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * NOTE(review): this span looks like a flattened diff hunk -- lines of
 * iso_8859_15_mbc_is_case_ambig() are interleaved with lines of
 * iso_8859_15_is_mbc_ambiguous() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_15_is_mbc_ambiguous: report whether the character at *pp is
 * ambiguous under `flag`, advancing *pp past what was examined.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) advances *pp by two and
 *     returns TRUE; a single 0xdf advances by one and returns TRUE.
 *   - Otherwise *pp advances by one and the ctype table is consulted, but
 *     only when the flag for the byte's class (ASCII / non-ASCII) is set.
 *
 * NOTE(review): `(v | ONIGENC_CTYPE_LOWER) != 0` is true for every v as long
 * as ONIGENC_CTYPE_LOWER is non-zero, so the branch it guards does not depend
 * on the table lookup and the trailing `return (v != 0 ...)` looks
 * unreachable. `&` may have been intended -- confirm before changing, since
 * callers may rely on the current result.
 */
static int
iso_8859_15_mbc_is_case_ambig(UChar* p)
iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_15_CtypeTable[*p]
& (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
return FALSE;
else
/* compound handling: "ss"/"SS" pair first, then a lone 0xdf */
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
}
return (v != 0 ? TRUE : FALSE);
/* single-byte path: p still points at the byte that *pp just moved past */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_15_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): always true if ONIGENC_CTYPE_LOWER != 0 -- see header */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5 and 0xba are excluded here and reported as FALSE. */
if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_15_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
@ -117,39 +184,124 @@ iso_8859_15_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hand back the table of ambiguous code pairs that applies to `flag`.
 *
 *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
 *       *ccs = OnigAsciiPairAmbigCodes, returns 52.
 *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
 *       *ccs = the ISO-8859-15 table below, returns its entry count.
 *   anything else
 *       returns 0 and leaves *ccs untouched.
 *
 * `flag` is compared for equality, not tested as a bit mask, so a value that
 * combines both flags takes the "anything else" path.
 */
static int
iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
                                     OnigPairAmbigCodes** ccs)
{
  /* Each pair is listed in both directions. */
  static OnigPairAmbigCodes nonascii_pairs[] = {
    /* pairs outside the 0xc0-0xfe block */
    { 0xa6, 0xa8 }, { 0xa8, 0xa6 }, { 0xb4, 0xb8 }, { 0xb8, 0xb4 },
    { 0xbc, 0xbd }, { 0xbd, 0xbc }, { 0xbe, 0xff },
    /* 0xc0-0xde -> 0xe0-0xfe (no entry for 0xd7) */
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
    /* 0xe0-0xfe -> 0xc0-0xde (no entry for 0xf7) */
    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde },
    { 0xff, 0xbe }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    /* presumably the entry count of OnigAsciiPairAmbigCodes -- TODO confirm */
    return 52;
  }

  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
    return 0;

  *ccs = nonascii_pairs;
  return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
}
OnigEncodingType OnigEncodingISO_8859_15 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-15", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_15_mbc_to_lower,
iso_8859_15_mbc_is_case_ambig,
iso_8859_15_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_15_mbc_to_normalize,
iso_8859_15_is_mbc_ambiguous,
iso_8859_15_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_15_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_16.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
@ -47,69 +69,114 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_16_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0871, 0x0a51, 0x0050, 0x00d0, 0x0a51, 0x0050,
0x0871, 0x0050, 0x0a51, 0x00d0, 0x0a51, 0x00d0, 0x0871, 0x0a51,
0x0050, 0x0050, 0x0a51, 0x0871, 0x0a51, 0x00d0, 0x0050, 0x00d0,
0x0871, 0x0871, 0x0871, 0x00d0, 0x0a51, 0x0871, 0x0a51, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0,
0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2,
0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0,
0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/*
 * NOTE(review): this span looks like a flattened diff hunk -- the signature
 * and first body line of iso_8859_16_mbc_to_lower() appear next to the lines
 * of iso_8859_16_mbc_to_normalize() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_16_mbc_to_normalize: store the normalized form of the character at
 * *pp in *lower and advance *pp past the input that was consumed.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes before
 *     `end`, "ss" (or "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also
 *     set) is stored as the single byte 0xdf and *pp advances by two.
 *   - Otherwise one byte is consumed. It goes through the lower-case table
 *     only when the flag for its class (ASCII / non-ASCII) is set; if not, it
 *     is copied unchanged.
 * Every path returns 1.
 */
static int
iso_8859_16_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* compound case: two input bytes collapse to one output byte (0xdf) */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* single byte: fold only if the flag for this byte's class is enabled */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * NOTE(review): this span looks like a flattened diff hunk -- lines of
 * iso_8859_16_mbc_is_case_ambig() are interleaved with lines of
 * iso_8859_16_is_mbc_ambiguous() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_16_is_mbc_ambiguous: report whether the character at *pp is
 * ambiguous under `flag`, advancing *pp past what was examined.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) advances *pp by two and
 *     returns TRUE; a single 0xdf advances by one and returns TRUE.
 *   - Otherwise *pp advances by one and the ctype table is consulted, but
 *     only when the flag for the byte's class (ASCII / non-ASCII) is set.
 *
 * NOTE(review): `(v | ONIGENC_CTYPE_LOWER) != 0` is true for every v as long
 * as ONIGENC_CTYPE_LOWER is non-zero, so the branch it guards does not depend
 * on the table lookup and the trailing `return (v != 0 ...)` looks
 * unreachable. `&` may have been intended -- confirm before changing, since
 * callers may rely on the current result.
 */
static int
iso_8859_16_mbc_is_case_ambig(UChar* p)
iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_16_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is lower case letter, but can't convert. */
if (*p == 0xdf)
return FALSE;
else
/* compound handling: "ss"/"SS" pair first, then a lone 0xdf */
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
else if (v != 0) {
return TRUE;
}
}
/* single-byte path: p still points at the byte that *pp just moved past */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_16_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): always true if ONIGENC_CTYPE_LOWER != 0 -- see header */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is excluded here and reported as FALSE. */
if (*p == 0xdf)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_16_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
@ -117,39 +184,138 @@ iso_8859_16_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hand back the table of ambiguous code pairs that applies to `flag`.
 *
 *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
 *       *ccs = OnigAsciiPairAmbigCodes, returns 52.
 *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
 *       *ccs = the ISO-8859-16 table below, returns its entry count.
 *   anything else
 *       returns 0 and leaves *ccs untouched.
 *
 * `flag` is compared for equality, not tested as a bit mask, so a value that
 * combines both flags takes the "anything else" path.
 */
static int
iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
                                     OnigPairAmbigCodes** ccs)
{
  /* Each pair is listed in both directions. */
  static OnigPairAmbigCodes nonascii_pairs[] = {
    /* pairs in the 0xa1-0xbf block (plus 0xbe <-> 0xff) */
    { 0xa1, 0xa2 }, { 0xa2, 0xa1 }, { 0xa3, 0xb3 }, { 0xa6, 0xa8 },
    { 0xa8, 0xa6 }, { 0xaa, 0xba }, { 0xac, 0xae }, { 0xae, 0xac },
    { 0xaf, 0xbf }, { 0xb2, 0xb9 }, { 0xb3, 0xa3 }, { 0xb4, 0xb8 },
    { 0xb8, 0xb4 }, { 0xb9, 0xb2 }, { 0xba, 0xaa }, { 0xbc, 0xbd },
    { 0xbd, 0xbc }, { 0xbe, 0xff }, { 0xbf, 0xaf },
    /* 0xc0-0xde -> 0xe0-0xfe */
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },
    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },
    /* 0xe0-0xfe -> 0xc0-0xde */
    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },
    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde },
    { 0xff, 0xbe }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    /* presumably the entry count of OnigAsciiPairAmbigCodes -- TODO confirm */
    return 52;
  }

  if (flag != ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE)
    return 0;

  *ccs = nonascii_pairs;
  return sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]);
}
OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_single_byte_mbc_enc_len,
"ISO-8859-16", /* name */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
"ISO-8859-16", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_16_mbc_to_lower,
iso_8859_16_mbc_is_case_ambig,
iso_8859_16_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_16_mbc_to_normalize,
iso_8859_16_is_mbc_ambiguous,
iso_8859_16_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_16_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_2.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
@ -47,66 +69,218 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_2_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0050, 0x0a51, 0x0050, 0x0a51, 0x0a51, 0x0050,
0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
0x0050, 0x0871, 0x0050, 0x0871, 0x0050, 0x0871, 0x0871, 0x0050,
0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
/*
 * NOTE(review): this span looks like a flattened diff hunk -- the signature
 * and first body line of iso_8859_2_mbc_to_lower() appear next to the lines
 * of iso_8859_2_mbc_to_normalize() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_2_mbc_to_normalize: store the normalized form of the character at
 * *pp in *lower and advance *pp past the input that was consumed.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set and at least two bytes before
 *     `end`, "ss" (or "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also
 *     set) is stored as the single byte 0xdf and *pp advances by two.
 *   - Otherwise one byte is consumed. It goes through the lower-case table
 *     only when the flag for its class (ASCII / non-ASCII) is set; if not, it
 *     is copied unchanged.
 * Every path returns 1.
 */
static int
iso_8859_2_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* compound case: two input bytes collapse to one output byte (0xdf) */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* single byte: fold only if the flag for this byte's class is enabled */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * NOTE(review): this span looks like a flattened diff hunk -- lines of
 * iso_8859_2_mbc_is_case_ambig() are interleaved with lines of
 * iso_8859_2_is_mbc_ambiguous() with no +/- markers. Confirm against the
 * original patch before treating it as compilable C.
 *
 * iso_8859_2_is_mbc_ambiguous: report whether the character at *pp is
 * ambiguous under `flag`, advancing *pp past what was examined.
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) advances *pp by two and
 *     returns TRUE; a single 0xdf advances by one and returns TRUE.
 *   - Otherwise *pp advances by one and the ctype table is consulted, but
 *     only when the flag for the byte's class (ASCII / non-ASCII) is set.
 *
 * NOTE(review): `(v | ONIGENC_CTYPE_LOWER) != 0` is true for every v as long
 * as ONIGENC_CTYPE_LOWER is non-zero, so the branch it guards does not depend
 * on the table lookup and the trailing `return (v != 0 ...)` looks
 * unreachable. `&` may have been intended -- confirm before changing, since
 * callers may rely on the current result.
 */
static int
iso_8859_2_mbc_is_case_ambig(UChar* p)
iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_2_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is lower case letter, but can't convert. */
if (*p == 0xdf)
return FALSE;
else
/* compound handling: "ss"/"SS" pair first, then a lone 0xdf */
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
}
return (v != 0 ? TRUE : FALSE);
/* single-byte path: p still points at the byte that *pp just moved past */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_2_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): always true if ONIGENC_CTYPE_LOWER != 0 -- see header */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf is excluded here and reported as FALSE. */
if (*p == 0xdf)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
/*
 * NOTE(review): the iso_8859_2_code_is_ctype() signature line directly below
 * `static int` looks like a removed diff line left in place without a `-`
 * marker; the function that actually follows is
 * iso_8859_2_get_all_pair_ambig_codes(). Confirm against the original patch.
 *
 * iso_8859_2_get_all_pair_ambig_codes: hand back the table of ambiguous code
 * pairs that applies to `flag`.
 *   flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
 *       *ccs = OnigAsciiPairAmbigCodes, returns 52.
 *   flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE
 *       *ccs = the local table `cc`, returns its entry count.
 *   anything else
 *       returns 0 and leaves *ccs untouched.
 * `flag` is compared for equality, not tested as a bit mask, so a value that
 * combines both flags takes the "anything else" path.
 */
static int
iso_8859_2_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
OnigPairAmbigCodes** ccs)
{
/* Each pair is listed in both directions. */
static OnigPairAmbigCodes cc[] = {
/* 0xa1-0xaf <-> 0xb1-0xbf (only the codes listed) */
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
{ 0xaa, 0xba },
{ 0xab, 0xbb },
{ 0xac, 0xbc },
{ 0xae, 0xbe },
{ 0xaf, 0xbf },
{ 0xb1, 0xa1 },
{ 0xb3, 0xa3 },
{ 0xb5, 0xa5 },
{ 0xb6, 0xa6 },
{ 0xb9, 0xa9 },
{ 0xba, 0xaa },
{ 0xbb, 0xab },
{ 0xbc, 0xac },
{ 0xbe, 0xae },
{ 0xbf, 0xaf },
/* 0xc0-0xde -> 0xe0-0xfe (no entry for 0xd7) */
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe },
/* 0xe0-0xfe -> 0xc0-0xde (no entry for 0xf7) */
{ 0xe0, 0xc0 },
{ 0xe1, 0xc1 },
{ 0xe2, 0xc2 },
{ 0xe3, 0xc3 },
{ 0xe4, 0xc4 },
{ 0xe5, 0xc5 },
{ 0xe6, 0xc6 },
{ 0xe7, 0xc7 },
{ 0xe8, 0xc8 },
{ 0xe9, 0xc9 },
{ 0xea, 0xca },
{ 0xeb, 0xcb },
{ 0xec, 0xcc },
{ 0xed, 0xcd },
{ 0xee, 0xce },
{ 0xef, 0xcf },
{ 0xf0, 0xd0 },
{ 0xf1, 0xd1 },
{ 0xf2, 0xd2 },
{ 0xf3, 0xd3 },
{ 0xf4, 0xd4 },
{ 0xf5, 0xd5 },
{ 0xf6, 0xd6 },
{ 0xf8, 0xd8 },
{ 0xf9, 0xd9 },
{ 0xfa, 0xda },
{ 0xfb, 0xdb },
{ 0xfc, 0xdc },
{ 0xfd, 0xdd },
{ 0xfe, 0xde }
};
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
/* presumably the entry count of OnigAsciiPairAmbigCodes -- TODO confirm */
return 52;
}
if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
*ccs = cc;
return sizeof(cc) / sizeof(OnigPairAmbigCodes);
}
else
return 0;
}
static int
iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
@ -115,38 +289,31 @@ iso_8859_2_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
OnigEncodingType OnigEncodingISO_8859_2 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-2", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_2_mbc_to_lower,
iso_8859_2_mbc_is_case_ambig,
iso_8859_2_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_2_mbc_to_normalize,
iso_8859_2_is_mbc_ambiguous,
iso_8859_2_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_2_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_3.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c]
@ -47,66 +69,114 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_3_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0050, 0x0050, 0x0050, 0x0000, 0x0a51, 0x0050,
0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0000, 0x0a51,
0x0050, 0x0871, 0x0850, 0x0850, 0x0050, 0x0871, 0x0871, 0x00d0,
0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0850, 0x0000, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0000, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0000, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0,
0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2,
0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0,
0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
/*
 * Lower-case normalisation of the ISO-8859-3 character at *pp.
 *
 * NOTE(review): this text comes from a rendered diff.  The
 * iso_8859_3_mbc_to_lower signature and the unconditional
 * "*lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);" right after the opening brace
 * look like the removed side of the hunk; the comments describe the
 * replacement, iso_8859_3_mbc_to_normalize.
 *
 * When ONIGENC_AMBIGUOUS_MATCH_COMPOUND is set and at least two bytes remain
 * before end, "ss" (or "SS" if ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set)
 * is folded to the single byte 0xdf and *pp advances by 2.  Mixed-case "sS" /
 * "Ss" is not folded.  Otherwise exactly one byte is written to *lower and
 * *pp advances by 1.  The function returns 1 on every path.
 */
static int
iso_8859_3_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* "end > p + 1" guarantees that *(p+1) below is still inside the input. */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* Use the lower-case table only when the flag that covers this byte's class
   (ASCII or non-ASCII, per ONIGENC_IS_MBC_ASCII) is set. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
}
else {
/* Folding not requested for this class of byte: copy it through unchanged. */
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Report whether the ISO-8859-3 character at *pp takes part in
 * case-insensitive ("ambiguous") matching under the given flags, and advance
 * *pp past what was examined.
 *
 * NOTE(review): this text comes from a rendered diff.  The
 * iso_8859_3_mbc_is_case_ambig signature, the function-level "int v = ..."
 * and the first "if ((v | ...) ...) ... return FALSE; else" group, and the
 * "return (v != 0 ? TRUE : FALSE);" that sits directly before "(*pp)++;"
 * look like the removed side of the hunk.  The description below is of the
 * replacement, iso_8859_3_is_mbc_ambiguous.
 *
 * With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (or "SS" when
 * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) with two bytes available
 * consumes 2 bytes and yields TRUE; a lone 0xdf consumes 1 byte and yields
 * TRUE.  Otherwise 1 byte is consumed and the answer comes from the ctype
 * table, provided the flag covering the byte's ASCII / non-ASCII class is
 * set; if it is not, the result is FALSE.
 */
static int
iso_8859_3_mbc_is_case_ambig(UChar* p)
iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_3_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf and 0xb5 are lower case letters, but can't convert. */
if (*p == 0xdf || *p == 0xb5)
return FALSE;
else
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
/* Only look at *(p+1) when a second byte exists before end. */
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
}
return (v != 0 ? TRUE : FALSE);
/* From here on p still addresses the examined byte; only *pp has moved. */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_3_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): "v | ONIGENC_CTYPE_LOWER" is non-zero whenever
   ONIGENC_CTYPE_LOWER itself is non-zero, so this test looks always true:
   every byte other than 0xdf / 0xb5 then returns TRUE and the final
   "return (v != 0 ...)" is never reached.  "&" may have been intended --
   confirm against upstream. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf and 0xb5 are lower case letters, but can't convert. */
if (*p == 0xdf || *p == 0xb5)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_3_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
@ -114,39 +184,125 @@ iso_8859_3_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Select the table of ISO-8859-3 code pairs that match each other for one
 * kind of case-insensitive comparison.
 *
 * flag is compared for equality (it is not treated as a bit mask):
 *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = OnigAsciiPairAmbigCodes,
 *                                            returns 52
 *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = local table, returns its
 *                                            element count
 *   any other value                       -> returns 0, *ccs is not written
 */
static int
iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
                                    OnigPairAmbigCodes** ccs)
{
  /* Non-ASCII pairs; each one appears in both directions. */
  static OnigPairAmbigCodes cc[] = {
    { 0xa1, 0xb1 }, { 0xa6, 0xb6 }, { 0xa9, 0xb9 }, { 0xaa, 0xba },
    { 0xab, 0xbb }, { 0xac, 0xbc }, { 0xaf, 0xbf },

    { 0xb1, 0xa1 }, { 0xb6, 0xa6 }, { 0xb9, 0xa9 }, { 0xba, 0xaa },
    { 0xbb, 0xab }, { 0xbc, 0xac }, { 0xbf, 0xaf },

    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc4, 0xe4 },
    { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 }, { 0xc8, 0xe8 },
    { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb }, { 0xcc, 0xec },
    { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

    { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 }, { 0xd4, 0xf4 },
    { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 }, { 0xd9, 0xf9 },
    { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc }, { 0xdd, 0xfd },
    { 0xde, 0xfe },

    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe4, 0xc4 },
    { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 }, { 0xe8, 0xc8 },
    { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb }, { 0xec, 0xcc },
    { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },

    { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 }, { 0xf4, 0xd4 },
    { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 }, { 0xf9, 0xd9 },
    { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc }, { 0xfd, 0xdd },
    { 0xfe, 0xde }
  };
  int count = 0;   /* stays 0 for any flag value not handled below */

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    /* The shared ASCII table has a fixed length of 52 entries. */
    *ccs = OnigAsciiPairAmbigCodes;
    count = 52;
  }
  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    count = (int )(sizeof(cc) / sizeof(cc[0]));
  }

  return count;
}
OnigEncodingType OnigEncodingISO_8859_3 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-3", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_3_mbc_to_lower,
iso_8859_3_mbc_is_case_ambig,
iso_8859_3_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_3_mbc_to_normalize,
iso_8859_3_is_mbc_ambiguous,
iso_8859_3_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_3_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_4.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c]
@ -47,69 +69,114 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_4_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0871, 0x0a51, 0x0050, 0x0a51, 0x0a51, 0x0050,
0x0050, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0050,
0x0050, 0x0871, 0x0050, 0x0871, 0x0050, 0x0871, 0x0871, 0x0050,
0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0a51, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0,
0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
};
/*
 * Lower-case normalisation of the ISO-8859-4 character at *pp.
 *
 * NOTE(review): this text comes from a rendered diff.  The
 * iso_8859_4_mbc_to_lower signature and the unconditional
 * "*lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);" right after the opening brace
 * look like the removed side of the hunk; the comments describe the
 * replacement, iso_8859_4_mbc_to_normalize.
 *
 * When ONIGENC_AMBIGUOUS_MATCH_COMPOUND is set and at least two bytes remain
 * before end, "ss" (or "SS" if ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set)
 * is folded to the single byte 0xdf and *pp advances by 2.  Mixed-case "sS" /
 * "Ss" is not folded.  Otherwise exactly one byte is written to *lower and
 * *pp advances by 1.  The function returns 1 on every path.
 */
static int
iso_8859_4_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* "end > p + 1" guarantees that *(p+1) below is still inside the input. */
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
/* Use the lower-case table only when the flag that covers this byte's class
   (ASCII or non-ASCII, per ONIGENC_IS_MBC_ASCII) is set. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
}
else {
/* Folding not requested for this class of byte: copy it through unchanged. */
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Report whether the ISO-8859-4 character at *pp takes part in
 * case-insensitive ("ambiguous") matching under the given flags, and advance
 * *pp past what was examined.
 *
 * NOTE(review): this text comes from a rendered diff.  The
 * iso_8859_4_mbc_is_case_ambig signature, the function-level "int v = ..."
 * and the first "if ((v | ...) ...) ... return FALSE; else" group look like
 * the removed side of the hunk.  The description below is of the
 * replacement, iso_8859_4_is_mbc_ambiguous.
 *
 * With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set: "ss" (or "SS" when
 * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) with two bytes available
 * consumes 2 bytes and yields TRUE; a lone 0xdf consumes 1 byte and yields
 * TRUE.  Otherwise 1 byte is consumed and the answer comes from the ctype
 * table, provided the flag covering the byte's ASCII / non-ASCII class is
 * set; if it is not, the result is FALSE.
 */
static int
iso_8859_4_mbc_is_case_ambig(UChar* p)
iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_4_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf and 0xa2 are lower case letters, but can't convert. */
if (*p == 0xdf || *p == 0xa2)
return FALSE;
else
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
/* Only look at *(p+1) when a second byte exists before end. */
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
/* NOTE(review): this branch returns TRUE without advancing *pp, whereas
   every other return in the replacement function follows an advance of
   *pp.  The only "v" declared before this point is the function-level
   one on the removed-looking line above, which indexes the table with *p
   ahead of the "const UChar* p = *pp;" line.  Confirm whether this
   branch belongs in the new function at all. */
else if (v != 0) {
return TRUE;
}
}
/* From here on p still addresses the examined byte; only *pp has moved. */
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_4_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): "v | ONIGENC_CTYPE_LOWER" is non-zero whenever
   ONIGENC_CTYPE_LOWER itself is non-zero, so this test looks always true:
   every byte other than 0xdf / 0xa2 then returns TRUE and the final
   "return (v != 0 ...)" is never reached.  "&" may have been intended --
   confirm against upstream. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf and 0xa2 are lower case letters, but can't convert. */
if (*p == 0xdf || *p == 0xa2)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_4_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
@ -117,39 +184,134 @@ iso_8859_4_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Select the table of ISO-8859-4 code pairs that match each other for one
 * kind of case-insensitive comparison.
 *
 * flag is compared for equality (it is not treated as a bit mask):
 *   ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE    -> *ccs = OnigAsciiPairAmbigCodes,
 *                                            returns 52
 *   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE -> *ccs = local table, returns its
 *                                            element count
 *   any other value                       -> returns 0, *ccs is not written
 */
static int
iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
                                    OnigPairAmbigCodes** ccs)
{
  /* Non-ASCII pairs; each one appears in both directions. */
  static OnigPairAmbigCodes cc[] = {
    { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, { 0xa6, 0xb6 },
    { 0xa9, 0xb9 }, { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc },
    { 0xae, 0xbe },

    { 0xb1, 0xa1 }, { 0xb3, 0xa3 }, { 0xb5, 0xa5 }, { 0xb6, 0xa6 },
    { 0xb9, 0xa9 }, { 0xba, 0xaa }, { 0xbb, 0xab }, { 0xbc, 0xac },
    { 0xbe, 0xae },

    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd8, 0xf8 },
    { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb }, { 0xdc, 0xfc },
    { 0xdd, 0xfd }, { 0xde, 0xfe },

    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },

    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf8, 0xd8 },
    { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb }, { 0xfc, 0xdc },
    { 0xfd, 0xdd }, { 0xfe, 0xde }
  };
  int count = 0;   /* stays 0 for any flag value not handled below */

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    /* The shared ASCII table has a fixed length of 52 entries. */
    *ccs = OnigAsciiPairAmbigCodes;
    count = 52;
  }
  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    count = (int )(sizeof(cc) / sizeof(cc[0]));
  }

  return count;
}
OnigEncodingType OnigEncodingISO_8859_4 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-4", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_4_mbc_to_lower,
iso_8859_4_mbc_is_case_ambig,
iso_8859_4_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_4_mbc_to_normalize,
iso_8859_4_is_mbc_ambiguous,
iso_8859_4_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_4_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_5.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
@ -47,58 +69,80 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_5_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0050, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2
};
/*
 * Case-normalize one ISO-8859-5 byte.
 *
 * NOTE(review): this span is rendered-diff text. The removed
 * iso_8859_5_mbc_to_lower() signature and its one-line body are shown
 * together with the added iso_8859_5_mbc_to_normalize() lines, without
 * +/- markers, so it is not a single valid definition as it stands.
 *
 * Added form, as far as the lines below show: reads the byte at *pp; when
 * the flag bit matching the byte's class (ASCII / non-ASCII) is set, stores
 * the lower-case table entry for that byte in *lower, otherwise stores the
 * byte unchanged; then advances *pp by one and returns 1.
 * `end` is not read by any line shown here.
 */
static int
iso_8859_5_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_5_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* Fold only when the caller asked for folding of this byte's class. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Report whether one ISO-8859-5 byte is case-ambiguous.
 *
 * NOTE(review): this span is rendered-diff text. The removed
 * iso_8859_5_mbc_is_case_ambig() signature and body lines are shown
 * together with the added iso_8859_5_is_mbc_ambiguous() lines, without
 * +/- markers, so it is not a single valid definition as it stands.
 *
 * Added form, as far as the lines below show: takes the byte at *pp and
 * advances *pp by one; when the flag bit matching the byte's class
 * (ASCII / non-ASCII) is set, returns TRUE if the ctype table marks the
 * byte as upper or lower case; returns FALSE in every other case.
 * `end` is not read by any line shown here.
 */
static int
iso_8859_5_mbc_is_case_ambig(UChar* p)
iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_5_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
return (v != 0 ? TRUE : FALSE);
(*pp)++;
/* Only classify the byte when folding of its class was requested. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_5_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
static int
iso_8859_5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
@ -106,39 +150,147 @@ iso_8859_5_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hand out the table of case-pair code points for ISO-8859-5.
 *
 * flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE:
 *     *ccs is set to the shared OnigAsciiPairAmbigCodes table, returns 52.
 * flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE:
 *     *ccs is set to the Cyrillic table below, returns its entry count.
 * any other value (including both bits combined):
 *     *ccs is left untouched, returns 0.
 *
 * The table is static, so the pointer stored in *ccs stays valid after
 * the call returns.
 */
static int
iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
                                    OnigPairAmbigCodes** ccs)
{
  /* Each pair is listed in both directions (upper->lower and lower->upper). */
  static OnigPairAmbigCodes cc[] = {
    /* 0xa1..0xaf -> 0xf1..0xff (0xad has no entry) */
    { 0xa1, 0xf1 }, { 0xa2, 0xf2 }, { 0xa3, 0xf3 }, { 0xa4, 0xf4 },
    { 0xa5, 0xf5 }, { 0xa6, 0xf6 }, { 0xa7, 0xf7 }, { 0xa8, 0xf8 },
    { 0xa9, 0xf9 }, { 0xaa, 0xfa }, { 0xab, 0xfb }, { 0xac, 0xfc },
    { 0xae, 0xfe }, { 0xaf, 0xff },

    /* 0xb0..0xbf -> 0xd0..0xdf */
    { 0xb0, 0xd0 }, { 0xb1, 0xd1 }, { 0xb2, 0xd2 }, { 0xb3, 0xd3 },
    { 0xb4, 0xd4 }, { 0xb5, 0xd5 }, { 0xb6, 0xd6 }, { 0xb7, 0xd7 },
    { 0xb8, 0xd8 }, { 0xb9, 0xd9 }, { 0xba, 0xda }, { 0xbb, 0xdb },
    { 0xbc, 0xdc }, { 0xbd, 0xdd },
    /* 0xbe pairs with 0xde (mirrors the { 0xde, 0xbe } entry below);
       0xdf belongs to 0xbf only. */
    { 0xbe, 0xde }, { 0xbf, 0xdf },

    /* 0xc0..0xcf -> 0xe0..0xef */
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

    /* 0xd0..0xdf -> 0xb0..0xbf */
    { 0xd0, 0xb0 }, { 0xd1, 0xb1 }, { 0xd2, 0xb2 }, { 0xd3, 0xb3 },
    { 0xd4, 0xb4 }, { 0xd5, 0xb5 }, { 0xd6, 0xb6 }, { 0xd7, 0xb7 },
    { 0xd8, 0xb8 }, { 0xd9, 0xb9 }, { 0xda, 0xba }, { 0xdb, 0xbb },
    { 0xdc, 0xbc }, { 0xdd, 0xbd }, { 0xde, 0xbe }, { 0xdf, 0xbf },

    /* 0xe0..0xef -> 0xc0..0xcf */
    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },

    /* 0xf1..0xff -> 0xa1..0xaf (0xfd has no entry) */
    { 0xf1, 0xa1 }, { 0xf2, 0xa2 }, { 0xf3, 0xa3 }, { 0xf4, 0xa4 },
    { 0xf5, 0xa5 }, { 0xf6, 0xa6 }, { 0xf7, 0xa7 }, { 0xf8, 0xa8 },
    { 0xf9, 0xa9 }, { 0xfa, 0xaa }, { 0xfb, 0xab }, { 0xfc, 0xac },
    { 0xfe, 0xae }, { 0xff, 0xaf }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    /* Entry count of the shared ASCII table; presumably 26 letters in
       both directions -- confirm against its definition. */
    return 52;
  }

  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
  }
  else
    return 0;
}
/*
 * Encoding descriptor for ISO-8859-5.
 *
 * NOTE(review): this span is rendered-diff text. Removed and added
 * initializer lines are shown together without +/- markers, so it is not a
 * single valid initializer as it stands. The leading block of 1s, the
 * "max byte length" / "is_fold_match" / "ctype_support_level" /
 * "is continuous sb mb codepoint" lines, the iso_8859_5_mbc_to_lower,
 * iso_8859_5_mbc_is_case_ambig and iso_8859_5_code_is_ctype entries and the
 * onigenc_nothing_get_*fold* entries appear to be the removed side --
 * confirm against the upstream diff before relying on that split.
 */
OnigEncodingType OnigEncodingISO_8859_5 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-5", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_5_mbc_to_lower,
iso_8859_5_mbc_is_case_ambig,
iso_8859_5_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_5_mbc_to_normalize,
iso_8859_5_is_mbc_ambiguous,
iso_8859_5_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
iso_8859_5_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,52 +1,74 @@
/**********************************************************************
iso8859_6.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
static unsigned short EncISO_8859_6_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0000, 0x0000, 0x0000, 0x0050, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x00d0, 0x00d0, 0x0000, 0x0000,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x00d0, 0x0000, 0x0000, 0x0000, 0x00d0,
0x0000, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
static int
iso_8859_6_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
@ -55,38 +77,29 @@ iso_8859_6_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
/*
 * Encoding descriptor for ISO-8859-6.
 *
 * NOTE(review): this span is rendered-diff text. Removed and added
 * initializer lines are shown together without +/- markers, so it is not a
 * single valid initializer as it stands. The leading block of 1s, the
 * "max byte length" / "is_fold_match" / "ctype_support_level" /
 * "is continuous sb mb codepoint" lines, the onigenc_ascii_mbc_to_lower,
 * onigenc_ascii_mbc_is_case_ambig and iso_8859_6_code_is_ctype entries and
 * the onigenc_nothing_get_*fold* entries appear to be the removed side --
 * confirm against the upstream diff before relying on that split.
 *
 * Only ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is listed in the flag set below,
 * and the case callbacks named are the shared onigenc_ascii_* ones.
 */
OnigEncodingType OnigEncodingISO_8859_6 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-6", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_to_lower,
onigenc_ascii_mbc_is_case_ambig,
iso_8859_6_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
onigenc_ascii_mbc_to_normalize,
onigenc_ascii_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
iso_8859_6_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_7.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c]
@ -47,65 +69,87 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
};
static unsigned short EncISO_8859_7_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x00d0, 0x00d0, 0x0050, 0x0000, 0x0000, 0x0050, 0x0050,
0x0050, 0x0050, 0x0000, 0x00d0, 0x0050, 0x00d0, 0x0000, 0x00d0,
0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0050, 0x0a51, 0x00d0,
0x0a51, 0x0a51, 0x0a51, 0x00d0, 0x0a51, 0x0850, 0x0a51, 0x0a51,
0x0871, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0000, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0000
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0,
0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2,
0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000
};
/*
 * Case-normalize one ISO-8859-7 byte.
 *
 * NOTE(review): this span is rendered-diff text. The removed
 * iso_8859_7_mbc_to_lower() signature and its one-line body are shown
 * together with the added iso_8859_7_mbc_to_normalize() lines, without
 * +/- markers, so it is not a single valid definition as it stands.
 *
 * Added form, as far as the lines below show: reads the byte at *pp; when
 * the flag bit matching the byte's class (ASCII / non-ASCII) is set, stores
 * the lower-case table entry for that byte in *lower, otherwise stores the
 * byte unchanged; then advances *pp by one and returns 1.
 * `end` is not read by any line shown here.
 */
static int
iso_8859_7_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_7_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
const UChar* p = *pp;
/* Fold only when the caller asked for folding of this byte's class. */
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Report whether one ISO-8859-7 byte is case-ambiguous.
 *
 * NOTE(review): this span is rendered-diff text. The removed
 * iso_8859_7_mbc_is_case_ambig() lines are shown together with the added
 * iso_8859_7_is_mbc_ambiguous() lines, without +/- markers, so it is not a
 * single valid definition as it stands.
 *
 * Added form, as far as the lines below show: takes the byte at *pp and
 * advances *pp by one; when the flag bit matching the byte's class
 * (ASCII / non-ASCII) is set, the byte is classified with the ctype table:
 *   - lower-case byte: FALSE for 0xc0 and 0xe0, TRUE otherwise;
 *   - otherwise TRUE if it is upper case, FALSE if it is neither.
 * Returns FALSE when the matching flag bit is not set.
 * `end` is not read by any line shown here.
 *
 * The lower-case test uses `&`. The previous `(v | ONIGENC_CTYPE_LOWER) != 0`
 * was always true, which made every byte other than 0xc0/0xe0 report TRUE
 * and left the final `v != 0` return unreachable.
 */
static int
iso_8859_7_mbc_is_case_ambig(UChar* p)
iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_7_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v & ONIGENC_CTYPE_LOWER) != 0) {
if (*p == 0xc0 || *p == 0xe0)
return FALSE;
else
return TRUE;
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_7_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* 0xc0 and 0xe0 are excluded explicitly; the pair table in this file
   has no entry for either byte. */
if ((v & ONIGENC_CTYPE_LOWER) != 0) {
if (*p == 0xc0 || *p == 0xe0)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return (v != 0 ? TRUE : FALSE);
return FALSE;
}
static int
iso_8859_7_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
@ -113,39 +157,122 @@ iso_8859_7_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hand out the table of case-pair code points for ISO-8859-7.
 *
 * flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE:
 *     *ccs is set to the shared OnigAsciiPairAmbigCodes table, returns 52.
 * flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE:
 *     *ccs is set to the table below, returns its entry count.
 * any other value (including both bits combined):
 *     *ccs is left untouched, returns 0.
 *
 * The table is static, so the pointer stored in *ccs stays valid after
 * the call returns.
 */
static int
iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
                                    OnigPairAmbigCodes** ccs)
{
  /* Each pair is listed in both directions. */
  static OnigPairAmbigCodes pair_table[] = {
    /* 0xb6..0xbf -> 0xdc..0xdf, 0xfc..0xfe */
    { 0xb6, 0xdc }, { 0xb8, 0xdd }, { 0xb9, 0xde }, { 0xba, 0xdf },
    { 0xbc, 0xfc }, { 0xbe, 0xfd }, { 0xbf, 0xfe },

    /* 0xc1..0xdb -> 0xe1..0xfb */
    { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 }, { 0xc4, 0xe4 },
    { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 }, { 0xc8, 0xe8 },
    { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb }, { 0xcc, 0xec },
    { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef }, { 0xd0, 0xf0 },
    { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 }, { 0xd4, 0xf4 },
    { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 }, { 0xd8, 0xf8 },
    { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },

    /* 0xdc..0xdf -> 0xb6..0xba */
    { 0xdc, 0xb6 }, { 0xdd, 0xb8 }, { 0xde, 0xb9 }, { 0xdf, 0xba },

    /* 0xe1..0xfb -> 0xc1..0xdb */
    { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 }, { 0xe4, 0xc4 },
    { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 }, { 0xe8, 0xc8 },
    { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb }, { 0xec, 0xcc },
    { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf }, { 0xf0, 0xd0 },
    { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 }, { 0xf4, 0xd4 },
    { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 }, { 0xf8, 0xd8 },
    { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },

    /* 0xfc..0xfe -> 0xbc..0xbf */
    { 0xfc, 0xbc }, { 0xfd, 0xbe }, { 0xfe, 0xbf }
  };
  int count = 0;

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    count = 52;
  }
  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = pair_table;
    count = (int)(sizeof(pair_table) / sizeof(OnigPairAmbigCodes));
  }

  return count;
}
/*
 * Encoding descriptor for ISO-8859-7.
 *
 * NOTE(review): this span is rendered-diff text. Removed and added
 * initializer lines are shown together without +/- markers, so it is not a
 * single valid initializer as it stands. The leading block of 1s, the
 * "max byte length" / "is_fold_match" / "ctype_support_level" /
 * "is continuous sb mb codepoint" lines, the iso_8859_7_mbc_to_lower,
 * iso_8859_7_mbc_is_case_ambig and iso_8859_7_code_is_ctype entries and the
 * onigenc_nothing_get_*fold* entries appear to be the removed side --
 * confirm against the upstream diff before relying on that split.
 */
OnigEncodingType OnigEncodingISO_8859_7 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-7", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_7_mbc_to_lower,
iso_8859_7_mbc_is_case_ambig,
iso_8859_7_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_7_mbc_to_normalize,
iso_8859_7_is_mbc_ambiguous,
iso_8859_7_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
iso_8859_7_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,52 +1,74 @@
/**********************************************************************
iso8859_8.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
static unsigned short EncISO_8859_8_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0000, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
0x0050, 0x0850, 0x0050, 0x00d0, 0x0850, 0x0850, 0x0850, 0x0000,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d0,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851, 0x0851,
0x0851, 0x0851, 0x0851, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Character-class test for ISO-8859-8: a code below 256 is answered by
 * ENC_IS_ISO_8859_8_CTYPE(code, ctype).
 * NOTE(review): both the earlier name (iso_8859_8_code_is_ctype) and the
 * new name (iso_8859_8_is_code_ctype) are listed, and the hunk header below
 * hides the rest of the body -- confirm the remaining branch against the
 * full file.
 */
static int
iso_8859_8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
@ -55,38 +77,29 @@ iso_8859_8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
/*
 * Encoding descriptor for ISO-8859-8 (name string "ISO-8859-8"). The inline
 * "max enc length" / "min enc length" values are both 1, and only
 * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is listed as a supported ambiguity flag.
 * NOTE(review): this listing appears to interleave fields of the previous
 * struct layout (256-entry length table, is_fold_match, fold-match hooks)
 * with the new layout -- some adjacent entries lack a separating comma -- so
 * confirm the field order against OnigEncodingType before relying on it.
 */
OnigEncodingType OnigEncodingISO_8859_8 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-8", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
onigenc_ascii_mbc_to_lower,
onigenc_ascii_mbc_is_case_ambig,
iso_8859_8_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
onigenc_ascii_mbc_to_normalize,
onigenc_ascii_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
iso_8859_8_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
iso8859_9.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c]
@ -47,66 +69,114 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
};
/*
 * Character-type bit masks for ISO-8859-9, indexed by byte value
 * (iso_8859_9_is_mbc_ambiguous reads EncISO_8859_9_CtypeTable[*p] and masks
 * it with ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER).
 * NOTE(review): two 256-entry listings appear back to back here (rows
 * starting 0x1004 and rows starting 0x2008); presumably the first is the
 * removed set of values and the second its replacement -- confirm against
 * the full file.
 */
static unsigned short EncISO_8859_9_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x00d0, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0871, 0x00d0, 0x0050, 0x00d0, 0x0050, 0x0050,
0x0050, 0x0050, 0x0850, 0x0850, 0x0050, 0x0871, 0x0050, 0x00d0,
0x0050, 0x0850, 0x0871, 0x00d0, 0x0850, 0x0850, 0x0850, 0x00d0,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0050,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
/*
 * Normalizes one character at *pp into *lower and advances *pp.
 *
 * When ONIGENC_AMBIGUOUS_MATCH_COMPOUND is set and at least two bytes remain
 * before end, the pair "ss" (or "SS" if ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
 * is also set) is written as the single byte 0xdf and *pp advances by two.
 * Otherwise the byte is mapped through ENC_ISO_8859_9_TO_LOWER_CASE when the
 * flag matching its range (ASCII vs. non-ASCII, per ONIGENC_IS_MBC_ASCII) is
 * set, and copied unchanged when it is not; *pp advances by one.
 * Returns 1 in every case.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * iso_8859_9_mbc_to_lower(p, lower) with the new function -- confirm the
 * exact line set against the full file.
 */
static int
iso_8859_9_mbc_to_lower(UChar* p, UChar* lower)
iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
const UChar* p = *pp;
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
*lower = 0xdf;
(*pp) += 2;
return 1;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Reports whether the character at *pp is ambiguous under the matching
 * modes in flag, and advances *pp past what was examined.
 *
 * With ONIGENC_AMBIGUOUS_MATCH_COMPOUND: "ss" (or "SS" when
 * ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) consumes two bytes and
 * yields TRUE; a lone 0xdf consumes one byte and yields TRUE.
 * Otherwise one byte is consumed, and the result is TRUE only when the flag
 * for the byte's range (ASCII vs. non-ASCII) is set and the byte is an
 * upper- or lower-case letter per EncISO_8859_9_CtypeTable, excluding 0xdf
 * and bytes 0xaa..0xba.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * iso_8859_9_mbc_is_case_ambig(p) with the new function. Only the
 * lower-case test inside the flag-guarded branch was changed here; every
 * other line is kept exactly as listed.
 */
static int
iso_8859_9_mbc_is_case_ambig(UChar* p)
iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
int v = (EncISO_8859_9_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
return FALSE;
else
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 1) {
if ((*p == 's' && *(p+1) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+1) == 'S'))) {
(*pp) += 2;
return TRUE;
}
}
if (*p == 0xdf) {
(*pp)++;
return TRUE;
}
}
return (v != 0 ? TRUE : FALSE);
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncISO_8859_9_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* Must be '&': with '|' the test is always true, so digits, punctuation
   and other non-letters were reported as case-ambiguous. */
if ((v & ONIGENC_CTYPE_LOWER) != 0) {
/* 0xdf etc.. are lower case letter, but can't convert. */
if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
/*
 * Character-class test for ISO-8859-9: a code below 256 is answered by
 * ENC_IS_ISO_8859_9_CTYPE(code, ctype); the visible fall-back result is
 * FALSE.
 * NOTE(review): both the earlier name (iso_8859_9_code_is_ctype) and the
 * new name (iso_8859_9_is_code_ctype) are listed, and a hunk header sits
 * inside the body -- confirm the hidden line(s) against the full file.
 */
static int
iso_8859_9_code_is_ctype(OnigCodePoint code, unsigned int ctype)
iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
@ -114,39 +184,114 @@ iso_8859_9_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hands back the table of single-byte case pairs for ISO-8859-9.
 *
 * flag: when exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, *ccs is pointed at
 *       OnigAsciiPairAmbigCodes and 52 is returned; when exactly
 *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE, *ccs is pointed at the local
 *       table below and its entry count is returned.
 * ccs:  output pointer; not written for any other flag value.
 * Returns the number of entries made available through *ccs, or 0.
 */
static int
iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
                                    OnigPairAmbigCodes** ccs)
{
  /* 0xc0..0xde paired with byte + 0x20 (0xd7 omitted), then the reverse
     direction 0xe0..0xfe (0xf7 omitted). */
  static OnigPairAmbigCodes nonascii_pairs[] = {
    { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
    { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
    { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
    { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

    { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
    { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
    { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
    { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe },

    { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
    { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
    { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
    { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },

    { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
    { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 },
    { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
    { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }
  };
  int count = 0;

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    count = 52;
  }
  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = nonascii_pairs;
    count = (int )(sizeof(nonascii_pairs) / sizeof(nonascii_pairs[0]));
  }

  return count;
}
/*
 * Encoding descriptor for ISO-8859-9 (name string "ISO-8859-9"). The inline
 * "max enc length" / "min enc length" values are both 1, and the supported
 * ambiguity flags are ASCII case, non-ASCII case and compound.
 * NOTE(review): this listing appears to interleave fields of the previous
 * struct layout (256-entry length table, is_fold_match, fold-match hooks)
 * with the new layout -- some adjacent entries lack a separating comma -- so
 * confirm the field order against OnigEncodingType before relying on it.
 */
OnigEncodingType OnigEncodingISO_8859_9 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"ISO-8859-9", /* name */
1, /* max byte length */
TRUE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
iso_8859_9_mbc_to_lower,
iso_8859_9_mbc_is_case_ambig,
iso_8859_9_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
iso_8859_9_mbc_to_normalize,
iso_8859_9_is_mbc_ambiguous,
iso_8859_9_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
iso_8859_9_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_get_all_fold_match_code_ss_0xdf,
onigenc_get_fold_match_info_ss_0xdf
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
koi8.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c]
@ -47,58 +69,79 @@ static UChar EncKOI8_ToLowerCaseTable[256] = {
};
/*
 * Character-type bit masks for KOI8, indexed by byte value
 * (koi8_is_mbc_ambiguous reads EncKOI8_CtypeTable[*p] and masks it with
 * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER).
 * NOTE(review): rows of two different value sets are listed together here
 * (values such as 0x1004/0x0871 and values such as 0x2008/0x10e2), and the
 * row count exceeds 256 entries; presumably removed and replacement rows of
 * the same table -- confirm against the full file.
 */
static unsigned short EncKOI8_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004, 0x0004,
0x0142, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
};
/*
 * Normalizes one byte at *pp into *lower and advances *pp by one.
 * The byte is mapped through ENC_KOI8_TO_LOWER_CASE when the flag matching
 * its range (ASCII vs. non-ASCII, per ONIGENC_IS_MBC_ASCII) is set, and
 * copied unchanged otherwise. end is not read. Returns 1 in every case.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * koi8_mbc_to_lower(p, lower) with the new function. Unlike the ISO-8859-9
 * and KOI8-R counterparts in this listing, pp and end are not
 * const-qualified here -- confirm this matches the callback type expected
 * by OnigEncodingType.
 */
static int
koi8_mbc_to_lower(UChar* p, UChar* lower)
koi8_mbc_to_normalize(OnigAmbigType flag,
UChar** pp, UChar* end, UChar* lower)
{
*lower = ENC_KOI8_TO_LOWER_CASE(*p);
UChar* p = *pp;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_KOI8_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Advances *pp by one byte and reports whether the byte it pointed at is
 * case-ambiguous: TRUE when the flag matching the byte's range (ASCII vs.
 * non-ASCII, per ONIGENC_IS_MBC_ASCII) is set and its EncKOI8_CtypeTable
 * entry has ONIGENC_CTYPE_UPPER or ONIGENC_CTYPE_LOWER; FALSE otherwise.
 * end is not read.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * koi8_mbc_is_case_ambig(p) with the new function -- confirm the exact line
 * set against the full file.
 */
static int
koi8_mbc_is_case_ambig(UChar* p)
koi8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end)
{
int v = (EncKOI8_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
UChar* p = *pp;
return ((v != 0) ? TRUE : FALSE);
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncKOI8_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
/*
 * Character-class test for KOI8: a code below 256 is answered by
 * ENC_IS_KOI8_CTYPE(code, ctype); the visible fall-back result is FALSE.
 * NOTE(review): both the earlier name (koi8_code_is_ctype) and the new name
 * (koi8_is_code_ctype) are listed, and a hunk header sits inside the body --
 * confirm the hidden line(s) against the full file.
 */
static int
koi8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_CTYPE(code, ctype);
@ -106,39 +149,116 @@ koi8_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hands back the table of single-byte case pairs for KOI8.
 *
 * flag: when exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, *ccs is pointed at
 *       OnigAsciiPairAmbigCodes and 52 is returned; when exactly
 *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE, *ccs is pointed at the local
 *       table below and its entry count is returned.
 * ccs:  output pointer; not written for any other flag value.
 * Returns the number of entries made available through *ccs, or 0.
 *
 * The local table pairs every byte in 0xc0..0xdf with byte + 0x20 and lists
 * the reverse direction as well. The reverse entry { 0xfd, 0xdd } was
 * missing (the list jumped from 0xfc to 0xfe) and is restored here so that
 * both directions cover the same 32 letters.
 */
static int
koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
                              OnigPairAmbigCodes** ccs)
{
  static OnigPairAmbigCodes cc[] = {
    { 0xc0, 0xe0 },
    { 0xc1, 0xe1 },
    { 0xc2, 0xe2 },
    { 0xc3, 0xe3 },
    { 0xc4, 0xe4 },
    { 0xc5, 0xe5 },
    { 0xc6, 0xe6 },
    { 0xc7, 0xe7 },
    { 0xc8, 0xe8 },
    { 0xc9, 0xe9 },
    { 0xca, 0xea },
    { 0xcb, 0xeb },
    { 0xcc, 0xec },
    { 0xcd, 0xed },
    { 0xce, 0xee },
    { 0xcf, 0xef },

    { 0xd0, 0xf0 },
    { 0xd1, 0xf1 },
    { 0xd2, 0xf2 },
    { 0xd3, 0xf3 },
    { 0xd4, 0xf4 },
    { 0xd5, 0xf5 },
    { 0xd6, 0xf6 },
    { 0xd7, 0xf7 },
    { 0xd8, 0xf8 },
    { 0xd9, 0xf9 },
    { 0xda, 0xfa },
    { 0xdb, 0xfb },
    { 0xdc, 0xfc },
    { 0xdd, 0xfd },
    { 0xde, 0xfe },
    { 0xdf, 0xff },

    { 0xe0, 0xc0 },
    { 0xe1, 0xc1 },
    { 0xe2, 0xc2 },
    { 0xe3, 0xc3 },
    { 0xe4, 0xc4 },
    { 0xe5, 0xc5 },
    { 0xe6, 0xc6 },
    { 0xe7, 0xc7 },
    { 0xe8, 0xc8 },
    { 0xe9, 0xc9 },
    { 0xea, 0xca },
    { 0xeb, 0xcb },
    { 0xec, 0xcc },
    { 0xed, 0xcd },
    { 0xee, 0xce },
    { 0xef, 0xcf },

    { 0xf0, 0xd0 },
    { 0xf1, 0xd1 },
    { 0xf2, 0xd2 },
    { 0xf3, 0xd3 },
    { 0xf4, 0xd4 },
    { 0xf5, 0xd5 },
    { 0xf6, 0xd6 },
    { 0xf7, 0xd7 },
    { 0xf8, 0xd8 },
    { 0xf9, 0xd9 },
    { 0xfa, 0xda },
    { 0xfb, 0xdb },
    { 0xfc, 0xdc },
    { 0xfd, 0xdd },  /* previously missing reverse of { 0xdd, 0xfd } */
    { 0xfe, 0xde },
    { 0xff, 0xdf }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    return 52;
  }
  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
  }
  else
    return 0;
}
/*
 * Encoding descriptor for KOI8 (name string "KOI8"). The inline
 * "max enc length" / "min enc length" values are both 1, and the supported
 * ambiguity flags are ASCII case and non-ASCII case.
 * NOTE(review): this listing appears to interleave fields of the previous
 * struct layout (256-entry length table, is_fold_match, fold-match hooks)
 * with the new layout -- some adjacent entries lack a separating comma -- so
 * confirm the field order against OnigEncodingType before relying on it.
 */
OnigEncodingType OnigEncodingKOI8 = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"KOI8", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_mbc_to_lower,
koi8_mbc_is_case_ambig,
koi8_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
koi8_mbc_to_normalize,
koi8_is_mbc_ambiguous,
koi8_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
koi8_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,10 +1,32 @@
/**********************************************************************
koi8_r.c - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
#define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c]
@ -47,58 +69,78 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = {
};
/*
 * Character-type bit masks for KOI8-R, indexed by byte value
 * (koi8_r_is_mbc_ambiguous reads EncKOI8_R_CtypeTable[*p] and masks it with
 * ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER).
 * NOTE(review): two 256-entry listings appear back to back here (rows
 * starting 0x1004 and rows starting 0x2008); presumably the first is the
 * removed set of values and the second its replacement -- confirm against
 * the full file.
 */
static unsigned short EncKOI8_R_CtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0142, 0x0050, 0x0050, 0x0850, 0x00d0, 0x0050,
0x0050, 0x0050, 0x0050, 0x0871, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x0a51, 0x0050, 0x0050, 0x0050, 0x0050,
0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050, 0x0050,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871, 0x0871,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51,
0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51, 0x0a51
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
};
/*
 * Normalizes one byte at *pp into *lower and advances *pp by one.
 * The byte is mapped through ENC_KOI8_R_TO_LOWER_CASE when the flag matching
 * its range (ASCII vs. non-ASCII, per ONIGENC_IS_MBC_ASCII) is set, and
 * copied unchanged otherwise. end is not read. Returns 1 in every case.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * koi8_r_mbc_to_lower(p, lower) with the new function -- confirm the exact
 * line set against the full file.
 */
static int
koi8_r_mbc_to_lower(UChar* p, UChar* lower)
koi8_r_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
*lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
const UChar* p = *pp;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1; /* return byte length of converted char to lower */
}
/*
 * Advances *pp by one byte and reports whether the byte it pointed at is
 * case-ambiguous: TRUE when the flag matching the byte's range (ASCII vs.
 * non-ASCII, per ONIGENC_IS_MBC_ASCII) is set and its EncKOI8_R_CtypeTable
 * entry has ONIGENC_CTYPE_UPPER or ONIGENC_CTYPE_LOWER; FALSE otherwise.
 * end is not read.
 *
 * NOTE(review): this listing interleaves lines of the earlier
 * koi8_r_mbc_is_case_ambig(p) with the new function -- confirm the exact
 * line set against the full file.
 */
static int
koi8_r_mbc_is_case_ambig(UChar* p)
koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
int v = (EncKOI8_R_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
const UChar* p = *pp;
return ((v != 0) ? TRUE : FALSE);
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
int v = (EncKOI8_R_CtypeTable[*p] &
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
return (v != 0 ? TRUE : FALSE);
}
return FALSE;
}
/*
 * Character-class test for KOI8-R: a code below 256 is answered by
 * ENC_IS_KOI8_R_CTYPE(code, ctype); the visible fall-back result is FALSE.
 * NOTE(review): both the earlier name (koi8_r_code_is_ctype) and the new
 * name (koi8_r_is_code_ctype) are listed, and a hunk header sits inside the
 * body -- confirm the hidden line(s) against the full file.
 */
static int
koi8_r_code_is_ctype(OnigCodePoint code, unsigned int ctype)
koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_KOI8_R_CTYPE(code, ctype);
@ -106,39 +148,116 @@ koi8_r_code_is_ctype(OnigCodePoint code, unsigned int ctype)
return FALSE;
}
/*
 * Hands back the table of single-byte case pairs for KOI8-R.
 *
 * flag: when exactly ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, *ccs is pointed at
 *       OnigAsciiPairAmbigCodes and 52 is returned; when exactly
 *       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE, *ccs is pointed at the local
 *       table below and its entry count is returned.
 * ccs:  output pointer; not written for any other flag value.
 * Returns the number of entries made available through *ccs, or 0.
 *
 * The local table pairs every byte in 0xc0..0xdf with byte + 0x20 and lists
 * the reverse direction as well. The reverse entry { 0xfd, 0xdd } was
 * missing (the list jumped from 0xfc to 0xfe) and is restored here so that
 * both directions cover the same 32 letters.
 */
static int
koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
                                OnigPairAmbigCodes** ccs)
{
  static OnigPairAmbigCodes cc[] = {
    { 0xc0, 0xe0 },
    { 0xc1, 0xe1 },
    { 0xc2, 0xe2 },
    { 0xc3, 0xe3 },
    { 0xc4, 0xe4 },
    { 0xc5, 0xe5 },
    { 0xc6, 0xe6 },
    { 0xc7, 0xe7 },
    { 0xc8, 0xe8 },
    { 0xc9, 0xe9 },
    { 0xca, 0xea },
    { 0xcb, 0xeb },
    { 0xcc, 0xec },
    { 0xcd, 0xed },
    { 0xce, 0xee },
    { 0xcf, 0xef },

    { 0xd0, 0xf0 },
    { 0xd1, 0xf1 },
    { 0xd2, 0xf2 },
    { 0xd3, 0xf3 },
    { 0xd4, 0xf4 },
    { 0xd5, 0xf5 },
    { 0xd6, 0xf6 },
    { 0xd7, 0xf7 },
    { 0xd8, 0xf8 },
    { 0xd9, 0xf9 },
    { 0xda, 0xfa },
    { 0xdb, 0xfb },
    { 0xdc, 0xfc },
    { 0xdd, 0xfd },
    { 0xde, 0xfe },
    { 0xdf, 0xff },

    { 0xe0, 0xc0 },
    { 0xe1, 0xc1 },
    { 0xe2, 0xc2 },
    { 0xe3, 0xc3 },
    { 0xe4, 0xc4 },
    { 0xe5, 0xc5 },
    { 0xe6, 0xc6 },
    { 0xe7, 0xc7 },
    { 0xe8, 0xc8 },
    { 0xe9, 0xc9 },
    { 0xea, 0xca },
    { 0xeb, 0xcb },
    { 0xec, 0xcc },
    { 0xed, 0xcd },
    { 0xee, 0xce },
    { 0xef, 0xcf },

    { 0xf0, 0xd0 },
    { 0xf1, 0xd1 },
    { 0xf2, 0xd2 },
    { 0xf3, 0xd3 },
    { 0xf4, 0xd4 },
    { 0xf5, 0xd5 },
    { 0xf6, 0xd6 },
    { 0xf7, 0xd7 },
    { 0xf8, 0xd8 },
    { 0xf9, 0xd9 },
    { 0xfa, 0xda },
    { 0xfb, 0xdb },
    { 0xfc, 0xdc },
    { 0xfd, 0xdd },  /* previously missing reverse of { 0xdd, 0xfd } */
    { 0xfe, 0xde },
    { 0xff, 0xdf }
  };

  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
    *ccs = OnigAsciiPairAmbigCodes;
    return 52;
  }
  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
    *ccs = cc;
    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
  }
  else
    return 0;
}
/*
 * Encoding descriptor for KOI8-R, a single-byte encoding (every length
 * entry below is 1).
 *
 * NOTE(review): this span mixes entries that cannot all belong to one
 * initializer (an inline 256-entry length table next to
 * onigenc_single_byte_mbc_enc_len, "max byte length" next to
 * "max enc length", koi8_r_mbc_to_lower next to koi8_r_mbc_to_normalize,
 * and a missing comma before the last entry).  It reads like removed and
 * added diff lines interleaved; reconcile against OnigEncodingType before
 * editing.
 */
OnigEncodingType OnigEncodingKOI8_R = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
onigenc_single_byte_mbc_enc_len,
"KOI8-R", /* name */
1, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
TRUE, /* is continuous sb mb codepoint */
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
koi8_r_mbc_to_lower,
koi8_r_mbc_is_case_ambig,
koi8_r_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
koi8_r_mbc_to_normalize,
koi8_r_is_mbc_ambiguous,
koi8_r_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
koi8_r_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_single_byte_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
onigenc_always_true_is_allowed_reverse_match
};

View File

@ -1,32 +1,55 @@
/**********************************************************************
mktable.c
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdio.h>
#define NOT_RUBY
#include "regenc.h"
#define ISO_8859_1 0
#define ISO_8859_2 1
#define ISO_8859_3 2
#define ISO_8859_4 3
#define ISO_8859_5 4
#define ISO_8859_6 5
#define ISO_8859_7 6
#define ISO_8859_8 7
#define ISO_8859_9 8
#define ISO_8859_10 9
#define ISO_8859_11 10
#define ISO_8859_13 11
#define ISO_8859_14 12
#define ISO_8859_15 13
#define ISO_8859_16 14
#define KOI8 15
#define KOI8_R 16
#define UNICODE_ISO_8859_1 0
#define ISO_8859_1 1
#define ISO_8859_2 2
#define ISO_8859_3 3
#define ISO_8859_4 4
#define ISO_8859_5 5
#define ISO_8859_6 6
#define ISO_8859_7 7
#define ISO_8859_8 8
#define ISO_8859_9 9
#define ISO_8859_10 10
#define ISO_8859_11 11
#define ISO_8859_13 12
#define ISO_8859_14 13
#define ISO_8859_15 14
#define ISO_8859_16 15
#define KOI8 16
#define KOI8_R 17
typedef struct {
int num;
@ -34,6 +57,7 @@ typedef struct {
} ENC_INFO;
static ENC_INFO Info[] = {
{ UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" },
{ ISO_8859_1, "ISO_8859_1" },
{ ISO_8859_2, "ISO_8859_2" },
{ ISO_8859_3, "ISO_8859_3" },
@ -60,6 +84,7 @@ static int IsAlpha(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@ -232,6 +257,7 @@ static int IsBlank(int enc, int c)
if (c == 0x09 || c == 0x20) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@ -267,6 +293,9 @@ static int IsCntrl(int enc, int c)
if (c >= 0x00 && c <= 0x1F) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
if (c == 0xad) return 1;
/* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@ -286,6 +315,7 @@ static int IsCntrl(int enc, int c)
if (c >= 0x7f && c <= 0x9F) return 1;
break;
case KOI8_R:
if (c == 0x7f) return 1;
break;
@ -308,6 +338,7 @@ static int IsGraph(int enc, int c)
if (c >= 0x21 && c <= 0x7e) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_4:
@ -376,6 +407,7 @@ static int IsLower(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@ -504,6 +536,10 @@ static int IsPrint(int enc, int c)
if (c >= 0x20 && c <= 0x7e) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
if (c >= 0x09 && c <= 0x0d) return 1;
if (c == 0x85) return 1;
/* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_4:
@ -572,11 +608,11 @@ static int IsPrint(int enc, int c)
static int IsPunct(int enc, int c)
{
#ifndef BY_UNICODE_PROPERTY
if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
c == 0x7c || c == 0x7e) return 1;
if (c >= 0x3c && c <= 0x3e) return 1;
#endif
if (enc == UNICODE_ISO_8859_1) {
if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
c == 0x7c || c == 0x7e) return 1;
if (c >= 0x3c && c <= 0x3e) return 1;
}
if (c >= 0x21 && c <= 0x23) return 1;
if (c >= 0x25 && c <= 0x2a) return 1;
@ -592,9 +628,11 @@ static int IsPunct(int enc, int c)
case ISO_8859_1:
case ISO_8859_9:
case ISO_8859_15:
if (c == 0xad) return 1;
/* fall */
case UNICODE_ISO_8859_1:
if (c == 0xa1) return 1;
if (c == 0xab) return 1;
if (c == 0xad) return 1;
if (c == 0xb7) return 1;
if (c == 0xbb) return 1;
if (c == 0xbf) return 1;
@ -675,6 +713,9 @@ static int IsSpace(int enc, int c)
if (c == 0x20) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
if (c == 0x85) return 1;
/* fall */
case ISO_8859_1:
case ISO_8859_2:
case ISO_8859_3:
@ -710,6 +751,7 @@ static int IsUpper(int enc, int c)
if (c >= 0x41 && c <= 0x5a) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c >= 0xc0 && c <= 0xd6) return 1;
@ -844,6 +886,7 @@ static int IsWord(int enc, int c)
if (c >= 0x61 && c <= 0x7a) return 1;
switch (enc) {
case UNICODE_ISO_8859_1:
case ISO_8859_1:
case ISO_8859_9:
if (c == 0xaa) return 1;
@ -1019,6 +1062,12 @@ static int IsAscii(int enc, int c)
return 0;
}
/*
 * Newline predicate for the ctype table generator.
 * Returns 1 when c is 0x0a and 0 otherwise; `enc` is not consulted.
 */
static int IsNewline(int enc, int c)
{
  (void)enc;
  return (c == 0x0a) ? 1 : 0;
}
static int exec(FILE* fp, ENC_INFO* einfo)
{
#define NCOL 8
@ -1032,19 +1081,20 @@ static int exec(FILE* fp, ENC_INFO* einfo)
for (c = 0; c < 256; c++) {
val = 0;
if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
if (IsNewline(enc, c)) val |= ONIGENC_CTYPE_NEWLINE;
if (IsAlpha (enc, c)) val |= ONIGENC_CTYPE_ALPHA;
if (IsBlank (enc, c)) val |= ONIGENC_CTYPE_BLANK;
if (IsCntrl (enc, c)) val |= ONIGENC_CTYPE_CNTRL;
if (IsDigit (enc, c)) val |= ONIGENC_CTYPE_DIGIT;
if (IsGraph (enc, c)) val |= ONIGENC_CTYPE_GRAPH;
if (IsLower (enc, c)) val |= ONIGENC_CTYPE_LOWER;
if (IsPrint (enc, c)) val |= ONIGENC_CTYPE_PRINT;
if (IsPunct (enc, c)) val |= ONIGENC_CTYPE_PUNCT;
if (IsSpace (enc, c)) val |= ONIGENC_CTYPE_SPACE;
if (IsUpper (enc, c)) val |= ONIGENC_CTYPE_UPPER;
if (IsXDigit(enc, c)) val |= ONIGENC_CTYPE_XDIGIT;
if (IsWord (enc, c)) val |= ONIGENC_CTYPE_WORD;
if (IsAscii (enc, c)) val |= ONIGENC_CTYPE_ASCII;
if (c % NCOL == 0) fputs(" ", fp);
fprintf(fp, "0x%04x", val);

View File

@ -1,12 +1,53 @@
/**********************************************************************
sjis.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Byte length of a Shift_JIS character, indexed by its first byte:
 * 2 for 0x81-0x9f and 0xe0-0xfc, 1 for every other value.
 * The table is only ever read, so it is const like the neighbouring
 * SJIS_CAN_BE_TRAIL_TABLE.
 */
static const int EncLen_SJIS[] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -26,17 +67,39 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
#define SJIS_ISMB_FIRST(byte) (OnigEncodingSJIS.len_table[byte] > 1)
#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1)
#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)]
/*
 * Byte length of the Shift_JIS character starting at p, looked up in
 * EncLen_SJIS by its first byte.
 */
static int
sjis_mbc_enc_len(const UChar* p)
{
  const UChar lead = p[0];

  return EncLen_SJIS[lead];
}
/*
 * Number of bytes needed to encode `code` in Shift_JIS.
 *
 * Returns 2 for codes in 0x100-0xffff, 1 for codes below 0x100 whose
 * EncLen_SJIS entry is 1, and 0 for everything else (a lone lead byte or
 * a code above 0xffff).
 */
extern int
sjis_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return 0;

  if (code >= 256)
    return 2;

  /* Single-byte range: valid only if the byte is not a lead byte. */
  return (EncLen_SJIS[(int )code] == 1) ? 1 : 0;
}
static OnigCodePoint
sjis_mbc_to_code(UChar* p, UChar* end)
sjis_mbc_to_code(const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
len = enc_len(ONIG_ENCODING_SJIS, p);
c = *p++;
len = enc_len(ONIG_ENCODING_SJIS, c);
n = c;
if (len == 1) return n;
@ -57,43 +120,58 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 0
if (enc_len(ONIG_ENCODING_SJIS, buf[0]) != (p - buf))
if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))
return REGERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
/*
 * Case-normalize the character at *pp into `lower`, advance *pp past it and
 * return the number of bytes written.
 *
 * An ASCII byte is lower-cased with ONIGENC_ASCII_CODE_TO_LOWER_CASE when
 * `flag` contains ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE, otherwise copied.
 * Any other character is copied byte for byte (enc_len bytes), unless
 * `lower` already aliases the input.  `end` is not read.
 *
 * NOTE(review): two signatures, two declarations of `len` and an
 * unconditional lower-casing assignment appear in this span; it reads like
 * removed and added diff lines interleaved.  Reconcile against the real
 * sjis.c before editing.
 */
static int
sjis_mbc_to_lower(UChar* p, UChar* lower)
sjis_mbc_to_normalize(OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
int len;
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1;
}
else {
len = enc_len(ONIG_ENCODING_SJIS, *p);
int len = enc_len(ONIG_ENCODING_SJIS, p);
if (lower != p) {
/* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Case-ambiguity test for Shift_JIS: forwards to
 * onigenc_mbn_is_mbc_ambiguous with ONIG_ENCODING_SJIS and the caller's
 * arguments unchanged.
 *
 * NOTE(review): the sjis_code_is_ctype line below looks like a removed
 * signature left over from the diff rendering, not part of this function.
 */
static int
sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
}
static int
sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
int first = onigenc_mb2_code_to_mbc_first(code);
return (enc_len(ONIG_ENCODING_SJIS, first) > 1 ? TRUE : FALSE);
return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
ctype &= ~ONIGENC_CTYPE_WORD;
@ -107,12 +185,12 @@ sjis_code_is_ctype(OnigCodePoint code, unsigned int ctype)
}
static UChar*
sjis_left_adjust_char_head(UChar* start, UChar* s)
sjis_left_adjust_char_head(const UChar* start, const UChar* s)
{
UChar *p;
const UChar *p;
int len;
if (s <= start) return s;
if (s <= start) return (UChar* )s;
p = s;
if (SJIS_ISMB_TRAIL(*p)) {
@ -123,52 +201,43 @@ sjis_left_adjust_char_head(UChar* start, UChar* s)
}
}
}
len = enc_len(ONIG_ENCODING_SJIS, *p);
if (p + len > s) return p;
len = enc_len(ONIG_ENCODING_SJIS, p);
if (p + len > s) return (UChar* )p;
p += len;
return p + ((s - p) & ~1);
return (UChar* )(p + ((s - p) & ~1));
}
/*
 * Returns FALSE when the byte at s could be a Shift_JIS trail byte
 * (per SJIS_ISMB_TRAIL) and TRUE otherwise.  `end` is not read.
 *
 * NOTE(review): the signature and the declaration of `c` each appear twice
 * (with and without const); this reads like removed and added diff lines
 * interleaved.
 */
static int
sjis_is_allowed_reverse_match(UChar* s, UChar* end)
sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
UChar c = *s;
const UChar c = *s;
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
}
/*
 * Encoding descriptor for Shift_JIS (1 to 2 bytes per character, ASCII-only
 * case folding).
 *
 * NOTE(review): this span mixes entries that cannot all belong to one
 * initializer (an inline length table next to sjis_mbc_enc_len,
 * onigenc_mb2_code_to_mbclen next to sjis_code_to_mbclen, sjis_mbc_to_lower
 * next to sjis_mbc_to_normalize, and a missing comma before the last
 * entry).  It reads like removed and added diff lines interleaved;
 * reconcile against OnigEncodingType before editing.
 */
OnigEncodingType OnigEncodingSJIS = {
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
},
sjis_mbc_enc_len,
"Shift_JIS", /* name */
2, /* max byte length */
FALSE, /* is_fold_match */
ONIGENC_CTYPE_SUPPORT_LEVEL_SB, /* ctype_support_level */
FALSE, /* is continuous sb mb codepoint */
1, /* min byte length */
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
onigenc_is_mbc_newline_0x0a,
sjis_mbc_to_code,
onigenc_mb2_code_to_mbclen,
sjis_code_to_mbclen,
sjis_code_to_mbc,
sjis_mbc_to_lower,
onigenc_mbn_mbc_is_case_ambig,
sjis_code_is_ctype,
onigenc_nothing_get_ctype_code_range,
sjis_mbc_to_normalize,
sjis_is_mbc_ambiguous,
onigenc_ascii_get_all_pair_ambig_codes,
onigenc_nothing_get_all_comp_ambig_codes,
sjis_is_code_ctype,
onigenc_not_support_get_ctype_code_range,
sjis_left_adjust_char_head,
sjis_is_allowed_reverse_match,
onigenc_nothing_get_all_fold_match_code,
onigenc_nothing_get_fold_match_info
sjis_is_allowed_reverse_match
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,253 @@
/**********************************************************************
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Classify the high-order byte of a UTF-16 code unit: 0xd8-0xdb starts a
 * surrogate pair, 0xdc-0xdf is the second unit of one.
 * The argument is parenthesized so that an expression argument (for
 * example a conditional) is compared as a whole.  It is still evaluated
 * twice, so do not pass an argument with side effects.
 */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
/*
 * Byte length of a UTF-16 character, indexed by the high-order byte of its
 * first code unit: 4 for 0xd8-0xdb (start of a surrogate pair), 2 for every
 * other value.  The table is only ever read, so it is declared const.
 */
static const int EncLen_UTF16[] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/*
 * Byte length of the UTF-16BE character at p, looked up in EncLen_UTF16 by
 * its first byte (the high-order byte of the first code unit).
 */
static int
utf16be_mbc_enc_len(const UChar* p)
{
  const UChar high = p[0];

  return EncLen_UTF16[high];
}
/*
 * Returns 1 when at least two bytes are available before `end` and they
 * are 00 0A (big-endian U+000A); returns 0 otherwise.
 */
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (!(p + 1 < end))
    return 0;

  return (p[1] == 0x0a && p[0] == 0x00) ? 1 : 0;
}
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end)
{
OnigCodePoint code;
if (UTF16_IS_SURROGATE_FIRST(*p)) {
code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+ ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+ p[3];
}
else {
code = p[0] * 256 + p[1];
}
return code;
}
/*
 * Bytes needed to encode `code` in UTF-16: 4 above 0xffff (a surrogate
 * pair), 2 otherwise.
 */
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return 4;

  return 2;
}
/*
 * Encode `code` as UTF-16BE into buf and return the number of bytes
 * written: 4 (a surrogate pair) for codes above 0xffff, 2 otherwise.
 * buf must have room for 4 bytes.  The code point is not validated.
 *
 * Two defects in the surrogate branch are fixed so that the output
 * round-trips through utf16be_mbc_to_code:
 *  - the 0x10000 offset was never removed (plane was not decremented),
 *    which set stray bits in the first surrogate;
 *  - the two top bits of the second surrogate's payload were masked with
 *    0x02 instead of 0x03, dropping bit 8 of the code point.
 */
static int
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  UChar* p = buf;

  if (code > 0xffff) {
    unsigned int plane, high;

    /* Bits 16-20 of (code - 0x10000). */
    plane = (unsigned int )(code >> 16) - 1;
    /* Bits 8-15 of the code point. */
    high = (unsigned int )((code & 0xff00) >> 8);

    *p++ = (UChar )((plane >> 2) + 0xd8);
    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
    *p++ = (UChar )((high & 0x03) + 0xdc);
    *p   = (UChar )(code & 0xff);
    return 4;
  }
  else {
    *p++ = (UChar )((code & 0xff00) >> 8);
    *p++ = (UChar )(code & 0xff);
    return 2;
  }
}
/*
 * Write the case-normalized form of the UTF-16BE character at *pp into
 * `lower`, advance *pp past the consumed input and return the number of
 * bytes written.
 *
 * High byte 0x00:
 *  - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, "ss" (or "SS" when ASCII
 *    case folding is also set) consumes 4 input bytes and is written as the
 *    single unit 00 DF;
 *  - otherwise the low byte goes through ONIGENC_ISO_8859_1_TO_LOWER_CASE
 *    when `flag` enables folding for its class (ASCII / non-ASCII), else it
 *    is copied; 2 bytes are consumed and written.
 * Any other high byte: EncLen_UTF16[high byte] bytes are copied verbatim
 * (the copy is skipped when `lower` aliases the input).
 */
static int
utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*p == 0) {
/* From here on p points at the low-order byte of the first code unit. */
p++;
/* end > p + 2 means a complete second code unit is available. */
if (end > p + 2 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+1) == 0) {
*lower++ = '\0';
*lower = 0xdf;
(*pp) += 4;
return 2;
}
*lower++ = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp) += 2;
return 2; /* return byte length of converted char to lower */
}
else {
int len;
len = EncLen_UTF16[*p];
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report whether the UTF-16BE character at *pp is case-ambiguous under
 * `flag`, and advance *pp past it.
 *
 * Only characters whose high byte is 0x00 can be ambiguous here:
 *  - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND, "ss" (or "SS" when ASCII case
 *    folding is also set) is ambiguous and consumes both code units; a
 *    lone 0xdf is ambiguous as well;
 *  - otherwise, when `flag` enables folding for the byte's class (ASCII /
 *    non-ASCII), the character is ambiguous if the ISO-8859-1 ctype table
 *    marks it as upper or lower case, except in 0xaa-0xba.
 *
 * Fix: the letter test used to read `(v | ONIGENC_CTYPE_LOWER) != 0`, which
 * is true for every value, so digits and punctuation were reported as
 * ambiguous and the final return was unreachable.  It now tests whether
 * the ctype lookup matched at all.
 */
static int
utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += EncLen_UTF16[*p];

  if (*p == 0) {
    int c, v;

    /* From here on p points at the low-order byte of the first code unit. */
    p++;
    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
      if (end > p + 2 &&
          ((*p == 's' && *(p+2) == 's') ||
           ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
            (*p == 'S' && *(p+2) == 'S'))) &&
          *(p+1) == 0) {
        (*pp) += 2;
        return TRUE;
      }
      else if (*p == 0xdf) {
        return TRUE;
      }
    }

    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
         !ONIGENC_IS_MBC_ASCII(p))) {
      c = *p;
      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
      if (v != 0) {
        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
        if (c >= 0xaa && c <= 0xba)
          return FALSE;
        else
          return TRUE;
      }
    }
  }

  return FALSE;
}
/*
 * Move s back to the first byte of the UTF-16BE character that contains it.
 *
 * s is first aligned down to an even offset from `start`; if that unit is
 * the second half of a surrogate pair and a full unit precedes it, s is
 * moved back one more unit.  s is returned unchanged when it is at or
 * before `start`.
 */
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head = s;

  if (head > start) {
    /* Offset parity is 0 or 1 here, so this drops back at most one byte. */
    head -= (head - start) % 2;

    if (UTF16_IS_SURROGATE_SECOND(*head) && head > start + 1)
      head -= 2;
  }

  return (UChar* )head;
}
/*
 * Encoding descriptor for UTF-16BE.
 *
 * The initializer is positional: the length callback, the display name,
 * the maximum and minimum bytes per character (4 and 2), the case-folding
 * modes supported (ASCII, non-ASCII and compound), the meta-character
 * table (only the escape character is active), then the per-encoding
 * callbacks.
 *
 * NOTE(review): slot order must match OnigEncodingType, which is declared
 * outside this file; confirm before adding or reordering entries.
 */
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
"UTF-16BE", /* name */
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf16be_is_mbc_newline,
utf16be_mbc_to_code,
utf16be_code_to_mbclen,
utf16be_code_to_mbc,
utf16be_mbc_to_normalize,
utf16be_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

View File

@ -0,0 +1,248 @@
/**********************************************************************
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Classify the high-order byte of a UTF-16 code unit: 0xd8-0xdb starts a
 * surrogate pair, 0xdc-0xdf is the second unit of one.
 * The argument is parenthesized so that an expression argument (for
 * example a conditional) is compared as a whole.  It is still evaluated
 * twice, so do not pass an argument with side effects.
 */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
/*
 * Byte length of a UTF-16 character, indexed by the high-order byte of its
 * first code unit: 4 for 0xd8-0xdb (start of a surrogate pair), 2 for every
 * other value.  The table is only ever read, so it is declared const.
 */
static const int EncLen_UTF16[] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/*
 * Bytes needed to encode `code` in UTF-16: 4 above 0xffff (a surrogate
 * pair), 2 otherwise.
 */
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff)
    return 4;

  return 2;
}
/*
 * Byte length of the UTF-16LE character at p, looked up in EncLen_UTF16 by
 * its second byte (the high-order byte of the first code unit).
 */
static int
utf16le_mbc_enc_len(const UChar* p)
{
  const UChar high = p[1];

  return EncLen_UTF16[high];
}
/*
 * Returns 1 when at least two bytes are available before `end` and they
 * are 0A 00 (little-endian U+000A); returns 0 otherwise.
 */
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (!(p + 1 < end))
    return 0;

  return (p[0] == 0x0a && p[1] == 0x00) ? 1 : 0;
}
static OnigCodePoint
utf16le_mbc_to_code(const UChar* p, const UChar* end)
{
OnigCodePoint code;
UChar c0 = *p;
UChar c1 = *(p+1);
if (UTF16_IS_SURROGATE_FIRST(c1)) {
code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
+ ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
+ p[2];
}
else {
code = c1 * 256 + p[0];
}
return code;
}
/*
 * Encode `code` as UTF-16LE into buf and return the number of bytes
 * written: 4 (a surrogate pair) for codes above 0xffff, 2 otherwise.
 * buf must have room for 4 bytes.  The code point is not validated.
 *
 * Two defects in the surrogate branch are fixed so that the output
 * round-trips through utf16le_mbc_to_code:
 *  - the 0x10000 offset was never removed (plane was not decremented),
 *    which set stray bits in the first surrogate;
 *  - the two top bits of the second surrogate's payload were masked with
 *    0x02 instead of 0x03, dropping bit 8 of the code point.
 */
static int
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  UChar* p = buf;

  if (code > 0xffff) {
    unsigned int plane, high;

    /* Bits 16-20 of (code - 0x10000). */
    plane = (unsigned int )(code >> 16) - 1;
    /* Bits 8-15 of the code point. */
    high = (unsigned int )((code & 0xff00) >> 8);

    /* Each code unit is stored low-order byte first. */
    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
    *p++ = (UChar )((plane >> 2) + 0xd8);
    *p++ = (UChar )(code & 0xff);
    *p   = (UChar )((high & 0x03) + 0xdc);
    return 4;
  }
  else {
    *p++ = (UChar )(code & 0xff);
    *p++ = (UChar )((code & 0xff00) >> 8);
    return 2;
  }
}
/*
 * Write the case-normalized form of the UTF-16LE character at *pp into
 * `lower`, advance *pp past the consumed input and return the number of
 * bytes written.
 *
 * High-order byte (p[1]) 0x00:
 *  - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, "ss" (or "SS" when ASCII
 *    case folding is also set) consumes 4 input bytes and is written as the
 *    single unit DF 00;
 *  - otherwise the low byte goes through ONIGENC_ISO_8859_1_TO_LOWER_CASE
 *    when `flag` enables folding for its class (ASCII / non-ASCII), else it
 *    is copied; 2 bytes are consumed and written.
 * Any other high-order byte: EncLen_UTF16[p[1]] bytes are copied verbatim
 * (the copy is skipped when `lower` aliases the input).
 */
static int
utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*(p+1) == 0) {
/* end > p + 3 means a complete second code unit is available. */
if (end > p + 3 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+3) == 0) {
*lower++ = 0xdf;
*lower = '\0';
(*pp) += 4;
return 2;
}
/* The high-order byte of the output is written first; the low byte follows. */
*(lower+1) = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp) += 2;
return 2; /* return byte length of converted char to lower */
}
else {
int len = EncLen_UTF16[*(p+1)];
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report whether the UTF-16LE character at *pp is case-ambiguous under
 * `flag`, and advance *pp past it.
 *
 * Only characters whose high-order byte (p[1]) is 0x00 can be ambiguous
 * here:
 *  - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND, "ss" (or "SS" when ASCII case
 *    folding is also set) is ambiguous and consumes both code units; a
 *    lone 0xdf is ambiguous as well;
 *  - otherwise, when `flag` enables folding for the byte's class (ASCII /
 *    non-ASCII), the character is ambiguous if the ISO-8859-1 ctype table
 *    marks it as upper or lower case, except in 0xaa-0xba.
 *
 * Fixes:
 *  - the letter test used to read `(v | ONIGENC_CTYPE_LOWER) != 0`, which
 *    is true for every value, so digits and punctuation were reported as
 *    ambiguous and the final return was unreachable;
 *  - the lone-0xdf compound case handled by the UTF-16BE counterpart was
 *    missing, so with only the compound flag set the two byte orders
 *    disagreed.
 */
static int
utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp) += EncLen_UTF16[*(p+1)];

  if (*(p+1) == 0) {
    int c, v;

    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
      if (end > p + 3 &&
          ((*p == 's' && *(p+2) == 's') ||
           ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
            (*p == 'S' && *(p+2) == 'S'))) &&
          *(p+3) == 0) {
        (*pp) += 2;
        return TRUE;
      }
      else if (*p == 0xdf) {
        return TRUE;
      }
    }

    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
         !ONIGENC_IS_MBC_ASCII(p))) {
      c = *p;
      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
      if (v != 0) {
        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
        if (c >= 0xaa && c <= 0xba)
          return FALSE;
        else
          return TRUE;
      }
    }
  }

  return FALSE;
}
/*
 * Move s back to the first byte of the UTF-16LE character that contains it.
 *
 * s is first aligned down to an even offset from `start`; if the
 * high-order byte of that unit (the byte after it) marks the second half
 * of a surrogate pair and a full unit precedes it, s is moved back one
 * more unit.  s is returned unchanged when it is at or before `start`.
 */
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head = s;

  if (head > start) {
    /* Offset parity is 0 or 1 here, so this drops back at most one byte. */
    head -= (head - start) % 2;

    if (UTF16_IS_SURROGATE_SECOND(head[1]) && head > start + 1)
      head -= 2;
  }

  return (UChar* )head;
}
/*
 * Encoding descriptor for UTF-16LE.
 *
 * The initializer is positional: the length callback, the display name,
 * the maximum and minimum bytes per character (4 and 2), the case-folding
 * modes supported (ASCII, non-ASCII and compound), the meta-character
 * table (only the escape character is active), then the per-encoding
 * callbacks.
 *
 * NOTE(review): slot order must match OnigEncodingType, which is declared
 * outside this file; confirm before adding or reordering entries.
 */
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
"UTF-16LE", /* name */
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf16le_is_mbc_newline,
utf16le_mbc_to_code,
utf16le_code_to_mbclen,
utf16le_code_to_mbc,
utf16le_mbc_to_normalize,
utf16le_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

View File

@ -0,0 +1,208 @@
/**********************************************************************
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Byte length of the character starting at `p`.
 * Always 4: the result does not depend on the bytes at `p`.
 */
static int
utf32be_mbc_enc_len(const UChar* p)
{
  (void )p;  /* fixed-width encoding: the lead byte is never inspected */
  return 4;
}
/*
 * Return 1 when the four bytes at `p` are 00 00 00 0A (big-endian U+000A)
 * and all four lie before `end`; return 0 otherwise.
 */
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
  /* fewer than four bytes left: cannot be a complete character */
  if (p + 3 >= end)
    return 0;

  if (p[3] != 0x0a)
    return 0;

  return (p[2] == 0 && p[1] == 0 && p[0] == 0) ? 1 : 0;
}
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
}
/*
 * Number of bytes needed to encode `code`.
 * Always 4: the value of `code` is not examined.
 */
static int
utf32be_code_to_mbclen(OnigCodePoint code)
{
  (void )code;  /* every code point takes the same number of bytes */
  return 4;
}
/*
 * Store `code` into buf[0..3], most significant byte first.
 * Returns the number of bytes written (always 4).
 */
static int
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  int shift;
  int i = 0;

  /* emit bits 31..24, 23..16, 15..8, 7..0 in that order */
  for (shift = 24; shift >= 0; shift -= 8) {
    buf[i++] = (UChar )((code >> shift) & 0xff);
  }

  return 4;
}
/*
 * Write the normalized form of the character at *pp into `lower` and
 * advance *pp past the input that was consumed.
 *
 * flag:  ONIGENC_AMBIGUOUS_MATCH_* bits selecting which foldings apply.
 * pp:    in/out cursor; advanced by 4, or by 8 when an "ss"/"SS" pair is
 *        folded into a single character.
 * end:   end of input, consulted only before looking at a following character.
 * lower: output buffer; always receives exactly 4 bytes.
 *
 * Only characters whose upper three bytes are zero are folded; everything
 * else is copied through unchanged.  Returns the number of bytes written (4).
 */
static int
utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                         UChar* lower)
{
  const UChar* src = *pp;
  const UChar* low;  /* least significant byte of the current character */
  int fold;

  if (src[2] != 0 || src[1] != 0 || src[0] != 0) {
    /* above U+00FF: pass the four bytes through untouched */
    if (lower != src) {
      int k;
      for (k = 0; k < 4; k++) {
        lower[k] = src[k];
      }
    }
    *pp += 4;
    return 4;
  }

  low = src + 3;

  /* compound folding: "ss" (or "SS" when ASCII case folding is on) -> 0xdf */
  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && end > low + 4) {
    int pair = (low[0] == 's' && low[4] == 's');

    if (!pair && (flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
      pair = (low[0] == 'S' && low[4] == 'S');

    /* the following character must also have its upper three bytes zero */
    if (pair && low[3] == 0 && low[2] == 0 && low[1] == 0) {
      lower[0] = '\0';
      lower[1] = '\0';
      lower[2] = '\0';
      lower[3] = 0xdf;
      *pp += 8;
      return 4;
    }
  }

  lower[0] = '\0';
  lower[1] = '\0';
  lower[2] = '\0';

  /* ASCII and non-ASCII case folding are enabled by separate flag bits */
  if (ONIGENC_IS_MBC_ASCII(low))
    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
  else
    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);

  if (fold)
    lower[3] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*low);
  else
    lower[3] = *low;

  *pp += 4;
  return 4; /* byte length of the normalized character */
}
/*
 * Report whether the character at *pp has more than one spelling under the
 * ambiguity kinds selected by `flag`.
 *
 * flag: ONIGENC_AMBIGUOUS_MATCH_* bits.
 * pp:   in/out cursor; always advanced by 4, and by 8 when an "ss"/"SS"
 *       pair is recognised as a compound.
 * end:  end of input, consulted only before looking at a following character.
 *
 * Only characters whose upper three bytes are zero can be ambiguous.
 * Returns TRUE or FALSE.
 */
static int
utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;
  int c, v;

  (*pp) += 4;

  /* above U+00FF: never ambiguous here */
  if (*(p+2) != 0 || *(p+1) != 0 || *p != 0)
    return FALSE;

  p += 3; /* least significant byte */

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
    if (end > p + 4 &&
        ((*p == 's' && *(p+4) == 's') ||
         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
          (*p == 'S' && *(p+4) == 'S'))) &&
        *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
      (*pp) += 4; /* the pair is consumed as one unit */
      return TRUE;
    }
    else if (*p == 0xdf) {
      return TRUE;
    }
  }

  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    c = *p;
    v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                    (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    /* Only cased letters can be case-ambiguous.  Testing
       (v | ONIGENC_CTYPE_LOWER) would be true for every character,
       digits and punctuation included. */
    if (v == 0)
      return FALSE;

    /* 0xaa, 0xb5, 0xba are lower case letters, but can't be converted. */
    if (c >= 0xaa && c <= 0xba)
      return FALSE;

    return TRUE;
  }

  return FALSE;
}
/*
 * Move `s` back to the first byte of the 4-byte unit that contains it,
 * counting units from `start`.  Returns `s` unchanged when s <= start.
 */
static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s > start) {
    /* bytes past the previous 4-byte boundary */
    int excess = (s - start) % 4;
    s -= excess;
  }
  return (UChar* )s;
}
/*
 * Encoding descriptor for UTF-32BE.
 * The initializers are positional: a scalar header (length function, name,
 * byte-length limits, supported ambiguity flags), the meta character table,
 * and then the per-encoding callbacks.  Their order must follow the member
 * order of OnigEncodingType.
 */
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
"UTF-32BE", /* name */
4, /* max byte length */
4, /* min byte length */
/* ambiguity kinds this encoding supports */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
/* meta character table: only the escape character is set */
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
/* UTF-32BE specific byte <-> code point handling (defined in this file) */
utf32be_is_mbc_newline,
utf32be_mbc_to_code,
utf32be_code_to_mbclen,
utf32be_code_to_mbc,
utf32be_mbc_to_normalize,
utf32be_is_mbc_ambiguous,
/* helpers shared with other encodings (not defined in this file) */
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

View File

@ -0,0 +1,206 @@
/**********************************************************************
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Byte length of the character starting at `p`.
 * Always 4: the result does not depend on the bytes at `p`.
 */
static int
utf32le_mbc_enc_len(const UChar* p)
{
  (void )p;  /* fixed-width encoding: the lead byte is never inspected */
  return 4;
}
/*
 * Return 1 when the four bytes at `p` are 0A 00 00 00 (little-endian U+000A)
 * and all four lie before `end`; return 0 otherwise.
 */
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
  /* fewer than four bytes left: cannot be a complete character */
  if (p + 3 >= end)
    return 0;

  if (p[0] != 0x0a)
    return 0;

  return (p[1] == 0 && p[2] == 0 && p[3] == 0) ? 1 : 0;
}
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
}
/*
 * Number of bytes needed to encode `code`.
 * Always 4: the value of `code` is not examined.
 */
static int
utf32le_code_to_mbclen(OnigCodePoint code)
{
  (void )code;  /* every code point takes the same number of bytes */
  return 4;
}
/*
 * Store `code` into buf[0..3], least significant byte first.
 * Returns the number of bytes written (always 4).
 */
static int
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  int i;

  /* byte i carries bits (8*i+7)..(8*i) of the code point */
  for (i = 0; i < 4; i++) {
    buf[i] = (UChar )((code >> (8 * i)) & 0xff);
  }

  return 4;
}
/*
 * Write the normalized form of the character at *pp into `lower` and
 * advance *pp past the input that was consumed.
 *
 * flag:  ONIGENC_AMBIGUOUS_MATCH_* bits selecting which foldings apply.
 * pp:    in/out cursor; advanced by 4, or by 8 when an "ss"/"SS" pair is
 *        folded into a single character.
 * end:   end of input, consulted only before looking at a following character.
 * lower: output buffer; always receives exactly 4 bytes.
 *
 * Only characters whose upper three bytes are zero are folded; everything
 * else is copied through unchanged.  Returns the number of bytes written (4).
 */
static int
utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
                         UChar* lower)
{
  const UChar* src = *pp;  /* src[0] is the least significant byte */
  int fold;

  if (src[1] != 0 || src[2] != 0 || src[3] != 0) {
    /* above U+00FF: pass the four bytes through untouched */
    if (lower != src) {
      int k;
      for (k = 0; k < 4; k++) {
        lower[k] = src[k];
      }
    }
    *pp += 4;
    return 4;
  }

  /* compound folding: "ss" (or "SS" when ASCII case folding is on) -> 0xdf */
  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && end > src + 7) {
    int pair = (src[0] == 's' && src[4] == 's');

    if (!pair && (flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0)
      pair = (src[0] == 'S' && src[4] == 'S');

    /* the following character must also have its upper three bytes zero */
    if (pair && src[5] == 0 && src[6] == 0 && src[7] == 0) {
      lower[0] = 0xdf;
      lower[1] = '\0';
      lower[2] = '\0';
      lower[3] = '\0';
      *pp += 8;
      return 4;
    }
  }

  /* ASCII and non-ASCII case folding are enabled by separate flag bits */
  if (ONIGENC_IS_MBC_ASCII(src))
    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0);
  else
    fold = ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0);

  if (fold)
    lower[0] = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*src);
  else
    lower[0] = *src;

  lower[1] = '\0';
  lower[2] = '\0';
  lower[3] = '\0';

  *pp += 4;
  return 4; /* byte length of the normalized character */
}
/*
 * Report whether the character at *pp has more than one spelling under the
 * ambiguity kinds selected by `flag`.
 *
 * flag: ONIGENC_AMBIGUOUS_MATCH_* bits.
 * pp:   in/out cursor; always advanced by 4, and by 8 when an "ss"/"SS"
 *       pair is recognised as a compound.
 * end:  end of input, consulted only before looking at a following character.
 *
 * Only characters whose upper three bytes are zero can be ambiguous.
 * Returns TRUE or FALSE.
 */
static int
utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;  /* *p is the least significant byte */
  int c, v;

  (*pp) += 4;

  /* above U+00FF: never ambiguous here */
  if (*(p+1) != 0 || *(p+2) != 0 || *(p+3) != 0)
    return FALSE;

  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
    if (end > p + 7 &&
        ((*p == 's' && *(p+4) == 's') ||
         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
          (*p == 'S' && *(p+4) == 'S'))) &&
        *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
      (*pp) += 4; /* the pair is consumed as one unit */
      return TRUE;
    }
    else if (*p == 0xdf) {
      return TRUE;
    }
  }

  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
       ONIGENC_IS_MBC_ASCII(p)) ||
      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
       !ONIGENC_IS_MBC_ASCII(p))) {
    c = *p;
    v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
                    (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));

    /* Only cased letters can be case-ambiguous.  Testing
       (v | ONIGENC_CTYPE_LOWER) would be true for every character,
       digits and punctuation included. */
    if (v == 0)
      return FALSE;

    /* 0xaa, 0xb5, 0xba are lower case letters, but can't be converted. */
    if (c >= 0xaa && c <= 0xba)
      return FALSE;

    return TRUE;
  }

  return FALSE;
}
/*
 * Move `s` back to the first byte of the 4-byte unit that contains it,
 * counting units from `start`.  Returns `s` unchanged when s <= start.
 */
static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s > start) {
    /* bytes past the previous 4-byte boundary */
    int excess = (s - start) % 4;
    s -= excess;
  }
  return (UChar* )s;
}
/*
 * Encoding descriptor for UTF-32LE.
 * The initializers are positional: a scalar header (length function, name,
 * byte-length limits, supported ambiguity flags), the meta character table,
 * and then the per-encoding callbacks.  Their order must follow the member
 * order of OnigEncodingType.
 */
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
"UTF-32LE", /* name */
4, /* max byte length */
4, /* min byte length */
/* ambiguity kinds this encoding supports */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
/* meta character table: only the escape character is set */
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
/* UTF-32LE specific byte <-> code point handling (defined in this file) */
utf32le_is_mbc_newline,
utf32le_mbc_to_code,
utf32le_code_to_mbclen,
utf32le_code_to_mbc,
utf32le_mbc_to_normalize,
utf32le_is_mbc_ambiguous,
/* helpers shared with other encodings (not defined in this file) */
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};

File diff suppressed because it is too large Load Diff

173
ext/mbstring/oniguruma/index.html Executable file
View File

@ -0,0 +1,173 @@
<html>
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
<title>Oniguruma</title>
</head>
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
<h2>Oniguruma</h2>
<p>
2005/02/19 (C) K.Kosako
</p>
<p>
<a href="http://miuras.net/matsushita.html">
<img src="anti_matsushita.PNG" height="46" width="266">
</a>
</p>
<p>
Oniguruma is a regular expressions library.<br>
The distinguishing characteristic of this library is that a different character encoding
<br>can be specified for every regular expression object.
</p>
<dl>
<dt><b>Supported character encodings:</b><br>
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
Shift_JIS, Big5, KOI8-R, KOI8,<br>
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
</dt>
</dl>
<p>
<dl>
<font color="orange">
<dt><b>What's new</b>
</font>
<ul>
<li>Character types(\w, \s, \d and POSIX bracket) were supported in full code point range with the Version 4.0.1 of the <a href="http://www.unicode.org/ucd/">Unicode Standard</a>. (since Version 3.5.0)
</ul>
</dl>
<hr>
<dl>
<dt>This library can be used in two ways:
<ul>
<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br>
In Ruby 1.9, Oniguruma is already incorporated by Kazuo Saito.
</ul>
</dl>
<dl>
<dt><b>Platform:</b>
<ul>
<li> Unix (including Mac OS X)
<li> Cygwin
<li> Win32
</ul>
<br>
<dt><b>License:</b><br>
When this software is used as part of Ruby or is distributed with Ruby,
it follows the license of Ruby.<br>
In all other cases it follows the BSD license.
</dt>
<dt><b>Download:</b>
<ul>
<li> <a href="archive/onigd20050219.tar.gz">Latest release version 3.7.0</a> (2005/02/19) <a href="HISTORY_3X.txt">Change Log</a>
<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04)
<li> <a href="archive/onigd20050119.tar.gz">3.5.4</a> (2005/01/19)
<li> <a href="archive/onigd2_4_1.tar.gz">Latest release version 2.4.1</a> (2005/01/05) <a href="HISTORY_2X.txt">Change Log</a>
<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01)
<li> <a href="archive/onigd2_3_3.tar.gz">2.3.3</a> (2004/10/30)
</ul>
<br>
<font color="red">
* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
</font>
<br>
<br>
<dt><b>Documents:</b> (version 3.7.0)
<ul>
<li> <a href="doc/RE.txt">Regular Expressions</a>
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
<li> <a href="doc/API.txt">Oniguruma API</a>
<a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
</ul>
<br>
<dt><b>Sample Programs:</b>
<ul>
<li><a href="sample/simple.c">example of the minimum</a>
<li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
</ul>
<br>
<dt><b>Links:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page)
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a>
<li> <a href="http://www.artman21.net/">Jedit X</a>
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> (Japanese page)
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page)
<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
<li> <a href="http://www.ruby-lang.org/">Ruby</a>
<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page)
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a>
</ul>
<br>
<dt><b>References:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
<li> <a href="http://www.pcre.org/">PCRE</a>
<!--
<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page)
<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page)
-->
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
</ul>
<br>
<!--
<dt><b>ToDo:</b>
<ul>
<li> support character types for all code point range.
</ul>
-->
</dl>
<p>
and I'm thankful to Akinori MUSHA.
</p>
<!--
<hr>
<font color="red">
2004-06-14<br>
To: "Greg A. Woods"<br>
I can't send mail to you. (rejected)<br>
Please set the nmatch argument of regexec() to 1,
and use Oniguruma 3.6.0 or 2.4.1.<br>
The nmatch argument should be array size of a pmatch.<br>
But I don't know whether this problem is related to the crash
that you reported.
</font>
-->
<hr>
</body>
</html>

View File

@ -1,15 +1,40 @@
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
Copyright (C) 2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGGNU_H
#define ONIGGNU_H
/**********************************************************************
oniggnu.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "oniguruma.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
@ -19,14 +44,31 @@
#ifndef RE_NREGS
#define RE_NREGS ONIG_NREGION
#endif
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
#ifdef RUBY_PLATFORM
#ifndef ONIG_RUBY_M17N
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c) \
ONIGENC_MBC_ENC_LEN(OnigEncDefaultCharEncoding, (UChar* )(&c))
#endif /* ifndef ONIG_RUBY_M17N */
#define re_mbcinit ruby_re_mbcinit
#define re_compile_pattern ruby_re_compile_pattern
#define re_recompile_pattern ruby_re_recompile_pattern
@ -74,4 +116,8 @@ void re_free_registers P_((struct re_registers*));
ONIG_EXTERN
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
#ifdef __cplusplus
}
#endif
#endif /* ONIGGNU_H */

View File

@ -1,12 +1,38 @@
/**********************************************************************
onigposix.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGPOSIX_H
#define ONIGPOSIX_H
/**********************************************************************
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* options */
#define REG_ICASE (1<<0)
@ -38,12 +64,12 @@
#define REG_EONIG_THREAD 17
/* character encodings (for reg_set_encoding()) */
#define REG_POSIX_ENCODING_ASCII 0
#define REG_POSIX_ENCODING_EUC_JP 1
#define REG_POSIX_ENCODING_SJIS 2
#define REG_POSIX_ENCODING_UTF8 3
#include <stdlib.h>
#define REG_POSIX_ENCODING_ASCII 0
#define REG_POSIX_ENCODING_EUC_JP 1
#define REG_POSIX_ENCODING_SJIS 2
#define REG_POSIX_ENCODING_UTF8 3
#define REG_POSIX_ENCODING_UTF16_BE 4
#define REG_POSIX_ENCODING_UTF16_LE 5
typedef int regoff_t;
@ -70,7 +96,7 @@ typedef struct {
#endif
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
@ -103,7 +129,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regparse.c) */
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@ -119,6 +145,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN const char* onig_version P_((void));
ONIG_EXTERN const char* onig_copyright P_((void));
#endif /* ONIGURUMA_H */
@ -129,8 +158,12 @@ ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t
/* extended API */
ONIG_EXTERN void reg_set_encoding P_((int enc));
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, unsigned char* name, unsigned char* name_end, int** nums));
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
#ifdef __cplusplus
}
#endif
#endif /* ONIGPOSIX_H */

View File

@ -1,19 +1,53 @@
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
/**********************************************************************
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "php_onig_compat.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 2
#define ONIGURUMA_VERSION_MINOR 2
#define ONIGURUMA_VERSION_TEENY 4
#define ONIGURUMA_VERSION_MAJOR 3
#define ONIGURUMA_VERSION_MINOR 7
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
# define HAVE_PROTOTYPES 1
# endif
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@ -32,7 +66,7 @@
#endif
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN extern __declspec(dllexport)
#else
@ -53,17 +87,60 @@ typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
/* ambiguous match flag */
typedef unsigned int OnigAmbigType;
ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
#define ONIGENC_AMBIGUOUS_MATCH_FULL \
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
/* code range */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
typedef struct {
int len;
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
} OnigCompAmbigCodeItem;
typedef struct {
int n;
OnigCodePoint code;
OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
} OnigCompAmbigCodes;
typedef struct {
OnigCodePoint from;
OnigCodePoint to;
} OnigCodePointRange;
} OnigPairAmbigCodes;
#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16
typedef struct {
int target_num;
int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
} OnigEncFoldMatchInfo;
OnigCodePoint esc;
OnigCodePoint anychar;
OnigCodePoint anytime;
OnigCodePoint zero_or_one_time;
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
#if defined(RUBY_PLATFORM) && defined(M17N_H)
@ -74,23 +151,24 @@ typedef m17n_encoding* OnigEncoding;
#else
typedef struct {
const char len_table[256];
const char* name;
int max_enc_len;
int is_fold_match;
int ctype_support_level; /* sb-only/full */
int is_continuous_sb_mb; /* code point is continuous from sb to mb */
OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
int (*mbc_enc_len)(const UChar* p);
const char* name;
int max_enc_len;
int min_enc_len;
OnigAmbigType support_ambig_flag;
OnigMetaCharTableType meta_char_table;
int (*is_mbc_newline)(const UChar* p, const UChar* end);
OnigCodePoint (*mbc_to_code)(const UChar* p, const UChar* end);
int (*code_to_mbclen)(OnigCodePoint code);
int (*code_to_mbc)(OnigCodePoint code, UChar *buf);
int (*mbc_to_lower)(UChar* p, UChar* lower);
int (*mbc_is_case_ambig)(UChar* p);
int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
int (*is_allowed_reverse_match)(UChar* p, UChar* e);
int (*get_all_fold_match_code)(OnigCodePoint** codes);
int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
int (*mbc_to_normalize)(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* to);
int (*is_mbc_ambiguous)(OnigAmbigType flag, const UChar** pp, const UChar* end);
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
UChar* (*left_adjust_char_head)(const UChar* start, const UChar* p);
int (*is_allowed_reverse_match)(const UChar* p, const UChar* end);
} OnigEncodingType;
typedef OnigEncodingType* OnigEncoding;
@ -112,6 +190,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
@ -138,6 +220,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
@ -153,35 +239,31 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN
/* character types */
#define ONIGENC_CTYPE_ALPHA (1<< 0)
#define ONIGENC_CTYPE_BLANK (1<< 1)
#define ONIGENC_CTYPE_CNTRL (1<< 2)
#define ONIGENC_CTYPE_DIGIT (1<< 3)
#define ONIGENC_CTYPE_GRAPH (1<< 4)
#define ONIGENC_CTYPE_LOWER (1<< 5)
#define ONIGENC_CTYPE_PRINT (1<< 6)
#define ONIGENC_CTYPE_PUNCT (1<< 7)
#define ONIGENC_CTYPE_SPACE (1<< 8)
#define ONIGENC_CTYPE_UPPER (1<< 9)
#define ONIGENC_CTYPE_XDIGIT (1<<10)
#define ONIGENC_CTYPE_WORD (1<<11)
#define ONIGENC_CTYPE_ASCII (1<<12)
#define ONIGENC_CTYPE_NEWLINE (1<< 0)
#define ONIGENC_CTYPE_ALPHA (1<< 1)
#define ONIGENC_CTYPE_BLANK (1<< 2)
#define ONIGENC_CTYPE_CNTRL (1<< 3)
#define ONIGENC_CTYPE_DIGIT (1<< 4)
#define ONIGENC_CTYPE_GRAPH (1<< 5)
#define ONIGENC_CTYPE_LOWER (1<< 6)
#define ONIGENC_CTYPE_PRINT (1<< 7)
#define ONIGENC_CTYPE_PUNCT (1<< 8)
#define ONIGENC_CTYPE_SPACE (1<< 9)
#define ONIGENC_CTYPE_UPPER (1<<10)
#define ONIGENC_CTYPE_XDIGIT (1<<11)
#define ONIGENC_CTYPE_WORD (1<<12)
#define ONIGENC_CTYPE_ASCII (1<<13)
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
/* ctype support level */
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1
#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte)
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
@ -194,31 +276,33 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#include <ctype.h> /* for isblank(), isgraph() */
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p)
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
onigenc_is_mbc_ambiguous(enc,flag,pp,end)
#define ONIGENC_IS_FOLD_MATCH(enc) FALSE
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
onigenc_get_left_adjust_char_head(enc, start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b)
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
(ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
: ONIG_INFINITE_DISTANCE)
#define ONIGENC_MBC_MINLEN(enc) 1
#define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)
#if 0
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
#if 0 /* !! not supported !! */
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
#define ONIGENC_STEP_BACK(enc,start,s,n)
#endif
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
@ -253,42 +337,45 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* buf));
ONIG_EXTERN
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** pp, const UChar* end));
ONIG_EXTERN
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const UChar* s, const UChar* end));
#else /* ONIG_RUBY_M17N */
#define ONIGENC_NAME(enc) ((enc)->name)
#define ONIGENC_MBC_TO_LOWER(enc,p,buf) (enc)->mbc_to_lower(p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p)
#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match)
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb)
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level)
#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
(enc)->mbc_to_normalize(flag,(const UChar** )pp,end,buf)
#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
(enc)->is_mbc_ambiguous(flag,(const UChar** )pp,end)
#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
(enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
(enc)->left_adjust_char_head(start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
(enc)->get_all_fold_match_code(codes)
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
(enc)->get_fold_match_info(p,end,info)
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
(enc)->get_all_pair_ambig_codes(ambig_flag,acs)
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
(enc)->get_all_comp_ambig_codes(ambig_flag,acs)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
onigenc_step_back((enc),(start),(s),(n))
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)])
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e))
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype)
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
@ -316,11 +403,11 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
(enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
(enc)->get_ctype_code_range(ctype,sbr,mbr)
ONIG_EXTERN
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
UChar* onigenc_step_back P_((OnigEncoding enc, const UChar* start, const UChar* s, int n));
#endif /* is not ONIG_RUBY_M17N */
@ -333,15 +420,21 @@ int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_(());
ONIG_EXTERN
void onigenc_set_default_caseconv_table P_((UChar* table));
void onigenc_set_default_caseconv_table P_((const UChar* table));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const UChar* start, const UChar* s, const UChar** prev));
ONIG_EXTERN
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
ONIG_EXTERN
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const UChar* start, const UChar* s));
ONIG_EXTERN
int onigenc_strlen P_((OnigEncoding enc, const UChar* p, const UChar* end));
ONIG_EXTERN
int onigenc_strlen_null P_((OnigEncoding enc, const UChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const UChar* p));
@ -355,13 +448,6 @@ UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UC
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
#endif
typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
@ -403,7 +489,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
/* predefined syntaxes (see regparse.c) */
/* predefined syntaxes (see regsyntax.c) */
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
@ -466,7 +552,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */
#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */
@ -479,6 +568,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */
/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */
@ -505,7 +595,10 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_NORMAL 0
#define ONIG_MISMATCH -1
#define ONIG_NO_SUPPORT_CONFIG -2
/* internal error */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
#define ONIGERR_PARSER_BUG -11
#define ONIGERR_STACK_BUG -12
#define ONIGERR_UNDEFINED_BYTECODE -13
@ -520,7 +613,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
@ -560,8 +653,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
@ -569,6 +666,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
/*
 * One node of the capture history tree.  The tree root is stored in
 * re_registers.history_root and is returned by onig_get_capture_tree().
 * Each node refers to its children through the `childs` pointer array.
 */
typedef struct OnigCaptureTreeNodeStruct {
int group; /* group number */
int beg; /* presumably the start offset of this capture, like re_registers.beg -- TODO confirm */
int end; /* presumably the end offset of this capture, like re_registers.end -- TODO confirm */
int allocated; /* NOTE(review): looks like the allocated capacity of `childs` -- confirm */
int num_childs; /* NOTE(review): looks like the number of entries in use in `childs` -- confirm */
struct OnigCaptureTreeNodeStruct** childs; /* array of pointers to child nodes */
} OnigCaptureTreeNode;
/* match result region type */
struct re_registers {
int allocated;
@ -576,9 +682,16 @@ struct re_registers {
int* beg;
int* end;
/* extended */
struct re_registers** list; /* capture history. list[1]-list[31] */
OnigCaptureTreeNode* history_root; /* capture history tree root */
};
/* capture tree traverse */
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
#define ONIG_REGION_NOTPOS -1
typedef struct re_registers OnigRegion;
@ -593,8 +706,8 @@ typedef struct {
int upper;
} OnigRepeatRange;
typedef void (*OnigWarnFunc) P_((char* s));
extern void onig_null_warn P_((char* s));
typedef void (*OnigWarnFunc) P_((const char* s));
extern void onig_null_warn P_((const char* s));
#define ONIG_NULL_WARN onig_null_warn
#define ONIG_CHAR_TABLE_SIZE 256
@ -629,6 +742,7 @@ typedef struct re_pattern_buffer {
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigAmbigType ambig_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
@ -640,7 +754,7 @@ typedef struct re_pattern_buffer {
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigDistance dmin; /* min-distance of exact or map */
@ -651,6 +765,15 @@ typedef struct re_pattern_buffer {
} regex_t;
/*
 * Compile parameters bundled into one structure.  A pointer to it is taken
 * by onig_new_deluxe() and onig_recompile_deluxe() in place of the separate
 * option/encoding/syntax arguments of onig_new() and onig_recompile().
 */
typedef struct {
int num_of_elements; /* NOTE(review): meaning not visible here; presumably the count of fields that follow -- confirm */
OnigEncoding pattern_enc; /* presumably the encoding of the pattern text -- TODO confirm */
OnigEncoding target_enc; /* presumably the encoding of the strings to be matched -- TODO confirm */
OnigSyntaxType* syntax; /* syntax definition, same type as the `syntax` argument of onig_new() */
OnigOptionType option; /* compile options, same type as the `option` argument of onig_new() */
OnigAmbigType ambig_flag; /* ambiguity-match flags, same type as regex_t.ambig_flag */
} OnigCompileInfo;
/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
@ -661,18 +784,24 @@ void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
int onig_new P_((regex_t**, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_new_deluxe P_((regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
int onig_recompile P_((regex_t*, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
int onig_recompile_deluxe P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
int onig_search P_((regex_t*, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match P_((regex_t*, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_init P_((OnigRegion* region));
ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
@ -681,25 +810,44 @@ void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
int** nums));
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
ONIG_EXTERN
int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
int onig_name_to_group_numbers P_((regex_t* reg, const UChar* name, const UChar* name_end, int** nums));
ONIG_EXTERN
int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
int onig_name_to_backref_number P_((regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region));
ONIG_EXTERN
int onig_foreach_name P_((regex_t* reg, int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
int onig_number_of_captures P_((regex_t* reg));
ONIG_EXTERN
int onig_number_of_capture_histories P_((regex_t* reg));
ONIG_EXTERN
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
ONIG_EXTERN
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((regex_t* reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((regex_t* reg));
ONIG_EXTERN
OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
ONIG_EXTERN
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
ONIG_EXTERN
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
@ -708,10 +856,26 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((unsigned int what, unsigned int c));
int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
ONIG_EXTERN
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
ONIG_EXTERN
OnigAmbigType onig_get_default_ambig_flag P_(());
ONIG_EXTERN
int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
ONIG_EXTERN
unsigned int onig_get_match_stack_limit_size P_((void));
ONIG_EXTERN
int onig_set_match_stack_limit_size P_((unsigned int size));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));
ONIG_EXTERN
const char* onig_copyright P_((void));
#ifdef __cplusplus
}
#endif
#endif /* ONIGURUMA_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +1,33 @@
/**********************************************************************
regenc.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#include "regenc.h"
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
@ -29,33 +51,33 @@ onigenc_set_default_encoding(OnigEncoding enc)
}
extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
p += enc_len(enc, *p);
p += enc_len(enc, p);
}
return p;
}
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
UChar* start, UChar* s, UChar** prev)
const UChar* start, const UChar* s, const UChar** prev)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
if (prev) *prev = p;
p += enc_len(enc, *p);
if (prev) *prev = (const UChar* )p;
p += enc_len(enc, p);
}
else {
if (prev) *prev = (UChar* )NULL; /* Sorry */
if (prev) *prev = (const UChar* )NULL; /* Sorry */
}
return p;
}
extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
if (s <= start)
return (UChar* )NULL;
@ -64,7 +86,7 @@ onigenc_get_prev_char_head(OnigEncoding enc, UChar* start, UChar* s)
}
extern UChar*
onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
{
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
if (s <= start)
@ -72,20 +94,127 @@ onigenc_step_back(OnigEncoding enc, UChar* start, UChar* s, int n)
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
}
return s;
return (UChar* )s;
}
/*
 * Advance n characters forward from p in a string encoded with enc.
 *
 *   enc  encoding used to measure each character
 *   p    start position (a character head)
 *   end  end of the string (one past the last byte)
 *   n    number of characters to skip
 *
 * Returns the position n characters after p, or NULL when that position
 * would lie beyond end.
 */
extern UChar*
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{
  UChar* q = (UChar* )p;

  while (n-- > 0) {
    /* A step is still pending but no byte is left: give up before
       ONIGENC_MBC_ENC_LEN() looks at memory at or past end.  Every step
       moves forward, so the final position would exceed end anyway. */
    if (q >= end) return NULL;
    q += ONIGENC_MBC_ENC_LEN(enc, q);
  }
  /* the last character may still be truncated by end */
  return (q <= end ? q : NULL);
}
/*
 * Count the characters between p and end in a string encoded with enc.
 * Each character's byte length is obtained with ONIGENC_MBC_ENC_LEN();
 * a character that starts before end is counted even if it extends past it.
 */
extern int
onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
  UChar* cur;
  int count = 0;

  for (cur = (UChar* )p; cur < end; cur += ONIGENC_MBC_ENC_LEN(enc, cur)) {
    count++;
  }
  return count;
}
/*
 * Count the characters of a NUL-terminated string s encoded with enc.
 *
 * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes starting at
 * a character head; a zero byte followed by fewer zero bytes than that is
 * treated as part of an ordinary character and skipped over.
 */
extern int
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
  UChar* cur = (UChar* )s;
  int count;

  for (count = 0; ; count++) {
    if (*cur == '\0') {
      int remaining = ONIGENC_MBC_MINLEN(enc);
      UChar* probe = cur + 1;

      /* consume the following zero bytes until the minimum width is met */
      for (; remaining > 1 && *probe == '\0'; probe++) {
        remaining--;
      }
      if (remaining == 1) return count;
    }
    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
  }
}
/*
 * Return the length in bytes of a NUL-terminated string s encoded with enc,
 * not counting the terminator.
 *
 * The terminator is a run of ONIGENC_MBC_MINLEN(enc) zero bytes starting at
 * a character head; a shorter run of zero bytes is stepped over as part of
 * an ordinary character.
 */
extern int
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{
  UChar* base = (UChar* )s;
  UChar* cur = base;

  for (;;) {
    if (*cur == '\0') {
      int remaining = ONIGENC_MBC_MINLEN(enc);
      UChar* probe = cur + 1;

      /* consume the following zero bytes until the minimum width is met */
      while (remaining > 1 && *probe == '\0') {
        probe++;
        remaining--;
      }
      if (remaining == 1) return (int )(cur - base);
    }
    cur += ONIGENC_MBC_ENC_LEN(enc, cur);
  }
}
#ifndef ONIG_RUBY_M17N
#ifndef NOT_RUBY
#define USE_APPLICATION_TO_LOWER_CASE_TABLE
/*
 * Character-type table with one 16-bit entry per byte value 0x00-0xff.
 *
 * NOTE(review): the entries look like bitwise ORs of the ONIGENC_CTYPE_*
 * flags from oniguruma.h (NEWLINE = 1<<0 ... ASCII = 1<<13); e.g. the entry
 * for '0' (index 0x30) is 0x38b0 = ASCII | WORD | XDIGIT | PRINT | GRAPH |
 * DIGIT under that assignment -- confirm against the header in use.
 * Only the first 128 entries carry the 0x2000 bit.
 */
unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
#endif
UChar* OnigEncAsciiToLowerCaseTable = (UChar* )0;
const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
static UChar BuiltInAsciiToLowerCaseTable[] = {
static const UChar BuiltInAsciiToLowerCaseTable[] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@ -121,23 +250,61 @@ static UChar BuiltInAsciiToLowerCaseTable[] = {
};
#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-to-byte upper-case mapping, indexed by byte value.
 * 'a'..'z' ('\141'..'\172') map to 'A'..'Z' ('\101'..'\132'); every other
 * byte, including the whole range '\200'..'\377', maps to itself.
 */
UChar OnigEncAsciiToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
unsigned short OnigEncAsciiCtypeTable[256] = {
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1106, 0x1104, 0x1104, 0x1104, 0x1104, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004, 0x1004,
0x1142, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58, 0x1c58,
0x1c58, 0x1c58, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x10d0,
0x10d0, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1e51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51, 0x1a51,
0x1a51, 0x1a51, 0x1a51, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x18d0,
0x10d0, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1c71, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871, 0x1871,
0x1871, 0x1871, 0x1871, 0x10d0, 0x10d0, 0x10d0, 0x10d0, 0x1004,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@ -156,10 +323,82 @@ unsigned short OnigEncAsciiCtypeTable[256] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
/*
 * Byte-indexed lower-case mapping table (256 entries, one per byte value).
 *
 *   0x41-0x5A  ->  0x61-0x7A
 *   0xC0-0xDE  ->  0xE0-0xFE, except 0xD7 which maps to itself
 *   every other byte (including 0xDF) maps to itself
 *
 * Read through the ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) macro.
 */
UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte-indexed upper-case mapping table (256 entries, one per byte value).
 * Only compiled when USE_UPPER_CASE_TABLE is defined.
 *
 *   0x61-0x7A  ->  0x41-0x5A
 *   0xE0-0xFE  ->  0xC0-0xDE, except 0xF7 which maps to itself
 *   every other byte (including 0xDF and 0xFF) maps to itself
 *
 * Read through the ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) macro.
 */
UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
extern void
onigenc_set_default_caseconv_table(UChar* table)
onigenc_set_default_caseconv_table(const UChar* table)
{
if (table == (UChar* )0) {
if (table == (const UChar* )0) {
#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
table = BuiltInAsciiToLowerCaseTable;
#else
@ -173,47 +412,240 @@ onigenc_set_default_caseconv_table(UChar* table)
}
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
{
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
}
/*
 * ASCII case pairs, listed in both directions: the first 26 entries pair
 * each upper-case letter code (0x41-0x5A) with its lower-case code
 * (0x61-0x7A); the last 26 entries are the same pairs reversed.
 * Handed out by the *_get_all_pair_ambig_codes() functions when the
 * requested flag is ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE.
 */
OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
{ 0x41, 0x61 },
{ 0x42, 0x62 },
{ 0x43, 0x63 },
{ 0x44, 0x64 },
{ 0x45, 0x65 },
{ 0x46, 0x66 },
{ 0x47, 0x67 },
{ 0x48, 0x68 },
{ 0x49, 0x69 },
{ 0x4a, 0x6a },
{ 0x4b, 0x6b },
{ 0x4c, 0x6c },
{ 0x4d, 0x6d },
{ 0x4e, 0x6e },
{ 0x4f, 0x6f },
{ 0x50, 0x70 },
{ 0x51, 0x71 },
{ 0x52, 0x72 },
{ 0x53, 0x73 },
{ 0x54, 0x74 },
{ 0x55, 0x75 },
{ 0x56, 0x76 },
{ 0x57, 0x77 },
{ 0x58, 0x78 },
{ 0x59, 0x79 },
{ 0x5a, 0x7a },
{ 0x61, 0x41 },
{ 0x62, 0x42 },
{ 0x63, 0x43 },
{ 0x64, 0x44 },
{ 0x65, 0x45 },
{ 0x66, 0x46 },
{ 0x67, 0x47 },
{ 0x68, 0x48 },
{ 0x69, 0x49 },
{ 0x6a, 0x4a },
{ 0x6b, 0x4b },
{ 0x6c, 0x4c },
{ 0x6d, 0x4d },
{ 0x6e, 0x4e },
{ 0x6f, 0x4f },
{ 0x70, 0x50 },
{ 0x71, 0x51 },
{ 0x72, 0x52 },
{ 0x73, 0x53 },
{ 0x74, 0x54 },
{ 0x75, 0x55 },
{ 0x76, 0x56 },
{ 0x77, 0x57 },
{ 0x78, 0x58 },
{ 0x79, 0x59 },
{ 0x7a, 0x5a }
};
extern int
onigenc_nothing_get_all_fold_match_code(OnigCodePoint** codes)
onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
OnigPairAmbigCodes** ccs)
{
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
}
else {
return 0;
}
}
/*
 * "No entries" implementation of the get-all-compound-ambiguity-codes
 * hook: reports a count of 0 for every flag and never writes to *ccs.
 */
extern int
onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
                                         OnigCompAmbigCodes** ccs)
{
  (void )flag;  /* intentionally unused */
  (void )ccs;   /* intentionally unused */

  return 0;
}
extern int
onigenc_nothing_get_fold_match_info(UChar* p, UChar* end,
OnigEncFoldMatchInfo** info)
onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
OnigPairAmbigCodes** ccs)
{
return -1;
static OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
{ 0xc3, 0xe3 },
{ 0xc4, 0xe4 },
{ 0xc5, 0xe5 },
{ 0xc6, 0xe6 },
{ 0xc7, 0xe7 },
{ 0xc8, 0xe8 },
{ 0xc9, 0xe9 },
{ 0xca, 0xea },
{ 0xcb, 0xeb },
{ 0xcc, 0xec },
{ 0xcd, 0xed },
{ 0xce, 0xee },
{ 0xcf, 0xef },
{ 0xd0, 0xf0 },
{ 0xd1, 0xf1 },
{ 0xd2, 0xf2 },
{ 0xd3, 0xf3 },
{ 0xd4, 0xf4 },
{ 0xd5, 0xf5 },
{ 0xd6, 0xf6 },
{ 0xd8, 0xf8 },
{ 0xd9, 0xf9 },
{ 0xda, 0xfa },
{ 0xdb, 0xfb },
{ 0xdc, 0xfc },
{ 0xdd, 0xfd },
{ 0xde, 0xfe },
{ 0xe0, 0xc0 },
{ 0xe1, 0xc1 },
{ 0xe2, 0xc2 },
{ 0xe3, 0xc3 },
{ 0xe4, 0xc4 },
{ 0xe5, 0xc5 },
{ 0xe6, 0xc6 },
{ 0xe7, 0xc7 },
{ 0xe8, 0xc8 },
{ 0xe9, 0xc9 },
{ 0xea, 0xca },
{ 0xeb, 0xcb },
{ 0xec, 0xcc },
{ 0xed, 0xcd },
{ 0xee, 0xce },
{ 0xef, 0xcf },
{ 0xf0, 0xd0 },
{ 0xf1, 0xd1 },
{ 0xf2, 0xd2 },
{ 0xf3, 0xd3 },
{ 0xf4, 0xd4 },
{ 0xf5, 0xd5 },
{ 0xf6, 0xd6 },
{ 0xf8, 0xd8 },
{ 0xf9, 0xd9 },
{ 0xfa, 0xda },
{ 0xfb, 0xdb },
{ 0xfc, 0xdc },
{ 0xfd, 0xdd },
{ 0xfe, 0xde }
};
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
}
else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
*ccs = cc;
return sizeof(cc) / sizeof(OnigPairAmbigCodes);
}
else
return 0;
}
extern int
onigenc_nothing_get_ctype_code_range(int ctype, int* nsb, int* nmb,
OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
OnigCompAmbigCodes** ccs)
{
return -1;
static OnigCompAmbigCodes folds[] = {
{ 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
};
if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
*ccs = folds;
return sizeof(folds) / sizeof(OnigCompAmbigCodes);
}
else
return 0;
}
/*
 * Ctype code-range hook that provides no ranges: it ignores all of its
 * arguments, leaves sbr/mbr untouched and always returns
 * ONIG_NO_SUPPORT_CONFIG.
 */
extern int
onigenc_not_support_get_ctype_code_range(int ctype,
                                         OnigCodePoint* sbr[], OnigCodePoint* mbr[])
{
  (void )ctype;  /* intentionally unused */
  (void )sbr;    /* intentionally unused */
  (void )mbr;    /* intentionally unused */

  return ONIG_NO_SUPPORT_CONFIG;
}
/*
 * Newline test on a single byte.
 *
 * Returns 1 when p lies before end and the byte at p is 0x0a,
 * otherwise 0.  The byte is only read when p < end.
 */
extern int
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
{
  return (p < end && *p == 0x0a) ? 1 : 0;
}
/* for single byte encodings */
extern int
onigenc_ascii_mbc_to_lower(UChar* p, UChar* lower)
onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
UChar* lower)
{
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
}
else {
*lower = **p;
}
(*p)++;
return 1; /* return byte length of converted char to lower */
}
extern int
onigenc_ascii_mbc_is_case_ambig(UChar* p)
onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
const UChar* p = *pp;
(*pp)++;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
return FALSE;
}
}
/*
 * Character length hook for single byte encodings: the answer is
 * always 1, so the byte at p is never examined.
 */
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p)
{
  (void )p;  /* intentionally unused */

  return 1;
}
extern OnigCodePoint
onigenc_single_byte_mbc_to_code(UChar* p, UChar* end)
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(*p);
}
@ -238,26 +670,31 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
}
extern UChar*
onigenc_single_byte_left_adjust_char_head(UChar* start, UChar* s)
onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
{
return s;
return (UChar* )s;
}
extern int
onigenc_single_byte_is_allowed_reverse_match(UChar* s, UChar* end)
onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
return TRUE;
}
/*
 * Reverse-match predicate that unconditionally answers FALSE;
 * neither s nor end is inspected.
 */
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
{
  (void )s;    /* intentionally unused */
  (void )end;  /* intentionally unused */

  return FALSE;
}
extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
{
int c, i, len;
OnigCodePoint n;
c = *p++;
len = enc_len(enc, c);
n = c;
len = enc_len(enc, p);
n = (OnigCodePoint )(*p++);
if (len == 1) return n;
for (i = 1; i < len; i++) {
@ -269,33 +706,52 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, UChar* p, UChar* end)
}
extern int
onigenc_mbn_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* lower)
onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
const UChar** pp, const UChar* end, UChar* lower)
{
int len;
const UChar *p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp)++;
return 1;
}
else {
len = enc_len(enc, *p);
len = enc_len(enc, p);
if (lower != p) {
/* memcpy(lower, p, len); */
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted to lower char */
}
}
extern int
onigenc_mbn_mbc_is_case_ambig(UChar* p)
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
const UChar** pp, const UChar* end)
{
if (ONIGENC_IS_MBC_ASCII(p))
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
(*pp)++;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
return FALSE;
}
}
(*pp) += enc_len(enc, p);
return FALSE;
}
@ -360,8 +816,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
if (enc_len(enc, buf[0]) != (p - buf))
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
if (enc_len(enc, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
@ -383,23 +839,21 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
if (enc_len(enc, buf[0]) != (p - buf))
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
if (enc_len(enc, buf) != (p - buf))
return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
#endif
return p - buf;
}
extern int
onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
int first = onigenc_mb2_code_to_mbc_first(code);
return (enc_len(enc, first) > 1 ? TRUE : FALSE);
}
else
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@ -412,16 +866,14 @@ onigenc_mb2_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
}
extern int
onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
{
if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
else {
int first = onigenc_mb4_code_to_mbc_first(code);
return (enc_len(enc, first) > 1 ? TRUE : FALSE);
}
else
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
ctype &= ~ONIGENC_CTYPE_WORD;
if (ctype == 0) return FALSE;
@ -434,39 +886,22 @@ onigenc_mb4_code_is_ctype(OnigEncoding enc, OnigCodePoint code,
}
extern int
onigenc_get_all_fold_match_code_ss_0xdf(OnigCodePoint** codes)
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
const UChar* sascii /* ascii */, int n)
{
static OnigCodePoint list[] = { 0xdf };
*codes = list;
return 1;
}
int x, c;
extern int
onigenc_get_fold_match_info_ss_0xdf(UChar* p, UChar* end,
OnigEncFoldMatchInfo** info)
{
/* German alphabet ess-tsett(U+00DF) */
static OnigEncFoldMatchInfo ss = {
3,
{ 1, 2, 2 },
{ "\337", "ss", "SS" } /* 0337: 0xdf */
};
while (n-- > 0) {
if (p >= end) return (int )(*sascii);
if (p >= end) return -1;
c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
x = *sascii - c;
if (x) return x;
if (*p == 0xdf) {
*info = &ss;
return 1;
sascii++;
p += enc_len(enc, p);
}
else if (p + 1 < end) {
if ((*p == 'S' && *(p+1) == 'S') ||
(*p == 's' && *(p+1) == 's')) {
*info = &ss;
return 2;
}
}
return -1; /* is not a fold string. */
return 0;
}
#else /* ONIG_RUBY_M17N */
@ -475,6 +910,10 @@ extern int
onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
{
switch (ctype) {
case ONIGENC_CTYPE_NEWLINE:
if (code == 0x0a) return 1;
break;
case ONIGENC_CTYPE_ALPHA:
return m17n_isalpha(enc, code);
break;
@ -548,12 +987,22 @@ onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
}
extern int
onigenc_mbc_is_case_ambig(OnigEncoding enc, UChar* p)
onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
UChar** pp, UChar* end)
{
unsigned int c = m17n_codepoint(enc, p, p + enc_len(enc, *p));
int len;
unsigned int c;
UChar* p = *pp;
len = enc_len(enc, *p);
(*pp) += len;
c = m17n_codepoint(enc, p, p + len);
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
if (m17n_isupper(enc, c) || m17n_islower(enc, c))
return TRUE;
}
if (m17n_isupper(enc, c) || m17n_islower(enc, c))
return TRUE;
return FALSE;
}
@ -575,7 +1024,8 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
}
extern int
onigenc_is_allowed_reverse_match(OnigEncoding enc, UChar* s, UChar* end)
onigenc_is_allowed_reverse_match(OnigEncoding enc,
const UChar* s, const UChar* end)
{
return ONIGENC_IS_SINGLEBYTE(enc);
}

View File

@ -1,12 +1,33 @@
/**********************************************************************
regenc.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGENC_H
#define REGENC_H
/**********************************************************************
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef RUBY_PLATFORM
#include "config.h"
@ -26,15 +47,11 @@
#endif
/* error codes */
/* internal error */
#define ONIGERR_MEMORY -5
#define ONIGERR_TYPE_BUG -6
/* syntax error [-400, -999] */
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGENCERR_MEMORY -5
#define ONIGENCERR_TYPE_BUG -6
#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIG_NEWLINE '\n'
#define ONIG_IS_NEWLINE(c) ((c) == ONIG_NEWLINE)
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
@ -47,47 +64,79 @@
#else /* ONIG_RUBY_M17N */
#define USE_UNICODE_FULL_RANGE_CTYPE
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
ONIG_EXTERN int onigenc_nothing_get_all_fold_match_code P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_nothing_get_fold_match_info P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
ONIG_EXTERN int onigenc_nothing_get_ctype_code_range P_((int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]));
ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
/* methods for single byte encoding */
ONIG_EXTERN int onigenc_ascii_mbc_to_lower P_((UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_ascii_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((UChar* p, UChar* end));
ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((UChar* start, UChar* s));
ONIG_EXTERN int onigenc_single_byte_is_allowed_reverse_match P_((UChar* s, UChar* end));
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
/* methods for multi byte encoding */
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, UChar* p, UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* lower));
ONIG_EXTERN int onigenc_mbn_mbc_is_case_ambig P_((UChar* p));
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb2_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN int onigenc_mb4_code_is_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
ONIG_EXTERN int onigenc_get_fold_match_info_ss_0xdf P_((UChar* p, UChar* end, OnigEncFoldMatchInfo** info));
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
OnigEncISO_8859_1_ToUpperCaseTable[c]
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
#endif /* is not ONIG_RUBY_M17N */
ONIG_EXTERN int
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
ONIG_EXTERN UChar*
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
/* defined in regexec.c, but used in enc/xxx.c */
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
((OnigEncAsciiCtypeTable[code] & ctype) != 0)
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \

View File

@ -1,10 +1,32 @@
/**********************************************************************
regerror.c - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
@ -56,8 +78,8 @@ onig_error_code_to_format(int code)
p = "empty char-class"; break;
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
p = "premature end of char-class"; break;
case ONIGERR_END_PATTERN_AT_BACKSLASH:
p = "end pattern at backslash"; break;
case ONIGERR_END_PATTERN_AT_ESCAPE:
p = "end pattern at escape"; break;
case ONIGERR_END_PATTERN_AT_META:
p = "end pattern at meta"; break;
case ONIGERR_END_PATTERN_AT_CONTROL:
@ -145,7 +167,9 @@ onig_error_code_to_format(int code)
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
p = "group number is too big for capture history"; break;
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
p = "invalid character property name"; break;
p = "invalid character property name {%n}"; break;
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
p = "not supported encoding combination"; break;
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
p = "over thread pass limit count"; break;
@ -184,6 +208,7 @@ onig_error_code_to_str(s, code, va_alist)
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
case ONIGERR_INVALID_GROUP_NAME:
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
einfo = va_arg(vargs, OnigErrorInfo*);
len = einfo->par_end - einfo->par;
q = onig_error_code_to_format(code);
@ -218,7 +243,7 @@ onig_error_code_to_str(s, code, va_alist)
default:
q = onig_error_code_to_format(code);
len = strlen(q);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
xmemcpy(s, q, len);
s[len] = '\0';
break;
@ -245,7 +270,8 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
#endif
{
int n, need, len;
UChar *p, *s;
UChar *p, *s, *bp;
char bs[6];
va_list args;
va_init_list(args, fmt);
@ -256,29 +282,41 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
if (n + need < bufsize) {
strcat(buf, ": /");
s = buf + strlen(buf);
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
while (p < (UChar* )pat_end) {
if (*p == MC_ESC) {
if (*p == MC_ESC(enc)) {
*s++ = *p++;
len = enc_len(enc, *p);
len = enc_len(enc, p);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
*s++ = MC_ESC;
*s++ = (unsigned char )MC_ESC(enc);
*s++ = *p++;
}
else if (ONIGENC_IS_MBC_HEAD(enc, *p)) {
len = enc_len(enc, *p);
while (len-- > 0) *s++ = *p++;
else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
len = enc_len(enc, p);
if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++;
}
else { /* for UTF16 */
int blen;
while (len-- > 0) {
sprintf(bs, "\\%03o", *p++ & 0377);
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
}
}
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
char b[5];
sprintf(b, "\\%03o", *p & 0377);
len = strlen(b);
while (len-- > 0) *s++ = *p++;
sprintf(bs, "\\%03o", *p++ & 0377);
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (len-- > 0) *s++ = *bp++;
}
else {
*s++ = *p++;

View File

@ -1,26 +0,0 @@
/**********************************************************************
regex.c - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*
* Source wrapper for Ruby.
*/
#include "regint.h"
#include "regex.h"
#include "regparse.c"
#include "regcomp.c"
#include "regexec.c"
#include "regenc.c"
#include "reggnu.c"
#include "regerror.c"
#ifndef ONIG_RUBY_M17N
#include "enc/ascii.c"
#include "enc/utf8.c"
#include "enc/euc_jp.c"
#include "enc/sjis.c"
#endif

File diff suppressed because it is too large Load Diff

213
ext/mbstring/oniguruma/regext.c Executable file
View File

@ -0,0 +1,213 @@
/**********************************************************************
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/*
 * Widen each byte of [s, end) to a 4-byte big-endian unit:
 * three zero bytes followed by the source byte.
 * conv must have room for 4 * (end - s) bytes.
 */
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 4) {
    dst[0] = '\0';
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = *src;
  }
}
/*
 * Widen each byte of [s, end) to a 4-byte little-endian unit:
 * the source byte followed by three zero bytes.
 * conv must have room for 4 * (end - s) bytes.
 */
static void
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 4) {
    dst[0] = *src;
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = '\0';
  }
}
/*
 * Widen each byte of [s, end) to a 2-byte big-endian unit:
 * a zero byte followed by the source byte.
 * conv must have room for 2 * (end - s) bytes.
 */
static void
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 2) {
    dst[0] = '\0';
    dst[1] = *src;
  }
}
/*
 * Widen each byte of [s, end) to a 2-byte little-endian unit:
 * the source byte followed by a zero byte.
 * conv must have room for 2 * (end - s) bytes.
 */
static void
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 2) {
    dst[0] = *src;
    dst[1] = '\0';
  }
}
/*
 * Copy [s, end) into conv with the byte order of every complete 4-byte
 * unit reversed.
 *
 * Exactly (end - s) bytes are written.  The caller allocates that many
 * bytes for conv, so the loop must never touch a partial unit: if the
 * input length is not a multiple of 4, the 1-3 trailing bytes are copied
 * unchanged instead of being swapped.  (The previous "while (s < end)"
 * loop read up to 3 bytes past end and wrote past the end of conv in
 * that case.)
 */
static void
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
{
  /* complete units only: s[0..3] are all inside [s, end) */
  while (end - s >= 4) {
    *conv++ = s[3];
    *conv++ = s[2];
    *conv++ = s[1];
    *conv++ = s[0];
    s += 4;
  }

  /* malformed tail (fewer than 4 bytes left): keep it as is */
  while (s < end) {
    *conv++ = *s++;
  }
}
/*
 * Copy [s, end) into conv with the two bytes of every complete 2-byte
 * unit exchanged.
 *
 * Exactly (end - s) bytes are written.  The caller allocates that many
 * bytes for conv, so the loop must never touch a partial unit: if the
 * input length is odd, the single trailing byte is copied unchanged.
 * (The previous "while (s < end)" loop read one byte past end and wrote
 * one byte past the end of conv in that case.)
 */
static void
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
{
  /* complete units only: s[0] and s[1] are both inside [s, end) */
  while (end - s >= 2) {
    *conv++ = s[1];
    *conv++ = s[0];
    s += 2;
  }

  /* malformed tail (one byte left): keep it as is */
  while (s < end) {
    *conv++ = *s++;
  }
}
/*
 * Convert the byte sequence [s, end) from encoding `from` to encoding `to`
 * into a newly allocated buffer.
 *
 * Supported combinations:
 *   ASCII or ISO-8859-1  ->  UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE
 *   UTF-16LE            <->  UTF-16BE
 *   UTF-32LE            <->  UTF-32BE
 *
 * On success returns 0, stores the start of the new buffer in *conv and the
 * address one past its last converted byte in *conv_end.  The buffer comes
 * from xmalloc(); the caller is responsible for releasing it.
 *
 * Returns ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION for any other pair of
 * encodings, and ONIGERR_MEMORY when the buffer cannot be allocated, when
 * the range is reversed (end < s), or when the converted size does not fit
 * in an unsigned int.  *conv and *conv_end are not meaningful on failure.
 */
static int
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
              UChar** conv, UChar** conv_end)
{
  /* Selected converter; stays 0 when the combination is not supported. */
  void (*convert)(const UChar*, const UChar*, UChar*) = 0;
  unsigned int expand = 1;  /* output bytes produced per input byte */
  unsigned int max_len;
  unsigned int out_len;
  int from_single_byte;

  from_single_byte =
    (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1);

  if (to == ONIG_ENCODING_UTF16_BE) {
    if (from_single_byte) {
      convert = conv_ext0be;
      expand  = 2;
    }
    else if (from == ONIG_ENCODING_UTF16_LE) {
      convert = conv_swap2bytes;
    }
  }
  else if (to == ONIG_ENCODING_UTF16_LE) {
    if (from_single_byte) {
      convert = conv_ext0le;
      expand  = 2;
    }
    else if (from == ONIG_ENCODING_UTF16_BE) {
      convert = conv_swap2bytes;
    }
  }
  else if (to == ONIG_ENCODING_UTF32_BE) {
    if (from_single_byte) {
      convert = conv_ext0be32;
      expand  = 4;
    }
    else if (from == ONIG_ENCODING_UTF32_LE) {
      convert = conv_swap4bytes;
    }
  }
  else if (to == ONIG_ENCODING_UTF32_LE) {
    if (from_single_byte) {
      convert = conv_ext0le32;
      expand  = 4;
    }
    else if (from == ONIG_ENCODING_UTF32_BE) {
      convert = conv_swap4bytes;
    }
  }

  if (convert == 0)
    return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;

  /*
   * Validate the length before multiplying: `~0u / expand` is the largest
   * input length whose expanded size still fits in an unsigned int, so the
   * product below cannot wrap and yield an undersized buffer.
   */
  if (end < s) return ONIGERR_MEMORY;
  max_len = ~0u / expand;
  if ((end - s) > max_len) return ONIGERR_MEMORY;
  out_len = (unsigned int )(end - s) * expand;

  /*
   * A zero-byte allocation request is allowed to return NULL, which would
   * make an empty pattern look like an out-of-memory condition.  Always
   * request at least one byte.
   */
  *conv = (UChar* )xmalloc(out_len > 0 ? out_len : 1);
  CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
  *conv_end = *conv + out_len;

  convert(s, end, *conv);
  return 0;
}
/*
 * Create and compile a regex from `pattern`, converting the pattern to
 * ci->target_enc first when ci->pattern_enc differs from it.
 *
 *   reg                  : receives the new regex object.
 *   pattern, pattern_end : pattern bytes; pattern_end is one past the last.
 *   ci                   : encodings, option, ambig_flag and syntax to use.
 *   einfo                : optional error info; its `par` member is reset
 *                          to NULL on entry when einfo is not NULL.
 *
 * Returns 0 on success or the error code of the step that failed.  When
 * compilation fails the regex is freed and *reg is set to NULL.  A pattern
 * buffer produced by the conversion is always released before returning.
 */
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  UChar* cpat;
  UChar* cpat_end;
  int status;

  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;

  if (ci->pattern_enc == ci->target_enc) {
    /* Same encoding: compile the caller's bytes directly. */
    cpat     = (UChar* )pattern;
    cpat_end = (UChar* )pattern_end;
  }
  else {
    status = conv_encoding(ci->pattern_enc, ci->target_enc,
                           pattern, pattern_end, &cpat, &cpat_end);
    if (status != 0) return status;
  }

  status = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
                           ci->syntax);
  if (status == 0) {
    status = onig_compile(*reg, cpat, cpat_end, einfo);
    if (status != 0) {
      onig_free(*reg);
      *reg = NULL;
    }
  }

  /* cpat differs from pattern only when conv_encoding() allocated it. */
  if (cpat != pattern) xfree(cpat);
  return status;
}
/*
 * Compile `pattern` with onig_new_deluxe() and install the result into the
 * existing regex `reg`.
 *
 * If ONIG_STATE(reg) is ONIG_STATE_NORMAL the new regex is handed to
 * onig_transfer(); otherwise it is passed to onig_chain_link_add().
 *
 * Returns 0 on success, or the error code from onig_new_deluxe(), in which
 * case `reg` is left untouched.
 */
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  regex_t* fresh;
  int status;

  status = onig_new_deluxe(&fresh, pattern, pattern_end, ci, einfo);
  if (status != 0) return status;

  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL) {
    onig_chain_link_add(reg, fresh);
  }
  else {
    onig_transfer(reg, fresh);
  }

  return 0;
}

View File

@ -1,26 +1,38 @@
/**********************************************************************
reggnu.c - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
#include "oniggnu.h"
#endif
#if defined(RUBY_PLATFORM) || defined(RUBY)
#ifndef ONIG_RUBY_M17N
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
#endif
#endif
#ifndef NULL
#define NULL ((void* )0)
#endif
extern void
re_free_registers(OnigRegion* r)
{
@ -111,7 +123,9 @@ re_free_pattern(regex_t* reg)
extern int
re_alloc_pattern(regex_t** reg)
{
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT,
ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
@ -121,86 +135,6 @@ re_set_casetable(const char* table)
onigenc_set_default_caseconv_table((UChar* )table);
}
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
static const unsigned char mbctab_ascii[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const unsigned char mbctab_euc[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};
static const unsigned char mbctab_sjis[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
static const unsigned char mbctab_utf8[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
};
const unsigned char *re_mbctab = mbctab_ascii;
#endif
extern void
#ifdef ONIG_RUBY_M17N
re_mbcinit(OnigEncoding enc)
@ -236,21 +170,4 @@ re_mbcinit(int mb_code)
onigenc_set_default_encoding(enc);
#endif
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
switch (mb_code) {
case MBCTYPE_ASCII:
re_mbctab = mbctab_ascii;
break;
case MBCTYPE_EUC:
re_mbctab = mbctab_euc;
break;
case MBCTYPE_SJIS:
re_mbctab = mbctab_sjis;
break;
case MBCTYPE_UTF8:
re_mbctab = mbctab_utf8;
break;
}
#endif
}

View File

@ -1,12 +1,33 @@
/**********************************************************************
regint.h - Oniguruma (regular expression library)
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGINT_H
#define REGINT_H
/**********************************************************************
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* for debug */
/* #define ONIG_DEBUG_PARSE_TREE */
@ -19,7 +40,8 @@
/* #define ONIG_DEBUG_STATISTICS */
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_STATISTICS)
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#endif
@ -34,9 +56,9 @@
/* config */
/* spec. config */
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_FOLD_MATCH /* ess-tsett etc... */
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@ -44,48 +66,55 @@
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
#define USE_ST_HASH_TABLE
#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define MATCH_STACK_LIMIT_SIZE 500000
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
/* interface to external system */
/* NOT_RUBY selects between the two host-integration variants below. */
#ifdef NOT_RUBY /* given from Makefile */
#include "config.h"
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_VARIABLE_SYNTAX
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
/* #define USE_MULTI_THREAD_SYSTEM */
/* The thread and interrupt hooks are defined empty in this branch. */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define CHECK_INTERRUPT /* depend on application */
/* Memory allocation maps straight onto the C library allocator. */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
#else
/* NOT_RUBY is not defined: hooks are bound to names taken from the Ruby headers. */
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
#define USE_MULTI_THREAD_SYSTEM
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
#define CHECK_INTERRUPT do {\
if (rb_trap_pending) {\
if (! rb_prohibit_interrupt) {\
rb_trap_exec();\
}\
}\
} while (0)
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
#if defined(RUBY_VERSION_MAJOR)
#if RUBY_VERSION_MAJOR > 1 || \
(RUBY_VERSION_MAJOR == 1 && \
defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
#define USE_ST_HASH_TABLE
#endif
#endif
#endif /* else NOT_RUBY */
#define THREAD_PASS_LIMIT_COUNT 10
#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(_WIN32) && !defined(__GNUC__)
#define xalloca _alloca
#ifdef NOT_RUBY
#define vsnprintf _vsnprintf
@ -94,6 +123,69 @@
#define xalloca alloca
#endif
/*
 * ONIG_STATE_INC / ONIG_STATE_DEC adjust (reg)->state by one when
 * USE_MULTI_THREAD_SYSTEM is defined; otherwise they expand to nothing.
 */
#ifdef USE_MULTI_THREAD_SYSTEM
#define ONIG_STATE_INC(reg) (reg)->state++
#define ONIG_STATE_DEC(reg) (reg)->state--
#else
#define ONIG_STATE_INC(reg) /* Nothing */
#define ONIG_STATE_DEC(reg) /* Nothing */
#endif /* USE_MULTI_THREAD_SYSTEM */
/*
 * Name mapping between the st_* and onig_st_* hash-table identifiers.
 * onig_st_is_member is always an alias of st_is_member.
 */
#define onig_st_is_member st_is_member
#ifdef NOT_RUBY
/*
 * NOT_RUBY build: every st_* name is rewritten to its onig_st_* counterpart.
 * NOTE(review): presumably this keeps the bundled table code from clashing
 * with another st implementation in the host program -- confirm.
 */
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
#define st_init_numtable_with_size onig_st_init_numtable_with_size
#define st_init_strtable onig_st_init_strtable
#define st_init_strtable_with_size onig_st_init_strtable_with_size
#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
#define st_delete onig_st_delete
#define st_delete_safe onig_st_delete_safe
#define st_insert onig_st_insert
#define st_insert_strend onig_st_insert_strend
#define st_lookup onig_st_lookup
#define st_lookup_strend onig_st_lookup_strend
#define st_foreach onig_st_foreach
#define st_add_direct onig_st_add_direct
#define st_add_direct_strend onig_st_add_direct_strend
#define st_free_table onig_st_free_table
#define st_cleanup_safe onig_st_cleanup_safe
#define st_copy onig_st_copy
#define st_nothing_key_clone onig_st_nothing_key_clone
#define st_nothing_key_free onig_st_nothing_key_free
#else /* NOT_RUBY */
/* Otherwise the mapping is reversed: onig_st_* names resolve to st_*. */
#define onig_st_init_table st_init_table
#define onig_st_init_table_with_size st_init_table_with_size
#define onig_st_init_numtable st_init_numtable
#define onig_st_init_numtable_with_size st_init_numtable_with_size
#define onig_st_init_strtable st_init_strtable
#define onig_st_init_strtable_with_size st_init_strtable_with_size
#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
#define onig_st_delete st_delete
#define onig_st_delete_safe st_delete_safe
#define onig_st_insert st_insert
#define onig_st_insert_strend st_insert_strend
#define onig_st_lookup st_lookup
#define onig_st_lookup_strend st_lookup_strend
#define onig_st_foreach st_foreach
#define onig_st_add_direct st_add_direct
#define onig_st_add_direct_strend st_add_direct_strend
#define onig_st_free_table st_free_table
#define onig_st_cleanup_safe st_cleanup_safe
#define onig_st_copy st_copy
#define onig_st_nothing_key_clone st_nothing_key_clone
#define onig_st_nothing_key_free st_nothing_key_free
#endif /* NOT_RUBY */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@ -109,7 +201,11 @@
#endif
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
@ -292,6 +388,8 @@ typedef unsigned int BitStatusType;
/* ignore-case and multibyte status are included in compiled code. */
#define IS_DYNAMIC_OPTION(option) 0
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
/* bitset */
#define BITS_PER_BYTE 8
@ -449,6 +547,7 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@ -501,6 +600,8 @@ enum OpCode {
OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
OP_NULL_CHECK_START, /* null loop checker start */
OP_NULL_CHECK_END, /* null loop checker end */
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
@ -529,11 +630,12 @@ enum OpCode {
#define ARG_MEMNUM 4
#define ARG_OPTION 5
typedef short int RelAddrType;
typedef short int AbsAddrType;
typedef short int LengthType;
typedef short int MemNumType;
typedef int RepeatNumType;
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
typedef void* PointerType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@ -543,57 +645,33 @@ typedef int RepeatNumType;
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define GET_RELADDR_INC(addr,p) do{\
addr = *((RelAddrType* )(p));\
(p) += SIZE_RELADDR;\
#define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#define GET_ABSADDR_INC(addr,p) do{\
addr = *((AbsAddrType* )(p));\
(p) += SIZE_ABSADDR;\
} while(0)
#define GET_LENGTH_INC(len,p) do{\
len = *((LengthType* )(p));\
(p) += SIZE_LENGTH;\
} while(0)
#define GET_MEMNUM_INC(num,p) do{\
num = *((MemNumType* )(p));\
(p) += SIZE_MEMNUM;\
} while(0)
#define GET_REPEATNUM_INC(num,p) do{\
num = *((RepeatNumType* )(p));\
(p) += SIZE_REPEATNUM;\
} while(0)
#define GET_OPTION_INC(option,p) do{\
option = *((OnigOptionType* )(p));\
(p) += SIZE_OPTION;\
} while(0)
#else
#define GET_RELADDR_INC(addr,p) GET_SHORT_INC(addr,p)
#define GET_ABSADDR_INC(addr,p) GET_SHORT_INC(addr,p)
#define GET_LENGTH_INC(len,p) GET_SHORT_INC(len,p)
#define GET_MEMNUM_INC(num,p) GET_SHORT_INC(num,p)
#define GET_REPEATNUM_INC(num,p) GET_INT_INC(num,p)
#define GET_OPTION_INC(option,p) GET_UINT_INC(option,p)
#define SERIALIZE_RELADDR(addr,p) SERIALIZE_SHORT(addr,p)
#define SERIALIZE_ABSADDR(addr,p) SERIALIZE_SHORT(addr,p)
#define SERIALIZE_LENGTH(len,p) SERIALIZE_SHORT(len,p)
#define SERIALIZE_MEMNUM(num,p) SERIALIZE_SHORT(num,p)
#define SERIALIZE_REPEATNUM(num,p) SERIALIZE_INT(num,p)
#define SERIALIZE_OPTION(option,p) SERIALIZE_UINT(option,p)
#define SERIALIZE_BUFSIZE SIZEOF_INT
#define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
#define GET_BYTE_INC(byte,p) do{\
@ -636,23 +714,53 @@ typedef int RepeatNumType;
#define SIZE_OP_RETURN SIZE_OPCODE
#define MC_ESC(enc) (enc)->meta_char_table.esc
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
#define MC_ANYTIME(enc) (enc)->meta_char_table.anytime
#define MC_ZERO_OR_ONE_TIME(enc) (enc)->meta_char_table.zero_or_one_time
#define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time
#define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime
#define SYN_POSIX_COMMON_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
ONIG_SYN_OP_LINE_ANCHOR | \
ONIG_SYN_OP_ESC_CONTROL_CHARS )
#define SYN_GNU_REGEX_OP \
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
ONIG_SYN_OP_VBAR_ALT | \
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
ONIG_SYN_OP_QMARK_ZERO_ONE | \
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
ONIG_SYN_OP_ESC_W_WORD | \
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
ONIG_SYN_OP_LINE_ANCHOR )
#define SYN_GNU_REGEX_BV \
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
/* cclass node */
/* Bit flags kept in the `flags` member of a cclass node. */
#define FLAG_CCLASS_NOT 1
#define FLAG_CCLASS_SHARE (1<<1)
/* Set / clear individual flag bits on the node pointed to by cc. */
#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
/* Test whether a flag bit is set; each evaluates to 0 or 1. */
#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
typedef struct {
UChar esc;
UChar anychar;
UChar anytime;
UChar zero_or_one_time;
UChar one_or_more_time;
UChar anychar_anytime;
} OnigMetaCharTableType;
extern OnigMetaCharTableType OnigMetaCharTable;
#define MC_ESC OnigMetaCharTable.esc
#define MC_ANYCHAR OnigMetaCharTable.anychar
#define MC_ANYTIME OnigMetaCharTable.anytime
#define MC_ZERO_OR_ONE_TIME OnigMetaCharTable.zero_or_one_time
#define MC_ONE_OR_MORE_TIME OnigMetaCharTable.one_or_more_time
#define MC_ANYCHAR_ANYTIME OnigMetaCharTable.anychar_anytime
int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
#ifdef ONIG_DEBUG
@ -665,7 +773,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp));
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@ -675,11 +783,11 @@ extern void onig_print_statistics P_((FILE* f));
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
#endif /* REGINT_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,33 @@
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
#ifndef REGPARSE_H
#define REGPARSE_H
/**********************************************************************
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
@ -43,7 +64,8 @@
#define CTYPE_NOT_WHITE_SPACE (1<<3)
#define CTYPE_DIGIT (1<<4)
#define CTYPE_NOT_DIGIT (1<<5)
#define CTYPE_XDIGIT (1<<6)
#define CTYPE_NOT_XDIGIT (1<<7)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
@ -52,29 +74,27 @@
#define EFFECT_OPTION (1<<1)
#define EFFECT_STOP_BACKTRACK (1<<2)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 7
#define NSTR_RAW (1<<0) /* by backslashed number */
#define NSTR_CASE_AMBIG (1<<1)
#define NSTR_AMBIG (1<<1)
#define NSTR_AMBIG_REDUCE (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_CASE_AMBIG(node) (node)->u.str.flag |= NSTR_CASE_AMBIG
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
(((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_AMBIG_REDUCE(node) \
(((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
#define CCLASS_SET_NOT(cc) (cc)->not = 1
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
@ -89,13 +109,17 @@ typedef struct {
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
/* move to regint.h */
#if 0
typedef struct {
int not;
int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
#endif
typedef struct {
int state;
struct _Node* target;
int lower;
int upper;
@ -108,19 +132,19 @@ typedef struct {
} QualifierNode;
/* status bits */
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_SIMPLE_REPEAT (1<<6) /* for stop backtrack optimization */
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_MIN_FIXED (1<<0)
#define NST_MAX_FIXED (1<<1)
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
#define NST_MEM_BACKREFED (1<<5)
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
#define NST_RECURSION (1<<7)
#define NST_CALLED (1<<8)
#define NST_ADDR_FIXED (1<<9)
#define NST_NAMED_GROUP (1<<10)
#define NST_NAME_REF (1<<11)
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
@ -133,13 +157,15 @@ typedef struct {
#define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en) (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
typedef struct {
int state;
@ -224,9 +250,10 @@ typedef struct _Node {
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
typedef struct {
OnigOptionType option;
OnigEncoding enc;
OnigSyntaxType* syntax;
OnigOptionType option;
OnigAmbigType ambig_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
BitStatusType capture_history;
BitStatusType bt_mem_start;
BitStatusType bt_mem_end;
@ -254,19 +281,31 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
#ifdef USE_NAMED_GROUP
typedef struct {
int new_val;
} GroupNumRemap;
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
#endif
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_free_node_list();
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP

View File

@ -1,10 +1,32 @@
/**********************************************************************
regposerr.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "config.h"
#include "onigposix.h"
@ -58,7 +80,7 @@ regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size)
s = tbuf;
}
len = strlen(s) + 1;
len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */
if (buf != NULL && size > 0) {
strncpy(buf, s, size - 1);

View File

@ -1,10 +1,31 @@
/**********************************************************************
regposix.c - Oniguruma (regular expression library)
Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#define regex_t onig_regex_t
#include "regint.h"
@ -14,16 +35,17 @@
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
#if 1
/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
#define ENC_STRING_LEN(enc,s,len) do { \
UChar* tmps = (UChar* )(s); \
/* while (*tmps != 0) tmps += enc_len(enc,*tmps); */ \
while (*tmps != 0) tmps++; /* OK for UTF-8, EUC-JP, Shift_JIS */ \
len = tmps - (UChar* )(s); \
if (ONIGENC_MBC_MINLEN(enc) == 1) { \
UChar* tmps = (UChar* )(s); \
while (*tmps != 0) tmps++; \
len = tmps - (UChar* )(s); \
} \
else { \
len = onigenc_str_bytelen_null(enc, (UChar* )s); \
} \
} while(0)
#else
#define ENC_STRING_LEN(enc,s,len) len = strlen(s)
#endif
typedef struct {
int onig_err;
@ -50,7 +72,7 @@ onig2posix_error_code(int code)
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
{ ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
{ ONIGERR_END_PATTERN_AT_BACKSLASH, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
{ ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
{ ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
@ -91,6 +113,7 @@ onig2posix_error_code(int code)
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
};
@ -145,24 +168,37 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
{
int r, i, len;
UChar* end;
regmatch_t* pm;
OnigOptionType options;
options = ONIG_OPTION_POSIX_REGION;
if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if ((reg->comp_options & REG_NOSUB) != 0) {
pmatch = (regmatch_t* )NULL;
if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
pm = (regmatch_t* )NULL;
nmatch = 0;
}
else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
* (ONIG_C(reg)->num_mem + 1));
if (pm == NULL)
return REG_ESPACE;
}
else {
pm = pmatch;
}
ENC_STRING_LEN(ONIG_C(reg)->code,str,len);
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pmatch, options);
if (r >= 0) {
r = 0; /* Match */
if (pm != pmatch && pm != NULL) {
xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
}
}
else if (r == ONIG_MISMATCH) {
r = REG_NOMATCH;
@ -173,6 +209,9 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
r = onig2posix_error_code(r);
}
if (pm != pmatch && pm != NULL)
xfree(pm);
return r;
}
@ -201,6 +240,13 @@ reg_set_encoding(int mb_code)
case REG_POSIX_ENCODING_UTF8:
enc = ONIG_ENCODING_UTF8;
break;
case REG_POSIX_ENCODING_UTF16_BE:
enc = ONIG_ENCODING_UTF16_BE;
break;
case REG_POSIX_ENCODING_UTF16_LE:
enc = ONIG_ENCODING_UTF16_LE;
break;
default:
return ;
break;
@ -211,18 +257,18 @@ reg_set_encoding(int mb_code)
extern int
reg_name_to_group_numbers(regex_t* reg,
unsigned char* name, unsigned char* name_end, int** nums)
const unsigned char* name, const unsigned char* name_end, int** nums)
{
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
}
typedef struct {
int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*);
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
regex_t* reg;
void* arg;
} i_wrap;
static int i_wrapper(unsigned char* name, unsigned char* name_end,
static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
int ng, int* gs,
onig_regex_t* reg, void* arg)
{
@ -233,8 +279,8 @@ static int i_wrapper(unsigned char* name, unsigned char* name_end,
extern int
reg_foreach_name(regex_t* reg,
int (*func)(unsigned char*,unsigned char*,int,int*,regex_t*,void*),
void* arg)
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
void* arg)
{
i_wrap warg;

View File

@ -0,0 +1,207 @@
/**********************************************************************
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/*
 * Syntax table named for POSIX basic regular expressions.
 * First slot: SYN_POSIX_COMMON_OP plus the ESC_LPAREN_SUBEXP and
 * ESC_BRACE_INTERVAL operator flags.  The second and third slots are 0
 * (elsewhere in this file those slots carry ONIG_SYN_OP2_* and behavior
 * flags).  Last slot: default options SINGLELINE | MULTILINE.
 */
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
, 0
, 0
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
/*
 * Syntax table named for POSIX extended regular expressions.
 * Operators: SYN_POSIX_COMMON_OP plus unescaped LPAREN_SUBEXP,
 * BRACE_INTERVAL, PLUS_ONE_INF, QMARK_ZERO_ONE and VBAR_ALT.
 * No ONIG_SYN_OP2_* flags; the third slot sets five ONIG_SYN_* behavior
 * flags; default options are SINGLELINE | MULTILINE.
 */
OnigSyntaxType OnigSyntaxPosixExtended = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
/*
 * Syntax table named for Emacs regular expressions.
 * Grouping, intervals and alternation use the ESC_* (backslash) operator
 * flags; the only OP2 flag is ESC_GNU_BUF_ANCHOR; the only behavior flag is
 * ALLOW_EMPTY_RANGE_IN_CC; no default options.
 */
OnigSyntaxType OnigSyntaxEmacs = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
, ONIG_OPTION_NONE
};
/*
 * Syntax table named for grep.
 * Note the mix in the operator slot: BRACE_INTERVAL is the unescaped flag,
 * while grouping, alternation, `+` and `?` use the ESC_* flags.
 * No OP2 flags; behavior flags are ALLOW_EMPTY_RANGE_IN_CC and
 * NOT_NEWLINE_IN_NEGATIVE_CC; no default options.
 */
OnigSyntaxType OnigSyntaxGrep = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
, 0
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
, ONIG_OPTION_NONE
};
/*
 * Syntax table named for GNU regex: exactly the shared SYN_GNU_REGEX_OP
 * operator set and SYN_GNU_REGEX_BV behavior set (both defined outside this
 * file), with no OP2 flags and no default options.
 */
OnigSyntaxType OnigSyntaxGnuRegex = {
SYN_GNU_REGEX_OP
, 0
, SYN_GNU_REGEX_BV
, ONIG_OPTION_NONE
};
/*
 * Syntax table named for Java.
 * Operators: the GNU regex set plus non-greedy `?`, control-char, octal and
 * two-digit hex escapes, with ESC_LTGT_WORD_BEGIN_END masked out (& ~).
 * OP2 flags add \Q quoting, (?...) groups, Perl-style options, possessive
 * repeats/intervals, character-class set operations, \v, \uHHHH and \p{...}.
 * Behavior: GNU set plus DIFFERENT_LEN_ALT_LOOK_BEHIND.
 * Default option: SINGLELINE.
 */
OnigSyntaxType OnigSyntaxJava = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ONIG_OPTION_SINGLELINE
};
/*
 * Syntax table named for Perl.
 * Operators: the GNU regex set plus non-greedy `?`, octal, \xHH and \x{...}
 * hex escapes and control-char escapes, with ESC_LTGT_WORD_BEGIN_END masked
 * out (& ~).
 * OP2 flags add \Q quoting, (?...) groups, Perl-style options and the
 * \p{...} property forms.  Behavior: the GNU set.  Default option: SINGLELINE.
 */
OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
};
/*
 * Install `syntax` as the library-wide default syntax.
 * A null pointer selects ONIG_SYNTAX_RUBY.  Always returns 0.
 */
extern int
onig_set_default_syntax(OnigSyntaxType* syntax)
{
  OnigDefaultSyntax = IS_NULL(syntax) ? ONIG_SYNTAX_RUBY : syntax;
  return 0;
}
/* Copy the whole syntax definition `*from` into `*to` by struct assignment. */
extern void
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
{
  *to = *from;
}
/* Replace the `op` operator mask of `syntax` with `op`. */
extern void
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
{
  syntax->op = op;
}
/* Replace the `op2` operator mask of `syntax` with `op2`. */
extern void
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
{
  syntax->op2 = op2;
}
/* Replace the `behavior` flag mask of `syntax` with `behavior`. */
extern void
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
{
  syntax->behavior = behavior;
}
/* Replace the default `options` stored in `syntax` with `options`. */
extern void
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
{
  syntax->options = options;
}
/* Return the `op` operator mask stored in `syntax`. */
extern unsigned int
onig_get_syntax_op(OnigSyntaxType* syntax)
{
  return syntax->op;
}
/* Return the `op2` operator mask stored in `syntax`. */
extern unsigned int
onig_get_syntax_op2(OnigSyntaxType* syntax)
{
  return syntax->op2;
}
/* Return the `behavior` flag mask stored in `syntax`. */
extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType* syntax)
{
  return syntax->behavior;
}
/* Return the default `options` stored in `syntax`. */
extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType* syntax)
{
  return syntax->options;
}
#ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` in the slot of enc->meta_char_table selected by `what`
 * (one of the ONIG_META_CHAR_* selectors handled below).
 * Returns 0 on success, ONIGERR_INVALID_ARGUMENT for any other selector.
 */
extern int onig_set_meta_char(OnigEncoding enc,
                              unsigned int what, OnigCodePoint code)
{
  if (what == ONIG_META_CHAR_ESCAPE) {
    enc->meta_char_table.esc = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR) {
    enc->meta_char_table.anychar = code;
  }
  else if (what == ONIG_META_CHAR_ANYTIME) {
    enc->meta_char_table.anytime = code;
  }
  else if (what == ONIG_META_CHAR_ZERO_OR_ONE_TIME) {
    enc->meta_char_table.zero_or_one_time = code;
  }
  else if (what == ONIG_META_CHAR_ONE_OR_MORE_TIME) {
    enc->meta_char_table.one_or_more_time = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR_ANYTIME) {
    enc->meta_char_table.anychar_anytime = code;
  }
  else {
    return ONIGERR_INVALID_ARGUMENT;
  }

  return 0;
}
#endif /* USE_VARIABLE_META_CHARS */

View File

@ -0,0 +1,76 @@
/**********************************************************************
regtrav.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifdef USE_CAPTURE_HISTORY
/*
 * Depth-first walk over a capture-history tree.
 *
 * For each node the callback receives (group, beg, end, level, phase, arg).
 * It is invoked before the children when `at` contains
 * ONIG_TRAVERSE_CALLBACK_AT_FIRST and after them when `at` contains
 * ONIG_TRAVERSE_CALLBACK_AT_LAST.  The first non-zero callback result stops
 * the walk and is returned; otherwise (including for a null node) 0.
 */
static int
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
                      int(*callback_func)(int,int,int,int,int,void*),
                      int level, void* arg)
{
  int rc;
  int child;

  if (node == (OnigCaptureTreeNode* )0) return 0;

  if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
    rc = (*callback_func)(node->group, node->beg, node->end,
                          level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
    if (rc != 0) return rc;
  }

  child = 0;
  while (child < node->num_childs) {
    rc = capture_tree_traverse(node->childs[child], at,
                               callback_func, level + 1, arg);
    if (rc != 0) return rc;
    child++;
  }

  if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) {
    rc = (*callback_func)(node->group, node->beg, node->end,
                          level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
    if (rc != 0) return rc;
  }

  return 0;
}
#endif /* USE_CAPTURE_HISTORY */
/*
 * Public entry point: walk region->history_root starting at level 0,
 * calling `callback_func` as selected by `at`.
 * Returns ONIG_NO_SUPPORT_CONFIG when built without USE_CAPTURE_HISTORY.
 */
extern int
onig_capture_tree_traverse(OnigRegion* region, int at,
                  int(*callback_func)(int,int,int,int,int,void*), void* arg)
{
#ifndef USE_CAPTURE_HISTORY
  return ONIG_NO_SUPPORT_CONFIG;
#else
  return capture_tree_traverse(region->history_root, at,
                               callback_func, 0, arg);
#endif
}

View File

@ -0,0 +1,55 @@
/**********************************************************************
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "oniguruma.h"
#include <stdio.h>
/*
 * Return the library version as "MAJOR.MINOR.TEENY".
 *
 * The string lives in a static buffer that is rewritten on every call, so
 * the caller must not free it.  snprintf() bounds the write to the buffer:
 * the previous sprintf() would overflow s[] if a version component ever
 * grew beyond three digits.
 */
extern const char*
onig_version(void)
{
  static char s[12];

  snprintf(s, sizeof(s), "%d.%d.%d",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}
/*
 * Return the copyright banner, including the library version.
 *
 * The string lives in a static buffer that is rewritten on every call.
 * snprintf() bounds the write to the buffer; the previous sprintf() relied
 * on the version numbers staying short enough to fit in 58 bytes.
 */
extern const char*
onig_copyright(void)
{
  static char s[58];

  snprintf(s, sizeof(s),
           "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}

717
ext/mbstring/oniguruma/st.c Normal file
View File

@ -0,0 +1,717 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <malloc.h>
#endif
#ifdef NOT_RUBY
#include "regint.h"
#else
#ifdef RUBY_PLATFORM
#define xmalloc ruby_xmalloc
#define xcalloc ruby_xcalloc
#define xrealloc ruby_xrealloc
#define xfree ruby_xfree
void *xmalloc(long);
void *xcalloc(long, long);
void *xrealloc(void *, long);
void xfree(void *);
#endif
#endif
#include "st.h"
/* One key/record pair; entries that land in the same bin form a singly linked chain. */
typedef struct st_table_entry st_table_entry;
struct st_table_entry {
/* full hash of the key as computed at insertion; reused by rehash() */
unsigned int hash;
/* key handle (an integer or a pointer cast to st_data_t, depending on the table type) */
st_data_t key;
/* value associated with the key */
st_data_t record;
/* next entry in the same bin, or 0 at the end of the chain */
st_table_entry *next;
};
#define ST_DEFAULT_MAX_DENSITY 5
#define ST_DEFAULT_INIT_TABLE_SIZE 11
/*
* ST_DEFAULT_MAX_DENSITY is the default for the largest we allow the
* average number of items per bin before increasing the number of
* bins
*
* ST_DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
* allocated initially
*
*/
static int numcmp(long, long);
static int numhash(long);
/* Hash type for integer keys: numcmp/numhash, with no-op key free and clone. */
static struct st_hash_type type_numhash = {
numcmp,
numhash,
st_nothing_key_free,
st_nothing_key_clone
};
/* extern int strcmp(const char *, const char *); */
static int strhash(const char *);
/*
 * Hash type for NUL-terminated string keys: strcmp/strhash.
 * Keys are neither freed nor duplicated by the table (no-op free and clone).
 */
static struct st_hash_type type_strhash = {
strcmp,
strhash,
st_nothing_key_free,
st_nothing_key_clone
};
static int strend_cmp(st_strend_key*, st_strend_key*);
static int strend_hash(st_strend_key*);
static int strend_key_free(st_data_t key);
static st_data_t strend_key_clone(st_data_t x);
/*
 * Hash type for (start, end) byte-range keys held in st_strend_key structs.
 * The table frees and duplicates the key struct itself through
 * strend_key_free / strend_key_clone; the bytes it points at are not copied.
 */
static struct st_hash_type type_strend_hash = {
strend_cmp,
strend_hash,
strend_key_free,
strend_key_clone
};
static void rehash(st_table *);
/* Allocate one uninitialized object of `type` through xmalloc. */
#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
/* Allocate n*s zeroed bytes through xcalloc. */
#define Calloc(n,s) (char*)xcalloc((n),(s))
/* Keys are equal when identical or when the table's compare callback returns 0. */
#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
/* Full hash of `key` according to the table's hash callback. */
#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
/* Bin index of `key`: the full hash reduced modulo the current bin count. */
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
/*
* MINSIZE is the minimum size of a dictionary.
* new_size() uses it as the first power-of-two threshold, paired with
* primes[0].
*/
#define MINSIZE 8
/*
Table of prime numbers 2^n+a, 3<=n<=30, terminated by a 0 sentinel.
Entry i is the bin count new_size() returns for requests below MINSIZE<<i.
*/
static long primes[] = {
8 + 3,
16 + 3,
32 + 5,
64 + 3,
128 + 3,
256 + 27,
512 + 9,
1024 + 9,
2048 + 5,
4096 + 3,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 29,
262144 + 3,
524288 + 21,
1048576 + 7,
2097152 + 17,
4194304 + 15,
8388608 + 9,
16777216 + 43,
33554432 + 35,
67108864 + 15,
134217728 + 29,
268435456 + 3,
536870912 + 11,
1073741824 + 85,
0
};
static int
new_size(size)
int size;
{
int i;
#if 0
for (i=3; i<31; i++) {
if ((1<<i) > size) return 1<<i;
}
return -1;
#else
int newsize;
for (i = 0, newsize = MINSIZE;
i < (int )(sizeof(primes)/sizeof(primes[0]));
i++, newsize <<= 1)
{
if (newsize > size) return primes[i];
}
/* Ran out of polynomials */
return -1; /* should raise exception */
#endif
}
#ifdef HASH_LOG
static int collision = 0;
static int init_st = 0;
/*
 * atexit hook (HASH_LOG builds only): write the collision counter to
 * /tmp/col.  Does nothing if the file cannot be opened; the previous code
 * passed a NULL stream to fprintf()/fclose() in that case.
 */
static void
stat_col()
{
  FILE *f = fopen("/tmp/col", "w");

  if (f == NULL) return;
  fprintf(f, "collision: %d\n", collision);
  fclose(f);
}
#endif
/*
 * Create an empty table of the given hash `type` with room for at least
 * `size` bins (rounded up to a prime by new_size()).
 *
 * Returns the new table, or 0 when the size cannot be satisfied or an
 * allocation fails.  Previously a -1 from new_size() was used as the bin
 * count and NULL allocation results were dereferenced.
 */
st_table*
st_init_table_with_size(type, size)
     struct st_hash_type *type;
     int size;
{
  st_table *tbl;

#ifdef HASH_LOG
  if (init_st == 0) {
    init_st = 1;
    atexit(stat_col);
  }
#endif

  size = new_size(size);	/* round up to prime number */
  if (size <= 0) return 0;

  tbl = alloc(st_table);
  if (tbl == 0) return 0;

  tbl->type = type;
  tbl->num_entries = 0;
  tbl->num_bins = size;
  tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
  if (tbl->bins == 0) {
    free(tbl);
    return 0;
  }

  return tbl;
}
/*
 * Create an empty table of the given hash `type` with the smallest bin
 * count (a requested size of 0 is rounded up by st_init_table_with_size).
 */
st_table*
st_init_table(type)
     struct st_hash_type *type;
{
  const int requested_bins = 0;

  return st_init_table_with_size(type, requested_bins);
}
/* Create an empty, minimum-size table keyed by integers (type_numhash). */
st_table*
st_init_numtable(void)
{
  struct st_hash_type *kind = &type_numhash;

  return st_init_table(kind);
}
/* Create an empty integer-keyed table sized for at least `size` bins. */
st_table*
st_init_numtable_with_size(size)
     int size;
{
  struct st_hash_type *kind = &type_numhash;

  return st_init_table_with_size(kind, size);
}
/* Create an empty, minimum-size table keyed by NUL-terminated strings (type_strhash). */
st_table*
st_init_strtable(void)
{
  struct st_hash_type *kind = &type_strhash;

  return st_init_table(kind);
}
/* Create an empty string-keyed table sized for at least `size` bins. */
st_table*
st_init_strtable_with_size(size)
     int size;
{
  struct st_hash_type *kind = &type_strhash;

  return st_init_table_with_size(kind, size);
}
/*
 * Create an empty table keyed by (start, end) byte ranges
 * (type_strend_hash), sized for at least `size` bins.
 */
st_table*
st_init_strend_table_with_size(size)
     int size;
{
  struct st_hash_type *kind = &type_strend_hash;

  return st_init_table_with_size(kind, size);
}
/*
 * Destroy `table`: release every entry (handing each key to the type's
 * key_free callback first), then the bin array, then the table itself.
 * Records are not freed.
 */
void
st_free_table(table)
     st_table *table;
{
  register st_table_entry *cur, *following;
  int bin;

  for (bin = 0; bin < table->num_bins; bin++) {
    for (cur = table->bins[bin]; cur != 0; cur = following) {
      following = cur->next;	/* saved before cur is released */
      table->type->key_free(cur->key);
      free(cur);
    }
  }

  free(table->bins);
  free(table);
}
/*
 * True when `ptr` is a real entry that does NOT match: either its stored
 * hash differs from hash_val or the keys compare unequal.  False for a null
 * ptr (end of chain) and for a matching entry, so loops on it stop at both.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
/* COLLISION bumps the collision counter in HASH_LOG builds and expands to nothing otherwise. */
#ifdef HASH_LOG
#define COLLISION collision++
#else
#define COLLISION
#endif
/*
 * Locate the entry for the caller's key.  Sets bin_pos to the bin index for
 * hash_val and leaves ptr at the matching entry, or 0 when there is none.
 * NOTE: `key` is not a macro parameter; the macro reads a variable named
 * `key` from the scope where it is expanded.
 */
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
bin_pos = hash_val%(table)->num_bins;\
ptr = (table)->bins[bin_pos];\
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
COLLISION;\
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
ptr = ptr->next;\
}\
ptr = ptr->next;\
}\
} while (0)
/*
 * Look `key` up in `table`.
 * Returns 1 and stores the record through `value` (when `value` is non-null)
 * if the key is present; returns 0 and leaves `*value` untouched otherwise.
 */
int
st_lookup(table, key, value)
     st_table *table;
     register st_data_t key;
     st_data_t *value;
{
  register st_table_entry *hit;
  unsigned int full_hash, slot;

  full_hash = do_hash(key, table);
  FIND_ENTRY(table, hit, full_hash, slot);	/* reads the local `key` */

  if (hit == 0) return 0;

  if (value != 0) *value = hit->record;
  return 1;
}
/*
 * Look up the byte range [str_key, end_key) in a strend table.
 * Builds a temporary key on the stack and defers to st_lookup(); same
 * return convention (1 found, 0 not found).
 */
int
st_lookup_strend(table, str_key, end_key, value)
     st_table *table;
     const unsigned char* str_key;
     const unsigned char* end_key;
     st_data_t *value;
{
  st_strend_key probe;

  probe.s   = (unsigned char* )str_key;
  probe.end = (unsigned char* )end_key;

  return st_lookup(table, (st_data_t )(&probe), value);
}
/*
 * Unconditionally add a new (key, value) entry at the head of its bin.
 * When the average chain length exceeds ST_DEFAULT_MAX_DENSITY the table is
 * rehashed first and bin_pos is recomputed for the new bin count.  The entry
 * returned by alloc() is used without a null check.
 */
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
rehash(table);\
bin_pos = hash_val % table->num_bins;\
}\
\
entry = alloc(st_table_entry);\
\
entry->hash = hash_val;\
entry->key = key;\
entry->record = value;\
entry->next = table->bins[bin_pos];\
table->bins[bin_pos] = entry;\
table->num_entries++;\
} while (0)
/*
 * Insert or update.
 * If `key` is already present its record is overwritten (the stored key is
 * kept) and 1 is returned; otherwise a new entry is added and 0 is returned.
 */
int
st_insert(table, key, value)
     register st_table *table;
     register st_data_t key;
     st_data_t value;
{
  register st_table_entry *hit;
  unsigned int full_hash, slot;

  full_hash = do_hash(key, table);
  FIND_ENTRY(table, hit, full_hash, slot);	/* reads the local `key` */

  if (hit != 0) {
    hit->record = value;
    return 1;
  }

  ADD_DIRECT(table, key, value, full_hash, slot);
  return 0;
}
/*
 * Insert or update the entry for the byte range [str_key, end_key).
 *
 * A key struct is heap-allocated so the table can own it.  Returns what
 * st_insert() returns: 0 when a new entry was added (the table now owns the
 * key struct), 1 when an existing entry was updated.  In the latter case the
 * table keeps its original key, so the freshly allocated one is released
 * here; it used to be leaked.
 */
int
st_insert_strend(table, str_key, end_key, value)
     st_table *table;
     const unsigned char* str_key;
     const unsigned char* end_key;
     st_data_t value;
{
  st_strend_key* key;
  int result;

  key = alloc(st_strend_key);
  key->s   = (unsigned char* )str_key;
  key->end = (unsigned char* )end_key;

  result = st_insert(table, (st_data_t )key, value);
  if (result != 0) {
    /* existing entry updated: our key was not stored */
    xfree(key);
  }
  return result;
}
/*
 * Add (key, value) without checking whether `key` is already present.
 * The caller is responsible for avoiding duplicates.
 */
void
st_add_direct(table, key, value)
     st_table *table;
     st_data_t key;
     st_data_t value;
{
  unsigned int full_hash = do_hash(key, table);
  unsigned int slot = full_hash % table->num_bins;

  ADD_DIRECT(table, key, value, full_hash, slot);
}
void
st_add_direct_strend(table, str_key, end_key, value)
st_table *table;
const unsigned char* str_key;
const unsigned char* end_key;
st_data_t value;
{
st_strend_key* key;
key = alloc(st_strend_key);
key->s = (unsigned char* )str_key;
key->end = (unsigned char* )end_key;
st_add_direct(table, (st_data_t )key, value);
}
/*
 * Grow the bin array to the next prime size and redistribute every entry
 * using its stored hash (keys are not re-hashed).
 *
 * If no larger size is available or the new bin array cannot be allocated,
 * the table is left exactly as it was: it stays valid, just denser.
 * Previously a -1 size or a NULL array was used unchecked.
 */
static void
rehash(table)
     register st_table *table;
{
  register st_table_entry *ptr, *next, **new_bins;
  int i, old_num_bins = table->num_bins, new_num_bins;
  unsigned int hash_val;

  new_num_bins = new_size(old_num_bins+1);
  if (new_num_bins <= 0) return;

  new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
  if (new_bins == 0) return;

  for(i = 0; i < old_num_bins; i++) {
    ptr = table->bins[i];
    while (ptr != 0) {
      next = ptr->next;
      hash_val = ptr->hash % new_num_bins;
      ptr->next = new_bins[hash_val];
      new_bins[hash_val] = ptr;
      ptr = next;
    }
  }
  free(table->bins);
  table->num_bins = new_num_bins;
  table->bins = new_bins;
}
/*
 * Duplicate `old_table`.
 *
 * The copy has the same type, bin count and entry count.  Each entry is
 * copied and its key is duplicated through the type's key_clone callback;
 * records are copied by value.  Entries are prepended, so each chain in the
 * copy is in reverse order relative to the source.
 *
 * Returns the new table, or 0 if an allocation fails.  On a failure midway
 * every entry (and cloned key) created so far is released; previously only
 * the bin array and table header were freed and the entries leaked.
 */
st_table*
st_copy(old_table)
     st_table *old_table;
{
  st_table *new_table;
  st_table_entry *ptr, *entry;
  int i, num_bins = old_table->num_bins;

  new_table = alloc(st_table);
  if (new_table == 0) {
    return 0;
  }

  *new_table = *old_table;
  new_table->bins = (st_table_entry**)
    Calloc((unsigned)num_bins, sizeof(st_table_entry*));

  if (new_table->bins == 0) {
    free(new_table);
    return 0;
  }

  for(i = 0; i < num_bins; i++) {
    new_table->bins[i] = 0;
    ptr = old_table->bins[i];
    while (ptr != 0) {
      entry = alloc(st_table_entry);
      if (entry == 0) {
        /* unwind: bins 0..i hold everything copied so far */
        for (; i >= 0; i--) {
          entry = new_table->bins[i];
          while (entry != 0) {
            ptr = entry->next;
            new_table->type->key_free(entry->key);
            free(entry);
            entry = ptr;
          }
        }
        free(new_table->bins);
        free(new_table);
        return 0;
      }
      *entry = *ptr;
      entry->key = old_table->type->key_clone(ptr->key);
      entry->next = new_table->bins[i];
      new_table->bins[i] = entry;
      ptr = ptr->next;
    }
  }
  return new_table;
}
/*
 * Remove the entry matching `*key`.
 *
 * On success returns 1, stores the removed record through `value` (if
 * non-null) and overwrites `*key` with the key that was stored in the table;
 * the key is handed back to the caller, not passed to key_free.
 * Returns 0 when the key is absent; `*value` is zeroed only when the bin
 * was empty.
 */
int
st_delete(table, key, value)
     register st_table *table;
     register st_data_t *key;
     st_data_t *value;
{
  unsigned int bin;
  st_table_entry *victim;
  register st_table_entry *prev;

  bin = do_hash_bin(*key, table);
  prev = table->bins[bin];

  if (prev == 0) {
    if (value != 0) *value = 0;
    return 0;
  }

  /* head of the chain matches */
  if (EQUAL(table, *key, prev->key)) {
    table->bins[bin] = prev->next;
    table->num_entries--;
    if (value != 0) *value = prev->record;
    *key = prev->key;
    free(prev);
    return 1;
  }

  /* otherwise look one node ahead so the predecessor can be relinked */
  while (prev->next != 0) {
    if (EQUAL(table, prev->next->key, *key)) {
      victim = prev->next;
      prev->next = victim->next;
      table->num_entries--;
      if (value != 0) *value = victim->record;
      *key = victim->key;
      free(victim);
      return 1;
    }
    prev = prev->next;
  }

  return 0;
}
/*
 * Logically delete the entry matching `*key` without unlinking it: the
 * entry's key and record are both overwritten with the `never` marker and
 * num_entries is decremented.  The node itself stays in its chain (see
 * st_cleanup_safe).
 *
 * Returns 1 on success, handing back the stored key through `*key` and the
 * record through `value` (if non-null); returns 0 when not found, zeroing
 * `*value` only when the bin was empty.
 */
int
st_delete_safe(table, key, value, never)
     register st_table *table;
     register st_data_t *key;
     st_data_t *value;
     st_data_t never;
{
  unsigned int bin;
  register st_table_entry *cur;

  bin = do_hash_bin(*key, table);
  cur = table->bins[bin];

  if (cur == 0) {
    if (value != 0) *value = 0;
    return 0;
  }

  while (cur != 0) {
    if ((cur->key != never) && EQUAL(table, cur->key, *key)) {
      table->num_entries--;
      *key = cur->key;
      if (value != 0) *value = cur->record;
      cur->key = cur->record = never;
      return 1;
    }
    cur = cur->next;
  }

  return 0;
}
/* st_foreach callback: request deletion of entries whose record equals `never`. */
static int
delete_never(key, value, never)
     st_data_t key, value, never;
{
  return (value == never) ? ST_DELETE : ST_CONTINUE;
}
/*
 * Physically remove the entries that st_delete_safe() marked with `never`.
 * num_entries was already decremented when they were marked, so the count
 * is saved before the sweep and restored afterwards.
 */
void
st_cleanup_safe(table, never)
     st_table *table;
     st_data_t never;
{
  int live_entries = table->num_entries;

  st_foreach(table, delete_never, never);
  table->num_entries = live_entries;
}
/*
 * Call func(key, record, arg, 0) for every entry, bin by bin, and act on
 * the enum st_retval it returns:
 *   ST_CONTINUE  move on to the next entry
 *   ST_CHECK     like ST_CONTINUE, but first verify the current entry is
 *                still linked in its bin; if not, call func(0, 0, arg, 1)
 *                as an error notice and stop
 *   ST_STOP      stop iterating
 *   ST_DELETE    unlink the entry, pass its key to key_free, free it
 * A return value outside these four matches no case, so the inner loop
 * would repeat on the same entry.
 */
void
st_foreach(table, func, arg)
st_table *table;
int (*func)();
st_data_t arg;
{
st_table_entry *ptr, *last, *tmp;
enum st_retval retval;
int i;
for(i = 0; i < table->num_bins; i++) {
/* last = predecessor of ptr in this bin, 0 while ptr is the head */
last = 0;
for(ptr = table->bins[i]; ptr != 0;) {
retval = (*func)(ptr->key, ptr->record, arg, 0);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
tmp = 0;
/* the callback may have changed num_bins, so re-validate i */
if (i < table->num_bins) {
for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
if (tmp == ptr) break;
}
}
if (!tmp) {
/* call func with error notice */
retval = (*func)(0, 0, arg, 1);
return;
}
/* fall through */
case ST_CONTINUE:
last = ptr;
ptr = ptr->next;
break;
case ST_STOP:
return;
case ST_DELETE:
tmp = ptr;
if (last == 0) {
table->bins[i] = ptr->next;
}
else {
last->next = ptr->next;
}
/* advance before freeing; last is unchanged because its successor was removed */
ptr = ptr->next;
table->type->key_free(tmp->key);
free(tmp);
table->num_entries--;
}
}
}
}
/*
 * Hash a NUL-terminated string.
 *
 * Three variants are selectable at build time (HASH_ELFHASH, HASH_PERL,
 * default).  The default variant is h = h*997 + c over all bytes, finished
 * with h + (h >> 5).  It now accumulates in unsigned arithmetic, because the
 * former signed accumulator overflowed (undefined behavior) on longer
 * strings; on two's-complement targets the resulting values are unchanged.
 * The HASH_PERL variant is left as it was.
 */
static int
strhash(string)
     register const char *string;
{
  register int c;

#ifdef HASH_ELFHASH
  register unsigned int h = 0, g;

  while ((c = *string++) != '\0') {
    h = ( h << 4 ) + c;
    if ( g = h & 0xF0000000 )
      h ^= g >> 24;
    h &= ~g;
  }
  return h;
#elif HASH_PERL
  register int val = 0;

  while ((c = *string++) != '\0') {
    val += c;
    val += (val << 10);
    val ^= (val >> 6);
  }
  val += (val << 3);
  val ^= (val >> 11);

  return val + (val << 15);
#else
  register unsigned int acc = 0;
  int val;

  while ((c = *string++) != '\0') {
    acc = acc*997 + (unsigned int )c;
  }

  /* keep the original signed right shift, but add without overflow */
  val = (int )acc;
  return (int )(acc + (unsigned int )(val >> 5));
#endif
}
/* Compare callback for integer keys: 0 when equal, 1 when different. */
static int
numcmp(x, y)
     long x, y;
{
  return (x == y) ? 0 : 1;
}
/* Hash callback for integer keys: the key itself, converted to int. */
static int
numhash(n)
     long n;
{
  int hashed = n;

  return hashed;
}
/* key_free callback for tables that do not own their keys: does nothing, returns 0. */
extern int
st_nothing_key_free(st_data_t key)
{
  return 0;
}
/* key_clone callback for tables that do not own their keys: returns the key unchanged. */
extern st_data_t
st_nothing_key_clone(st_data_t x)
{
  return x;
}
/*
 * Compare callback for (start, end) byte-range keys.
 * Returns 0 when both ranges have the same length and bytes, 1 when the
 * lengths differ, otherwise the difference of the first mismatching bytes.
 */
static int strend_cmp(st_strend_key* x, st_strend_key* y)
{
  unsigned char *a, *b;
  int diff;

  if ((x->end - x->s) != (y->end - y->s))
    return 1;

  for (a = x->s, b = y->s; a < x->end; a++, b++) {
    diff = (int )*a - (int )*b;
    if (diff != 0) return diff;
  }

  return 0;
}
/*
 * Hash callback for (start, end) byte-range keys: h = h*997 + byte over the
 * range, finished with h + (h >> 5).
 *
 * Accumulates in unsigned arithmetic; the former signed accumulator
 * overflowed (undefined behavior) on longer keys.  On two's-complement
 * targets the resulting values are unchanged.
 */
static int strend_hash(st_strend_key* x)
{
  unsigned int acc;
  int val;
  unsigned char *p;

  acc = 0;
  p = x->s;
  while (p < x->end) {
    acc = acc * 997 + (unsigned int )*p++;
  }

  /* keep the original signed right shift, but add without overflow */
  val = (int )acc;
  return (int )(acc + (unsigned int )(val >> 5));
}
/*
 * key_free callback for strend tables: release the heap-allocated key
 * struct with xfree().  Always returns 0.
 */
static int strend_key_free(st_data_t x)
{
  void* key_struct = (void* )x;

  xfree(key_struct);
  return 0;
}
/*
 * key_clone callback for strend tables: allocate a new key struct and copy
 * the (s, end) pointers into it.  The bytes they point at are shared with
 * the original key, not duplicated.
 */
static st_data_t strend_key_clone(st_data_t x)
{
  st_strend_key* source = (st_strend_key* )x;
  st_strend_key* duplicate = alloc(st_strend_key);

  *duplicate = *source;
  return (st_data_t )duplicate;
}

View File

@ -0,0 +1,77 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* @(#) st.h 5.1 89/12/14 */
#ifndef ST_INCLUDED
#define ST_INCLUDED
typedef unsigned long st_data_t;
#define ST_DATA_T_DEFINED
typedef struct st_table st_table;
/*
 * Per-table-type callbacks.  All are declared without prototypes, so the
 * argument types are whatever the concrete type's functions expect.
 */
struct st_hash_type {
/* returns 0 when two keys are equal, non-zero otherwise */
int (*compare)();
/* returns the hash of a key */
int (*hash)();
/* releases a key owned by the table (called when entries are freed) */
int (*key_free)();
/* duplicates a key for st_copy() */
st_data_t (*key_clone)();
};
/* A chained hash table: an array of bins, each the head of a linked list of entries. */
struct st_table {
/* callbacks used to compare, hash, free and clone keys */
struct st_hash_type *type;
/* number of slots in bins[] */
int num_bins;
/* number of live entries */
int num_entries;
/* bin array; each slot is 0 or the first entry of a chain */
struct st_table_entry **bins;
};
typedef struct {
unsigned char* s;
unsigned char* end;
} st_strend_key;
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
#ifndef _
# define _(args) args
#endif
#ifndef ANYARGS
# ifdef __cplusplus
# define ANYARGS ...
# else
# define ANYARGS
# endif
#endif
/*
 * Public table API.  Semantics are defined by st.c; the grouping below
 * only reflects what the names and signatures show.
 */

/* Constructors.  The *_with_size variants take an extra int, presumably
   an initial size hint -- confirm in st.c. */
st_table *st_init_table _((struct st_hash_type *));
st_table *st_init_table_with_size _((struct st_hash_type *, int));
st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
st_table *st_init_strend_table_with_size _((int));

/* Entry removal, insertion and lookup.  The *_strend variants take the
   key as a pair of byte pointers instead of a single st_data_t. */
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));

/* Iteration with a caller-supplied callback and argument. */
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));

/* Direct insertion variants. */
void st_add_direct _((st_table *, st_data_t, st_data_t));
void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));

/* Table teardown, cleanup and copying. */
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));

/* Ready-made key_clone / key_free callbacks: the clone returns its
   argument and the free returns 0 without touching the key. */
extern st_data_t st_nothing_key_clone _((st_data_t key));
extern int st_nothing_key_free _((st_data_t key));
/*
 * Sentinel "function pointers": the integers 0 and -2 cast to the
 * callback type.  They are not callable addresses; presumably st.c
 * recognises them to select built-in numeric compare/hash -- confirm.
 */
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
/* Lower-case aliases for the two sentinels above. */
#define st_numcmp ST_NUMCMP
#define st_numhash ST_NUMHASH
#endif /* ST_INCLUDED */

View File

@ -1,833 +0,0 @@
/*
* This program was generated by testconv.rb.
*/
#include<stdio.h>
#ifdef POSIX_TEST
#include "onigposix.h"
#else
#include "oniguruma.h"
#endif
/* Running totals, updated by xx() and printed by main(). */
static int nsucc = 0;  /* cases whose outcome matched the expectation */
static int nfail = 0;  /* cases whose outcome did not match the expectation */
static int nerror = 0; /* cases where compiling or searching reported an error */
/* Destination for "ERROR:" messages; main() points it at stdout. */
static FILE* err_file;
#ifndef POSIX_TEST
/* Match region shared by every case in the native (non-POSIX) build;
   created and released in main(). */
static OnigRegion* region;
#endif
static void xx(char* pattern, char* str, int from, int to, int mem, int not)
{
int r;
#ifdef POSIX_TEST
regex_t reg;
char buf[200];
regmatch_t pmatch[20];
r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
if (r) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
if (r != 0 && r != REG_NOMATCH) {
regerror(r, &reg, buf, sizeof(buf));
fprintf(err_file, "ERROR: %s\n", buf);
nerror++;
return ;
}
if (r == REG_NOMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
nfail++;
}
}
}
regfree(&reg);
#else
regex_t* reg;
OnigErrorInfo einfo;
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + strlen(pattern)),
ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r, &einfo);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + strlen(str)),
(UChar* )str, (UChar* )(str + strlen(str)),
region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(err_file, "ERROR: %s\n", s);
nerror++;
return ;
}
if (r == ONIG_MISMATCH) {
if (not) {
fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
nfail++;
}
}
else {
if (not) {
fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
nfail++;
}
else {
if (region->beg[mem] == from && region->end[mem] == to) {
fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
nsucc++;
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
}
onig_free(reg);
#endif
}
/*
 * Positive case on the whole match: `pattern` is expected to match `str`
 * with group 0 spanning the offsets from..to.
 */
static void x2(char* pattern, char* str, int from, int to)
{
  const int whole_match_group = 0;
  const int expect_no_match = 0;

  xx(pattern, str, from, to, whole_match_group, expect_no_match);
}
/*
 * Positive case on a capture group: `pattern` is expected to match `str`
 * with group `mem` spanning the offsets from..to.
 */
static void x3(char* pattern, char* str, int from, int to, int mem)
{
  const int expect_no_match = 0;

  xx(pattern, str, from, to, mem, expect_no_match);
}
/*
 * Negative case: `pattern` is expected NOT to match `str`.  The offset
 * and group arguments handed to xx() are placeholders (all zero).
 */
static void n(char* pattern, char* str)
{
  const int unused = 0;
  const int expect_no_match = 1;

  xx(pattern, str, unused, unused, unused, expect_no_match);
}
/*
 * Test driver (the file header says it was generated by testconv.rb).
 *
 * Sends error messages to stdout, then prepares the engine: the POSIX
 * build selects EUC-JP as the wrapper's encoding, the native build
 * allocates the match region shared by all cases.  The generated list of
 * x2()/x3()/n() calls that follows is the case table.  At the end the
 * totals are printed, and the native build frees the region and calls
 * onig_end().  The exit status is 0 regardless of how many cases failed.
 * argc and argv are not used.
 */
extern int main(int argc, char* argv[])
{
/* Errors go to the same stream as the per-case OK/FAIL lines. */
err_file = stdout;
#ifdef POSIX_TEST
reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
#else
/* Shared by every onig_search() call made from xx(). */
region = onig_region_new();
#endif
x2("", "", 0, 0);
x2("^", "", 0, 0);
x2("$", "", 0, 0);
x2("\\G", "", 0, 0);
x2("\\A", "", 0, 0);
x2("\\Z", "", 0, 0);
x2("\\z", "", 0, 0);
x2("^$", "", 0, 0);
x2("\\ca", "\001", 0, 1);
x2("\\C-b", "\002", 0, 1);
x2("\\M-Z", "\xDA", 0, 1);
x2("", "a", 0, 0);
x2("a", "a", 0, 1);
x2("aa", "aa", 0, 2);
x2("aaa", "aaa", 0, 3);
x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
x2("ab", "ab", 0, 2);
x2("b", "ab", 1, 2);
x2("bc", "abc", 1, 3);
x2("\\17", "\017", 0, 1);
x2("\\x1f", "\x1f", 0, 1);
x2("\\xFE", "\xfe", 0, 1);
x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
x2("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
x2(".", "a", 0, 1);
n(".", "");
x2("..", "ab", 0, 2);
x2("\\w", "e", 0, 1);
n("\\W", "e");
x2("\\s", " ", 0, 1);
x2("\\S", "b", 0, 1);
x2("\\d", "4", 0, 1);
n("\\D", "4");
x2("\\b", "z ", 0, 0);
x2("\\b", " z", 1, 1);
x2("\\B", "zz ", 1, 1);
x2("\\B", "z ", 2, 2);
x2("\\B", " z", 0, 0);
x2("[ab]", "b", 0, 1);
n("[ab]", "c");
x2("[a-z]", "t", 0, 1);
n("[^a]", "a");
x2("[^a]", "\n", 0, 1);
x2("[]]", "]", 0, 1);
n("[^]]", "]");
x2("[\\^]+", "0^^1", 1, 3);
x2("[b-]", "b", 0, 1);
x2("[b-]", "-", 0, 1);
x2("[\\w]", "z", 0, 1);
n("[\\w]", " ");
x2("[\\W]", "b$", 1, 2);
x2("[\\d]", "5", 0, 1);
n("[\\d]", "e");
x2("[\\D]", "t", 0, 1);
n("[\\D]", "3");
x2("[\\s]", " ", 0, 1);
n("[\\s]", "a");
x2("[\\S]", "b", 0, 1);
n("[\\S]", " ");
x2("[\\w\\d]", "2", 0, 1);
n("[\\w\\d]", " ");
x2("[[:upper:]]", "B", 0, 1);
x2("[*[:xdigit:]+]", "+", 0, 1);
x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
x2("[*[:xdigit:]+]", "-@^+", 3, 4);
n("[[:upper]]", "A");
x2("[[:upper]]", ":", 0, 1);
x2("[\\044-\\047]", "\046", 0, 1);
x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
n("[\\x6A-\\x6D]", "\x6E");
n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply");
x2("[\\[]", "[", 0, 1);
x2("[\\]]", "]", 0, 1);
x2("[&]", "&", 0, 1);
x2("[[ab]]", "b", 0, 1);
x2("[[ab]c]", "c", 0, 1);
n("[[^a]]", "a");
n("[^[a]]", "a");
x2("[[ab]&&bc]", "b", 0, 1);
n("[[ab]&&bc]", "a");
n("[[ab]&&bc]", "c");
x2("[a-z&&b-y&&c-x]", "w", 0, 1);
n("[^a-z&&b-y&&c-x]", "w");
x2("[[^a&&a]&&a-z]", "b", 0, 1);
n("[[^a&&a]&&a-z]", "a");
x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
n("[[^a-z&&bcdef]&&[^c-g]]", "c");
x2("[^[^abc]&&[^cde]]", "c", 0, 1);
x2("[^[^abc]&&[^cde]]", "e", 0, 1);
n("[^[^abc]&&[^cde]]", "f");
x2("[a-&&-a]", "-", 0, 1);
n("[a-&&-a]", "&");
n("\\wabc", " abc");
x2("a\\Wbc", "a bc", 0, 4);
x2("a.b.c", "aabbc", 0, 5);
x2(".\\wb\\W..c", "abb bcc", 0, 7);
x2("\\s\\wzzz", " zzzz", 0, 5);
x2("aa.b", "aabb", 0, 4);
n(".a", "ab");
x2(".a", "aa", 0, 2);
x2("^a", "a", 0, 1);
x2("^a$", "a", 0, 1);
x2("^\\w$", "a", 0, 1);
n("^\\w$", " ");
x2("^\\wab$", "zab", 0, 3);
x2("^\\wabcdef$", "zabcdef", 0, 7);
x2("^\\w...def$", "zabcdef", 0, 7);
x2("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8);
x2("\\A\\Z", "", 0, 0);
x2("\\Axyz", "xyz", 0, 3);
x2("xyz\\Z", "xyz", 0, 3);
x2("xyz\\z", "xyz", 0, 3);
x2("\\Gaz", "az", 0, 2);
n("\\Gz", "bza");
n("az\\G", "az");
n("az\\A", "az");
n("a\\Az", "az");
x2("\\^\\$", "^$", 0, 2);
x2("^x?y", "xy", 0, 2);
x2("^(x?y)", "xy", 0, 2);
x2("\\w", "_", 0, 1);
n("\\W", "_");
x2("(?=z)z", "z", 0, 1);
n("(?=z).", "a");
x2("(?!z)a", "a", 0, 1);
n("(?!z)a", "z");
x2("(?i:a)", "a", 0, 1);
x2("(?i:a)", "A", 0, 1);
x2("(?i:A)", "a", 0, 1);
n("(?i:A)", "b");
x2("(?i:[A-Z])", "a", 0, 1);
x2("(?i:[f-m])", "H", 0, 1);
x2("(?i:[f-m])", "h", 0, 1);
n("(?i:[f-m])", "e");
x2("(?i:[A-c])", "D", 0, 1);
x2("(?i:[!-k])", "Z", 0, 1);
x2("(?i:[!-k])", "7", 0, 1);
x2("(?i:[T-}])", "b", 0, 1);
x2("(?i:[T-}])", "{", 0, 1);
x2("(?i:\\?a)", "?A", 0, 2);
x2("(?i:\\*A)", "*a", 0, 2);
n(".", "\n");
x2("(?m:.)", "\n", 0, 1);
x2("(?m:a.)", "a\n", 0, 2);
x2("(?m:.b)", "a\nb", 1, 3);
n("(?i)(?-i)a", "A");
n("(?i)(?-i:a)", "A");
x2("a?", "", 0, 0);
x2("a?", "b", 0, 0);
x2("a?", "a", 0, 1);
x2("a*", "", 0, 0);
x2("a*", "a", 0, 1);
x2("a*", "aaa", 0, 3);
x2("a*", "baaaa", 0, 0);
n("a+", "");
x2("a+", "a", 0, 1);
x2("a+", "aaaa", 0, 4);
x2("a+", "aabbb", 0, 2);
x2("a+", "baaaa", 1, 5);
x2(".?", "", 0, 0);
x2(".?", "f", 0, 1);
x2(".?", "\n", 0, 0);
x2(".*", "", 0, 0);
x2(".*", "abcde", 0, 5);
x2(".+", "z", 0, 1);
x2(".+", "zdswer\n", 0, 6);
x2("a|b", "a", 0, 1);
x2("a|b", "b", 0, 1);
x2("|a", "a", 0, 0);
x2("(|a)", "a", 0, 0);
x2("ab|bc", "ab", 0, 2);
x2("ab|bc", "bc", 0, 2);
x2("z(?:ab|bc)", "zbc", 0, 3);
x2("a(?:ab|bc)c", "aabc", 0, 4);
x2("ab|(?:ac|az)", "az", 0, 2);
x2("a|b|c", "dc", 1, 2);
x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
x2("a|^z", "ba", 1, 2);
x2("a|^z", "za", 0, 1);
x2("a|\\Gz", "bza", 2, 3);
x2("a|\\Gz", "za", 0, 1);
x2("a|\\Az", "bza", 2, 3);
x2("a|\\Az", "za", 0, 1);
x2("a|b\\Z", "ba", 1, 2);
x2("a|b\\Z", "b", 0, 1);
x2("a|b\\z", "ba", 1, 2);
x2("a|b\\z", "b", 0, 1);
x2("\\w|\\s", " ", 0, 1);
n("\\w|\\w", " ");
x2("\\w|%", "%", 0, 1);
x2("\\w|[&$]", "&", 0, 1);
x2("[b-d]|[^e-z]", "a", 0, 1);
x2("(?:a|[c-f])|bz", "dz", 0, 1);
x2("(?:a|[c-f])|bz", "bz", 0, 2);
x2("abc|(?=zz)..f", "zzf", 0, 3);
x2("abc|(?!zz)..f", "abf", 0, 3);
x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
n("(?>a|abd)c", "abdc");
x2("(?>abd|a)c", "abdc", 0, 4);
x2("a?|b", "a", 0, 1);
x2("a?|b", "b", 0, 0);
x2("a?|b", "", 0, 0);
x2("a*|b", "aa", 0, 2);
x2("a*|b*", "ba", 0, 0);
x2("a*|b*", "ab", 0, 1);
x2("a+|b*", "", 0, 0);
x2("a+|b*", "bbb", 0, 3);
x2("a+|b*", "abbb", 0, 1);
n("a+|b+", "");
x2("(a|b)?", "b", 0, 1);
x2("(a|b)*", "ba", 0, 2);
x2("(a|b)+", "bab", 0, 3);
x2("(ab|ca)+", "caabbc", 0, 4);
x2("(ab|ca)+", "aabca", 1, 5);
x2("(ab|ca)+", "abzca", 0, 2);
x2("(a|bab)+", "ababa", 0, 5);
x2("(a|bab)+", "ba", 1, 2);
x2("(a|bab)+", "baaaba", 1, 4);
x2("(?:a|b)(?:a|b)", "ab", 0, 2);
x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
x2("(?:a+|b+){2}", "aaabbb", 0, 6);
x2("h{0,}", "hhhh", 0, 4);
x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
n("ax{2}*a", "0axxxa1");
n("a.{0,2}a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXa0");
n("a.{0,2}?a", "0aXXXXa0");
x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
n("(?:a+|\\Ab*)cc", "abcc");
x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
x2("a|(?i)c", "C", 0, 1);
x2("(?i)c|a", "C", 0, 1);
x2("(?i)c|a", "A", 0, 1);
x2("(?i:c)|a", "C", 0, 1);
n("(?i:c)|a", "A");
x2("[abc]?", "abc", 0, 1);
x2("[abc]*", "abc", 0, 3);
x2("[^abc]*", "abc", 0, 0);
n("[^abc]+", "abc");
x2("a??", "aaa", 0, 0);
x2("ba??b", "bab", 0, 3);
x2("a*?", "aaa", 0, 0);
x2("ba*?", "baa", 0, 1);
x2("ba*?b", "baab", 0, 4);
x2("a+?", "aaa", 0, 1);
x2("ba+?", "baa", 0, 2);
x2("ba+?b", "baab", 0, 4);
x2("(?:a?)??", "a", 0, 0);
x2("(?:a??)?", "a", 0, 0);
x2("(?:a?)+?", "aaa", 0, 1);
x2("(?:a+)??", "aaa", 0, 0);
x2("(?:a+)??b", "aaab", 0, 4);
x2("(?:ab)?{2}", "", 0, 0);
x2("(?:ab)?{2}", "ababa", 0, 4);
x2("(?:ab)*{0}", "ababa", 0, 0);
x2("(?:ab){3,}", "abababab", 0, 8);
n("(?:ab){3,}", "abab");
x2("(?:ab){2,4}", "ababab", 0, 6);
x2("(?:ab){2,4}", "ababababab", 0, 8);
x2("(?:ab){2,4}?", "ababababab", 0, 4);
x2("(?:ab){,}", "ab{,}", 0, 5);
x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
x2("(d+)([^abc]z)", "dddz", 0, 4);
x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
x2("(\\w+)(\\wz)", "dddz", 0, 4);
x3("(a)", "a", 0, 1, 1);
x3("(ab)", "ab", 0, 2, 1);
x2("((ab))", "ab", 0, 2);
x3("((ab))", "ab", 0, 2, 1);
x3("((ab))", "ab", 0, 2, 2);
x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
x3("(ab)(cd)", "abcd", 0, 2, 1);
x3("(ab)(cd)", "abcd", 2, 4, 2);
x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
x2("(^a)", "a", 0, 1);
x3("(a)|(a)", "ba", 1, 2, 1);
x3("(^a)|(a)", "ba", 1, 2, 2);
x3("(a?)", "aaa", 0, 1, 1);
x3("(a*)", "aaa", 0, 3, 1);
x3("(a*)", "", 0, 0, 1);
x3("(a+)", "aaaaaaa", 0, 7, 1);
x3("(a+|b*)", "bbbaa", 0, 3, 1);
x3("(a+|b?)", "bbbaa", 0, 1, 1);
x3("(abc)?", "abc", 0, 3, 1);
x3("(abc)*", "abc", 0, 3, 1);
x3("(abc)+", "abc", 0, 3, 1);
x3("(xyz|abc)+", "abc", 0, 3, 1);
x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
x3("((?i:abc))", "AbC", 0, 3, 1);
x2("(abc)(?i:\\1)", "abcABC", 0, 6);
x3("((?m:a.c))", "a\nc", 0, 3, 1);
x3("((?=az)a)", "azb", 0, 1, 1);
x3("abc|(.abd)", "zabd", 0, 4, 1);
x2("(?:abc)|(ABC)", "abc", 0, 3);
x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
x3("a*(.)", "aaaaz", 4, 5, 1);
x3("a*?(.)", "aaaaz", 0, 1, 1);
x3("a*?(c)", "aaaac", 4, 5, 1);
x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
x3("(\\Abb)cc", "bbcc", 0, 2, 1);
n("(\\Abb)cc", "zbbcc");
x3("(^bb)cc", "bbcc", 0, 2, 1);
n("(^bb)cc", "zbbcc");
x3("cc(bb$)", "ccbb", 2, 4, 1);
n("cc(bb$)", "ccbbb");
n("(\\1)", "");
n("\\1(a)", "aa");
n("(a(b)\\1)\\2+", "ababb");
n("(?:(?:\\1|z)(a))+$", "zaa");
x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
x2("(a)(?=\\1)", "aa", 0, 1);
n("(a)$|\\1", "az");
x2("(a)\\1", "aa", 0, 2);
n("(a)\\1", "ab");
x2("(a?)\\1", "aa", 0, 2);
x2("(a??)\\1", "aa", 0, 0);
x2("(a*)\\1", "aaaaa", 0, 4);
x3("(a*)\\1", "aaaaa", 0, 2, 1);
x2("a(b*)\\1", "abbbb", 0, 5);
x2("a(b*)\\1", "ab", 0, 1);
x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
x2("([a-d])\\1", "cc", 0, 2);
x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
n("(\\w\\d\\s)\\1", "f5 f5");
x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
x2("(^a)\\1", "aa", 0, 2);
n("(^a)\\1", "baa");
n("(a$)\\1", "aa");
n("(ab\\Z)\\1", "ab");
x2("(a*\\Z)\\1", "a", 1, 1);
x2(".(a*\\Z)\\1", "ba", 1, 2);
x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
x2("((?i:az))\\1", "AzAz", 0, 4);
n("((?i:az))\\1", "Azaz");
x2("(?<=a)b", "ab", 1, 2);
n("(?<=a)b", "bb");
x2("(?<=a|b)b", "bb", 1, 2);
x2("(?<=a|bc)b", "bcb", 2, 3);
x2("(?<=a|bc)b", "ab", 1, 2);
x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
x2("(a)\\g<1>", "aa", 0, 2);
x2("(?<!a)b", "cb", 1, 2);
n("(?<!a)b", "ab");
x2("(?<!a|bc)b", "bbb", 0, 1);
n("(?<!a|bc)z", "bcz");
x2("(?<name1>a)", "a", 0, 1);
x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
x2("(?<n>|a\\g<n>)+", "", 0, 0);
x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", " fg xaaaaaaaafg x", 2, 18);
x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
x2("()*\\1", "", 0, 0);
x2("(?:()|())*\\1\\2", "", 0, 0);
x3("(?:\\1a|())*", "a", 0, 0, 1);
x2("x((.)*)*x", "0x1x2x3", 1, 6);
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
x2("", "", 0, 0);
x2("", "", 0, 2);
n("", "");
x2("うう", "うう", 0, 4);
x2("あいう", "あいう", 0, 6);
x2("こここここここここここここここここここここここここここここここここここ", "こここここここここここここここここここここここここここここここここここ", 0, 70);
x2("", "いあ", 2, 4);
x2("いう", "あいう", 2, 6);
x2("\\xca\\xb8", "\xca\xb8", 0, 2);
x2(".", "", 0, 2);
x2("..", "かき", 0, 4);
x2("\\w", "", 0, 2);
n("\\W", "");
x2("[\\W]", "う$", 2, 3);
x2("\\S", "", 0, 2);
x2("\\S", "", 0, 2);
x2("\\b", "", 0, 0);
x2("\\b", "", 1, 1);
x2("\\B", "せそ ", 2, 2);
x2("\\B", "", 3, 3);
x2("\\B", "", 0, 0);
x2("[たち]", "", 0, 2);
n("[なに]", "");
x2("[う-お]", "", 0, 2);
n("[^け]", "");
x2("[\\w]", "", 0, 2);
n("[\\d]", "");
x2("[\\D]", "", 0, 2);
n("[\\s]", "");
x2("[\\S]", "", 0, 2);
x2("[\\w\\d]", "", 0, 2);
x2("[\\w\\d]", "", 3, 5);
n("\\w鬼車", " 鬼車");
x2("\\W車", "鬼 車", 0, 5);
x2("あ.い.う", "ああいいう", 0, 10);
x2(".\\wう\\W..ぞ", "えうう うぞぞ", 0, 13);
x2("\\s\\wこここ", " ここここ", 0, 9);
x2("ああ.け", "ああけけ", 0, 8);
n(".い", "いえ");
x2(".お", "おお", 0, 4);
x2("^あ", "", 0, 2);
x2("^む$", "", 0, 2);
x2("^\\w$", "", 0, 2);
x2("^\\wかきくけこ$", "zかきくけこ", 0, 11);
x2("^\\w...うえお$", "zあいううえお", 0, 13);
x2("\\w\\w\\s\\Wおおお\\d", "aお おおお4", 0, 12);
x2("\\Aたちつ", "たちつ", 0, 6);
x2("むめも\\Z", "むめも", 0, 6);
x2("かきく\\z", "かきく", 0, 6);
x2("かきく\\Z", "かきく\n", 0, 6);
x2("\\Gぽぴ", "ぽぴ", 0, 4);
n("\\Gえ", "うえお");
n("とて\\G", "とて");
n("まみ\\A", "まみ");
n("\\Aみ", "まみ");
x2("(?=せ)せ", "", 0, 2);
n("(?=う).", "");
x2("(?!う)か", "", 0, 2);
n("(?!と)あ", "");
x2("(?i:あ)", "", 0, 2);
x2("(?i:ぶべ)", "ぶべ", 0, 4);
n("(?i:い)", "");
x2("(?m:よ.)", "\n", 0, 3);
x2("(?m:.め)", "\n", 2, 5);
x2("あ?", "", 0, 0);
x2("変?", "", 0, 0);
x2("変?", "", 0, 2);
x2("量*", "", 0, 0);
x2("量*", "", 0, 2);
x2("子*", "子子子", 0, 6);
x2("馬*", "鹿馬馬馬馬", 0, 0);
n("山+", "");
x2("河+", "", 0, 2);
x2("時+", "時時時時", 0, 8);
x2("え+", "ええううう", 0, 4);
x2("う+", "おうううう", 2, 10);
x2(".?", "", 0, 2);
x2(".*", "ぱぴぷぺ", 0, 8);
x2(".+", "", 0, 2);
x2(".+", "いうえか\n", 0, 8);
x2("あ|い", "", 0, 2);
x2("あ|い", "", 0, 2);
x2("あい|いう", "あい", 0, 4);
x2("あい|いう", "いう", 0, 4);
x2("を(?:かき|きく)", "をかき", 0, 6);
x2("を(?:かき|きく)け", "をきくけ", 0, 8);
x2("あい|(?:あう|あを)", "あを", 0, 4);
x2("あ|い|う", "えう", 2, 4);
x2("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "しすせ", 0, 6);
n("あ|い|うえ|おかき|く|けこさ|しすせ|そ|たち|つてとなに|ぬね", "すせ");
x2("あ|^わ", "ぶあ", 2, 4);
x2("あ|^を", "をあ", 0, 2);
x2("鬼|\\G車", "け車鬼", 4, 6);
x2("鬼|\\G車", "車鬼", 0, 2);
x2("鬼|\\A車", "b車鬼", 3, 5);
x2("鬼|\\A車", "", 0, 2);
x2("鬼|車\\Z", "車鬼", 2, 4);
x2("鬼|車\\Z", "", 0, 2);
x2("鬼|車\\Z", "\n", 0, 2);
x2("鬼|車\\z", "車鬼", 2, 4);
x2("鬼|車\\z", "", 0, 2);
x2("\\w|\\s", "", 0, 2);
x2("\\w|%", "%お", 0, 1);
x2("\\w|[&$]", "う&", 0, 2);
x2("[い-け]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[い-け]|[^か-こ]", "", 0, 2);
x2("[^あ]", "\n", 0, 1);
x2("(?:あ|[う-き])|いを", "うを", 0, 2);
x2("(?:あ|[う-き])|いを", "いを", 0, 4);
x2("あいう|(?=けけ)..ほ", "けけほ", 0, 6);
x2("あいう|(?!けけ)..ほ", "あいほ", 0, 6);
x2("(?=をあ)..あ|(?=をを)..あ", "ををあ", 0, 6);
x2("(?<=あ|いう)い", "いうい", 4, 6);
n("(?>あ|あいえ)う", "あいえう");
x2("(?>あいえ|あ)う", "あいえう", 0, 8);
x2("あ?|い", "", 0, 2);
x2("あ?|い", "", 0, 0);
x2("あ?|い", "", 0, 0);
x2("あ*|い", "ああ", 0, 4);
x2("あ*|い*", "いあ", 0, 0);
x2("あ*|い*", "あい", 0, 2);
x2("[aあ]*|い*", "aあいいい", 0, 3);
x2("あ+|い*", "", 0, 0);
x2("あ+|い*", "いいい", 0, 6);
x2("あ+|い*", "あいいい", 0, 2);
x2("あ+|い*", "aあいいい", 0, 0);
n("あ+|い+", "");
x2("(あ|い)?", "", 0, 2);
x2("(あ|い)*", "いあ", 0, 4);
x2("(あ|い)+", "いあい", 0, 6);
x2("(あい|うあ)+", "うああいうえ", 0, 8);
x2("(あい|うえ)+", "うああいうえ", 4, 12);
x2("(あい|うあ)+", "ああいうあ", 2, 10);
x2("(あい|うあ)+", "あいをうあ", 0, 4);
x2("(あい|うあ)+", "$$zzzzあいをうあ", 6, 10);
x2("(あ|いあい)+", "あいあいあ", 0, 10);
x2("(あ|いあい)+", "いあ", 2, 4);
x2("(あ|いあい)+", "いあああいあ", 2, 8);
x2("(?:あ|い)(?:あ|い)", "あい", 0, 4);
x2("(?:あ*|い*)(?:あ*|い*)", "あああいいい", 0, 6);
x2("(?:あ*|い*)(?:あ+|い+)", "あああいいい", 0, 12);
x2("(?:あ+|い+){2}", "あああいいい", 0, 12);
x2("(?:あ+|い+){1,2}", "あああいいい", 0, 12);
x2("(?:あ+|\\Aい*)うう", "うう", 0, 4);
n("(?:あ+|\\Aい*)うう", "あいうう");
x2("(?:^あ+|い+)*う", "ああいいいあいう", 12, 16);
x2("(?:^あ+|い+)*う", "ああいいいいう", 0, 14);
x2("う{0,}", "うううう", 0, 8);
x2("あ|(?i)c", "C", 0, 1);
x2("(?i)c|あ", "C", 0, 1);
x2("(?i:あ)|a", "a", 0, 1);
n("(?i:あ)|a", "A");
x2("[あいう]?", "あいう", 0, 2);
x2("[あいう]*", "あいう", 0, 6);
x2("[^あいう]*", "あいう", 0, 0);
n("[^あいう]+", "あいう");
x2("あ??", "あああ", 0, 0);
x2("いあ??い", "いあい", 0, 6);
x2("あ*?", "あああ", 0, 0);
x2("いあ*?", "いああ", 0, 2);
x2("いあ*?い", "いああい", 0, 8);
x2("あ+?", "あああ", 0, 2);
x2("いあ+?", "いああ", 0, 4);
x2("いあ+?い", "いああい", 0, 8);
x2("(?:天?)??", "", 0, 0);
x2("(?:天??)?", "", 0, 0);
x2("(?:夢?)+?", "夢夢夢", 0, 2);
x2("(?:風+)??", "風風風", 0, 0);
x2("(?:雪+)??霜", "雪雪雪霜", 0, 8);
x2("(?:あい)?{2}", "", 0, 0);
x2("(?:鬼車)?{2}", "鬼車鬼車鬼", 0, 8);
x2("(?:鬼車)*{0}", "鬼車鬼車鬼", 0, 0);
x2("(?:鬼車){3,}", "鬼車鬼車鬼車鬼車", 0, 16);
n("(?:鬼車){3,}", "鬼車鬼車");
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車", 0, 12);
x2("(?:鬼車){2,4}", "鬼車鬼車鬼車鬼車鬼車", 0, 16);
x2("(?:鬼車){2,4}?", "鬼車鬼車鬼車鬼車鬼車", 0, 8);
x2("(?:鬼車){,}", "鬼車{,}", 0, 7);
x2("(?:かきく)+?{2}", "かきくかきくかきく", 0, 12);
x3("(火)", "", 0, 2, 1);
x3("(火水)", "火水", 0, 4, 1);
x2("((時間))", "時間", 0, 4);
x3("((風水))", "風水", 0, 4, 1);
x3("((昨日))", "昨日", 0, 4, 2);
x3("((((((((((((((((((((量子))))))))))))))))))))", "量子", 0, 4, 20);
x3("(あい)(うえ)", "あいうえ", 0, 4, 1);
x3("(あい)(うえ)", "あいうえ", 4, 8, 2);
x3("()(あ)いう(えおか)きくけこ", "あいうえおかきくけこ", 6, 12, 3);
x3("(()(あ)いう(えおか)きくけこ)", "あいうえおかきくけこ", 6, 12, 4);
x3(".*(フォ)ン・マ(ン()シュタ)イン", "フォン・マンシュタイン", 10, 18, 2);
x2("(^あ)", "", 0, 2);
x3("(あ)|(あ)", "いあ", 2, 4, 1);
x3("(^あ)|(あ)", "いあ", 2, 4, 2);
x3("(あ?)", "あああ", 0, 2, 1);
x3("(ま*)", "ままま", 0, 6, 1);
x3("(と*)", "", 0, 0, 1);
x3("(る+)", "るるるるるるる", 0, 14, 1);
x3("(ふ+|へ*)", "ふふふへへ", 0, 6, 1);
x3("(あ+|い?)", "いいいああ", 0, 2, 1);
x3("(あいう)?", "あいう", 0, 6, 1);
x3("(あいう)*", "あいう", 0, 6, 1);
x3("(あいう)+", "あいう", 0, 6, 1);
x3("(さしす|あいう)+", "あいう", 0, 6, 1);
x3("([なにぬ][かきく]|かきく)+", "かきく", 0, 6, 1);
x3("((?i:あいう))", "あいう", 0, 6, 1);
x3("((?m:あ.う))", "\n", 0, 5, 1);
x3("((?=あん)あ)", "あんい", 0, 2, 1);
x3("あいう|(.あいえ)", "んあいえ", 0, 8, 1);
x3("あ*(.)", "ああああん", 8, 10, 1);
x3("あ*?(.)", "ああああん", 0, 2, 1);
x3("あ*?(ん)", "ああああん", 8, 10, 1);
x3("[いうえ]あ*(.)", "えああああん", 10, 12, 1);
x3("(\\Aいい)うう", "いいうう", 0, 4, 1);
n("(\\Aいい)うう", "んいいうう");
x3("(^いい)うう", "いいうう", 0, 4, 1);
n("(^いい)うう", "んいいうう");
x3("ろろ(るる$)", "ろろるる", 4, 8, 1);
n("ろろ(るる$)", "ろろるるる");
x2("(無)\\1", "無無", 0, 4);
n("(無)\\1", "無武");
x2("(空?)\\1", "空空", 0, 4);
x2("(空??)\\1", "空空", 0, 0);
x2("(空*)\\1", "空空空空空", 0, 8);
x3("(空*)\\1", "空空空空空", 0, 4, 1);
x2("あ(い*)\\1", "あいいいい", 0, 10);
x2("あ(い*)\\1", "あい", 0, 2);
x2("(あ*)(い*)\\1\\2", "あああいいあああいい", 0, 20);
x2("(あ*)(い*)\\2", "あああいいいい", 0, 14);
x3("(あ*)(い*)\\2", "あああいいいい", 6, 10, 2);
x2("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 16);
x3("(((((((ぽ*)ぺ))))))ぴ\\7", "ぽぽぽぺぴぽぽぽ", 0, 6, 7);
x2("(は)(ひ)(ふ)\\2\\1\\3", "はひふひはふ", 0, 12);
x2("([き-け])\\1", "くく", 0, 4);
x2("(\\w\\d\\s)\\1", "あ5 あ5 ", 0, 8);
n("(\\w\\d\\s)\\1", "あ5 あ5");
x2("(誰?|[あ-う]{3})\\1", "誰?誰?", 0, 8);
x2("...(誰?|[あ-う]{3})\\1", "あaあ誰", 0, 13);
x2("(誰?|[あ-う]{3})\\1", "ういうういう", 0, 12);
x2("(^こ)\\1", "ここ", 0, 4);
n("(^む)\\1", "めむむ");
n("(あ$)\\1", "ああ");
n("(あい\\Z)\\1", "あい");
x2("(あ*\\Z)\\1", "", 2, 2);
x2(".(あ*\\Z)\\1", "いあ", 2, 4);
x3("(.(やいゆ)\\2)", "zやいゆやいゆ", 0, 13, 1);
x3("(.(..\\d.)\\2)", "あ12341234", 0, 10, 1);
x2("((?i:あvず))\\1", "あvずあvず", 0, 10);
x2("(?<愚か>変|\\(\\g<愚か>\\))", "((((((変))))))", 0, 14);
x2("\\A(?:\\g<阿_1>|\\g<云_2>|\\z終了 (?<阿_1>観|自\\g<云_2>自)(?<云_2>在|菩薩\\g<阿_1>菩薩))$", "菩薩自菩薩自在自菩薩自菩薩", 0, 26);
x2("[[ひふ]]", "", 0, 2);
x2("[[いおう]か]", "", 0, 2);
n("[[^あ]]", "");
n("[^[あ]]", "");
x2("[^[^あ]]", "", 0, 2);
x2("[[かきく]&&きく]", "", 0, 2);
n("[[かきく]&&きく]", "");
n("[[かきく]&&きく]", "");
x2("[あ-ん&&い-を&&う-ゑ]", "", 0, 2);
n("[^あ-ん&&い-を&&う-ゑ]", "");
x2("[[^あ&&あ]&&あ-ん]", "", 0, 2);
n("[[^あ&&あ]&&あ-ん]", "");
x2("[[^あ-ん&&いうえお]&&[^う-か]]", "", 0, 2);
n("[[^あ-ん&&いうえお]&&[^う-か]]", "");
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
x2("[^[^あいう]&&[^うえお]]", "", 0, 2);
n("[^[^あいう]&&[^うえお]]", "");
x2("[あ-&&-あ]", "-", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]q-w]", "", 0, 2);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "f", 0, 1);
x2("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "g", 0, 1);
n("[^[^a-zあいう]&&[^bcdefgうえお]g-w]", "2");
fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d\n",
nsucc, nfail, nerror);
#ifndef POSIX_TEST
onig_region_free(region, 1);
onig_end();
#endif
return 0;
}

View File

@ -124,6 +124,22 @@ php_mb_regex_enc_name_map_t enc_name_map[] ={
"UTF-8\0UTF8\0",
ONIG_ENCODING_UTF8
},
{
"UTF-16\0UTF-16BE\0",
ONIG_ENCODING_UTF16_BE
},
{
"UTF-16LE\0",
ONIG_ENCODING_UTF16_LE
},
{
"UCS-4\0UTF-32\0UTF-32BE\0",
ONIG_ENCODING_UTF32_BE
},
{
"UCS-4LE\0UTF-32LE\0",
ONIG_ENCODING_UTF32_LE
},
{
"SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
ONIG_ENCODING_SJIS

View File

@ -0,0 +1,11 @@
--TEST--
Bug #31911 (mb_decode_mimeheader() is case-sensitive to hex escapes)
--FILE--
<?php
// The two headers differ only in the case of the hex digits in the
// Q-encoded escapes (=3F vs =3f); both must decode to the same text.
$upper_hex = "Works: =?iso-8859-1?q?=3F=3F=3F?=";
$lower_hex = "Fails: =?iso-8859-1?q?=3f=3f=3f?=";

echo mb_decode_mimeheader($upper_hex), "\n";
echo mb_decode_mimeheader($lower_hex);
?>
--EXPECT--
Works: ???
Fails: ???