mirror of
https://github.com/php/php-src.git
synced 2024-12-01 13:54:10 +08:00
updated bundled oniguruma from 4.4.0 to 4.7.1
This commit is contained in:
parent
64cf2c2f50
commit
7aab46a2f1
@ -1,5 +1,98 @@
|
||||
History
|
||||
|
||||
2007/08/16: Version 4.7.1
|
||||
|
||||
2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
|
||||
2007/07/04: [spec] (thanks K.Takata)
|
||||
ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl)
|
||||
2007/07/04: [dist] (thanks K.Takata)
|
||||
fix documents API and API.ja.
|
||||
|
||||
2007/06/18: Version 4.7.0
|
||||
|
||||
2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
|
||||
2007/06/18: [bug] (thanks KUBO Takehiro)
|
||||
WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint).
|
||||
2007/06/05: [impl] add #ifndef vsnprintf in regint.h.
|
||||
2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case
|
||||
in onig_search().
|
||||
|
||||
2007/04/12: Version 4.6.2
|
||||
|
||||
2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000
|
||||
to 0x4000.
|
||||
2007/03/26: [impl] add 'void' to function declarations.
|
||||
|
||||
2007/03/06: Version 4.6.1
|
||||
|
||||
2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2007/03/06: [bug] add #include <malloc.h> for bcc32.
|
||||
(In bcc32, alloca() is declared in malloc.h.)
|
||||
2007/03/06: [impl] remove including version.h of Ruby.
|
||||
2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search().
|
||||
ex. /\n\Z/.match("aaaaaaaaaa\n")
|
||||
2007/03/02: [impl] move range > start check position in end_buf process.
|
||||
|
||||
2007/02/08: Version 4.6.0
|
||||
|
||||
2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2007/01/09: [tune] select_opt_exact_info() didn't work for empty info.
|
||||
ex. /.a/ make MAP info instead of EXACT info.
|
||||
2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode.
|
||||
2006/12/22: [spec] should check too short multibyte char in parse_exp().
|
||||
add USE_PAD_TO_SHORT_BYTE_CHAR.
|
||||
ex. /\x00/ in UTF16 should be error.
|
||||
|
||||
2006/11/17: Version 4.5.1
|
||||
|
||||
2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2006/11/15: [impl] remove CHECK_INTERRUPT.
|
||||
2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e
|
||||
should be [:punct:].
|
||||
2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER.
|
||||
2006/11/07: [bug] (thanks Byte)
|
||||
add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R.
|
||||
|
||||
2006/11/06: Version 4.5.0
|
||||
|
||||
2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND.
|
||||
2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of
|
||||
the string range.
|
||||
add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
|
||||
|
||||
2006/10/30: Version 4.4.6
|
||||
|
||||
2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2006/10/30: [impl] (thanks K.Takata)
|
||||
add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END.
|
||||
2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner)
|
||||
invalid offset value was used in STATE_CHECK_BUFF_INIT().
|
||||
|
||||
2006/10/24: Version 4.4.5
|
||||
|
||||
2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
|
||||
2006/10/24: [impl] escape -Wall warning.
|
||||
2006/10/24: [tune] (thanks Kornelius Kalnbach)
|
||||
String#scan on a long string takes much longer than in
|
||||
old Ruby
|
||||
because of the initialization time of the combination explosion check.
|
||||
ex. ("test " * 100_000).scan(/\w*\s?/)
|
||||
change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000.
|
||||
reduce initialization area of state_check_buff.
|
||||
2006/10/16: [bug] (thanks Akinori Musha)
|
||||
first argument of rb_warn() should be format string.
|
||||
2006/10/10: [impl] add msa.state_check_buff_size initialization
|
||||
in onig_search().
|
||||
2006/10/10: [bug] should call onig_st_free_table() in
|
||||
onig_free_shared_cclass_table().
|
||||
2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB.
|
||||
2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT().
|
||||
make valgrind happy.
|
||||
2006/09/22: [impl] convert to ascii for parameter string in
|
||||
onig_error_code_to_str().
|
||||
add enc member into OnigErrorInfo.
|
||||
|
||||
2006/09/19: Version 4.4.4
|
||||
|
||||
2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
|
||||
@ -1717,15 +1810,6 @@ History
|
||||
[test: test]
|
||||
[memo: memo]
|
||||
--
|
||||
<branch>
|
||||
svn mkdir http://localhost/repos/branches -m ""
|
||||
svn mkdir http://localhost/repos/branches/oniguruma -m ""
|
||||
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only"
|
||||
|
||||
<create tag>
|
||||
svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
|
||||
|
||||
|
||||
<CVS: show all tags>
|
||||
cvs history -T
|
||||
|
||||
|
@ -1,9 +1,8 @@
|
||||
README 2006/05/15
|
||||
README 2007/06/18
|
||||
|
||||
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
|
||||
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
|
||||
|
||||
Oniguruma is a regular expressions library.
|
||||
@ -183,18 +182,8 @@ API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
|
||||
+ re_alloc_pattern() is added.
|
||||
|
||||
|
||||
ToDo
|
||||
|
||||
? ignore case in full code point range of Unicode.
|
||||
? Unicode Property.
|
||||
? ambig-flag Katakana <-> Hiragana.
|
||||
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
|
||||
?? \X (== \PM\pM*)
|
||||
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
|
||||
?? variable line separator.
|
||||
?? transmission stopper. (return ONIG_STOP from match_at())
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
@ -1,9 +1,8 @@
|
||||
README.ja 2006/05/15
|
||||
README.ja 2007/06/18
|
||||
|
||||
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
|
||||
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
|
||||
|
||||
鬼車は正規表現ライブラリである。
|
||||
@ -187,18 +186,7 @@ Ruby 1.8/1.6
|
||||
+ re_alloc_pattern() が追加された。
|
||||
|
||||
|
||||
残件
|
||||
|
||||
? Unicode全コードポイント領域での大文字小文字照合
|
||||
? Unicodeプロパティ
|
||||
? ambig-flag Katakana <-> Hiragana
|
||||
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
|
||||
?? \X (== \PM\pM*)
|
||||
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
|
||||
?? 改行文字(文字列)を変更できる
|
||||
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
@ -1,4 +1,4 @@
|
||||
Oniguruma API Version 4.1.0 2006/05/15
|
||||
Oniguruma API Version 4.7.1 2007/07/04
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
@ -63,7 +63,7 @@ Oniguruma API Version 4.1.0 2006/05/15
|
||||
4 option: compile time options.
|
||||
|
||||
ONIG_OPTION_NONE no option
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
|
||||
ONIG_OPTION_MULTILINE '.' match with newline
|
||||
ONIG_OPTION_IGNORECASE ambiguity match on
|
||||
ONIG_OPTION_EXTEND extended pattern form
|
||||
@ -159,7 +159,6 @@ Oniguruma API Version 4.1.0 2006/05/15
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONE: exact
|
||||
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ignore case for ASCII
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND: grapheme cluster as a char
|
||||
ONIGENC_AMBIGUOUS_MATCH_FULL: all ambiguity on
|
||||
ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
|
||||
onig_set_default_ambig_flag()
|
||||
|
@ -1,4 +1,4 @@
|
||||
鬼車インターフェース Version 4.1.0 2006/05/15
|
||||
鬼車インターフェース Version 4.7.1 2007/07/04
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
@ -64,7 +64,7 @@
|
||||
4 option: 正規表現コンパイル時オプション
|
||||
|
||||
ONIG_OPTION_NONE オプションなし
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\z', '\Z' -> '\z'
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
|
||||
ONIG_OPTION_MULTILINE '.'が改行にマッチする
|
||||
ONIG_OPTION_IGNORECASE 曖昧マッチ オン
|
||||
ONIG_OPTION_EXTEND パターン拡張形式
|
||||
@ -159,7 +159,6 @@
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONE: 曖昧無し
|
||||
ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE: ASCIIの大文字小文字
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII以外の大文字小文字
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND: 合成文字
|
||||
ONIGENC_AMBIGUOUS_MATCH_FULL: 全ての曖昧フラグ有効
|
||||
ONIGENC_AMBIGUOUS_MATCH_DEFAULT: (ASCII | NONASCII)
|
||||
onig_set_default_ambig_flag()
|
||||
|
@ -1,4 +1,4 @@
|
||||
FAQ 2006/05/15
|
||||
FAQ 2006/10/30
|
||||
|
||||
1. Longest match
|
||||
|
||||
@ -19,6 +19,10 @@ FAQ 2006/05/15
|
||||
THREAD_ATOMIC_END
|
||||
THREAD_PASS
|
||||
|
||||
THREAD_SYSTEM_INIT
|
||||
THREAD_SYSTEM_END
|
||||
|
||||
|
||||
(B) Application Layer
|
||||
|
||||
The plural threads should not do simultaneously that making
|
||||
|
@ -1,4 +1,4 @@
|
||||
FAQ 2006/05/15
|
||||
FAQ 2006/10/30
|
||||
|
||||
1. 最長マッチ
|
||||
|
||||
@ -20,6 +20,11 @@ FAQ 2006/05/15
|
||||
THREAD_ATOMIC_END
|
||||
THREAD_PASS
|
||||
|
||||
何らかの初期化/終了処理が必要であれば、以下のマクロに定義する。
|
||||
THREAD_SYSTEM_INIT
|
||||
THREAD_SYSTEM_END
|
||||
|
||||
|
||||
(B) Application Layer
|
||||
|
||||
同時に複数のスレッドが、正規表現オブジェクトを作成する、
|
||||
@ -85,6 +90,8 @@ Ruby
|
||||
なります。
|
||||
|
||||
#define USE_MULTI_THREAD_SYSTEM
|
||||
#define THREAD_SYSTEM_INIT
|
||||
#define THREAD_SYSTEM_END
|
||||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_1.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_1_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -72,16 +72,6 @@ iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* e
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -101,22 +91,6 @@ iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -153,8 +127,7 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_10.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_10_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -109,16 +109,6 @@ iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -138,22 +128,6 @@ iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -302,8 +276,7 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_11_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_13.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_13_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_13_CtypeTable[256] = {
|
||||
};
|
||||
|
||||
static int
|
||||
iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -133,27 +123,10 @@ iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end)
|
||||
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -176,7 +149,7 @@ iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
|
||||
@ -185,74 +158,73 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
|
||||
{
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde }
|
||||
};
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde }
|
||||
};
|
||||
|
||||
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
|
||||
*ccs = OnigAsciiPairAmbigCodes;
|
||||
@ -272,8 +244,7 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
@ -286,11 +257,11 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
iso_8859_13_mbc_to_normalize,
|
||||
iso_8859_13_is_mbc_ambiguous,
|
||||
iso_8859_13_get_all_pair_ambig_codes,
|
||||
mbc_to_normalize,
|
||||
is_mbc_ambiguous,
|
||||
get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
iso_8859_13_is_code_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_14.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_14_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_14_CtypeTable[256] = {
|
||||
};
|
||||
|
||||
static int
|
||||
iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -133,27 +123,10 @@ iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end)
|
||||
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -176,7 +149,7 @@ iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
|
||||
@ -185,103 +158,102 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
|
||||
{
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xa1, 0xa2 },
|
||||
{ 0xa2, 0xa1 },
|
||||
{ 0xa4, 0xa5 },
|
||||
{ 0xa5, 0xa4 },
|
||||
{ 0xa6, 0xab },
|
||||
{ 0xa8, 0xb8 },
|
||||
{ 0xaa, 0xba },
|
||||
{ 0xab, 0xa6 },
|
||||
{ 0xac, 0xbc },
|
||||
{ 0xaf, 0xff },
|
||||
{ 0xa1, 0xa2 },
|
||||
{ 0xa2, 0xa1 },
|
||||
{ 0xa4, 0xa5 },
|
||||
{ 0xa5, 0xa4 },
|
||||
{ 0xa6, 0xab },
|
||||
{ 0xa8, 0xb8 },
|
||||
{ 0xaa, 0xba },
|
||||
{ 0xab, 0xa6 },
|
||||
{ 0xac, 0xbc },
|
||||
{ 0xaf, 0xff },
|
||||
|
||||
{ 0xb0, 0xb1 },
|
||||
{ 0xb1, 0xb0 },
|
||||
{ 0xb2, 0xb3 },
|
||||
{ 0xb3, 0xb2 },
|
||||
{ 0xb4, 0xb5 },
|
||||
{ 0xb5, 0xb4 },
|
||||
{ 0xb7, 0xb9 },
|
||||
{ 0xb8, 0xa8 },
|
||||
{ 0xb9, 0xb7 },
|
||||
{ 0xba, 0xaa },
|
||||
{ 0xbb, 0xbf },
|
||||
{ 0xbc, 0xac },
|
||||
{ 0xbd, 0xbe },
|
||||
{ 0xbe, 0xbd },
|
||||
{ 0xbf, 0xbb },
|
||||
{ 0xb0, 0xb1 },
|
||||
{ 0xb1, 0xb0 },
|
||||
{ 0xb2, 0xb3 },
|
||||
{ 0xb3, 0xb2 },
|
||||
{ 0xb4, 0xb5 },
|
||||
{ 0xb5, 0xb4 },
|
||||
{ 0xb7, 0xb9 },
|
||||
{ 0xb8, 0xa8 },
|
||||
{ 0xb9, 0xb7 },
|
||||
{ 0xba, 0xaa },
|
||||
{ 0xbb, 0xbf },
|
||||
{ 0xbc, 0xac },
|
||||
{ 0xbd, 0xbe },
|
||||
{ 0xbe, 0xbd },
|
||||
{ 0xbf, 0xbb },
|
||||
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd7, 0xf7 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd7, 0xf7 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf7, 0xd7 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde },
|
||||
{ 0xff, 0xaf }
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf7, 0xd7 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde },
|
||||
{ 0xff, 0xaf }
|
||||
};
|
||||
|
||||
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
|
||||
@ -302,8 +274,7 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
@ -316,11 +287,11 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
iso_8859_14_mbc_to_normalize,
|
||||
iso_8859_14_is_mbc_ambiguous,
|
||||
iso_8859_14_get_all_pair_ambig_codes,
|
||||
mbc_to_normalize,
|
||||
is_mbc_ambiguous,
|
||||
get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
iso_8859_14_is_code_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_15.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_15_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_15_CtypeTable[256] = {
|
||||
};
|
||||
|
||||
static int
|
||||
iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -133,27 +123,10 @@ iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end)
|
||||
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -176,7 +149,7 @@ iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
|
||||
@ -185,8 +158,8 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
{
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xa6, 0xa8 },
|
||||
@ -282,8 +255,7 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
@ -296,11 +268,11 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
iso_8859_15_mbc_to_normalize,
|
||||
iso_8859_15_is_mbc_ambiguous,
|
||||
iso_8859_15_get_all_pair_ambig_codes,
|
||||
mbc_to_normalize,
|
||||
is_mbc_ambiguous,
|
||||
get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
iso_8859_15_is_code_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_16.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_16_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -104,21 +104,11 @@ static const unsigned short EncISO_8859_16_CtypeTable[256] = {
|
||||
};
|
||||
|
||||
static int
|
||||
iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
mbc_to_normalize(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -133,27 +123,10 @@ iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
const UChar** pp, const UChar* end)
|
||||
is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -176,7 +149,7 @@ iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
|
||||
@ -185,97 +158,96 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
}
|
||||
|
||||
static int
|
||||
iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
|
||||
{
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xa1, 0xa2 },
|
||||
{ 0xa2, 0xa1 },
|
||||
{ 0xa3, 0xb3 },
|
||||
{ 0xa6, 0xa8 },
|
||||
{ 0xa8, 0xa6 },
|
||||
{ 0xaa, 0xba },
|
||||
{ 0xac, 0xae },
|
||||
{ 0xae, 0xac },
|
||||
{ 0xaf, 0xbf },
|
||||
{ 0xa1, 0xa2 },
|
||||
{ 0xa2, 0xa1 },
|
||||
{ 0xa3, 0xb3 },
|
||||
{ 0xa6, 0xa8 },
|
||||
{ 0xa8, 0xa6 },
|
||||
{ 0xaa, 0xba },
|
||||
{ 0xac, 0xae },
|
||||
{ 0xae, 0xac },
|
||||
{ 0xaf, 0xbf },
|
||||
|
||||
{ 0xb2, 0xb9 },
|
||||
{ 0xb3, 0xa3 },
|
||||
{ 0xb4, 0xb8 },
|
||||
{ 0xb8, 0xb4 },
|
||||
{ 0xb9, 0xb2 },
|
||||
{ 0xba, 0xaa },
|
||||
{ 0xbc, 0xbd },
|
||||
{ 0xbd, 0xbc },
|
||||
{ 0xbe, 0xff },
|
||||
{ 0xbf, 0xaf },
|
||||
{ 0xb2, 0xb9 },
|
||||
{ 0xb3, 0xa3 },
|
||||
{ 0xb4, 0xb8 },
|
||||
{ 0xb8, 0xb4 },
|
||||
{ 0xb9, 0xb2 },
|
||||
{ 0xba, 0xaa },
|
||||
{ 0xbc, 0xbd },
|
||||
{ 0xbd, 0xbc },
|
||||
{ 0xbe, 0xff },
|
||||
{ 0xbf, 0xaf },
|
||||
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd7, 0xf7 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd7, 0xf7 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe },
|
||||
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
{ 0xe0, 0xc0 },
|
||||
{ 0xe1, 0xc1 },
|
||||
{ 0xe2, 0xc2 },
|
||||
{ 0xe3, 0xc3 },
|
||||
{ 0xe4, 0xc4 },
|
||||
{ 0xe5, 0xc5 },
|
||||
{ 0xe6, 0xc6 },
|
||||
{ 0xe7, 0xc7 },
|
||||
{ 0xe8, 0xc8 },
|
||||
{ 0xe9, 0xc9 },
|
||||
{ 0xea, 0xca },
|
||||
{ 0xeb, 0xcb },
|
||||
{ 0xec, 0xcc },
|
||||
{ 0xed, 0xcd },
|
||||
{ 0xee, 0xce },
|
||||
{ 0xef, 0xcf },
|
||||
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf7, 0xd7 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde },
|
||||
{ 0xff, 0xbe }
|
||||
{ 0xf0, 0xd0 },
|
||||
{ 0xf1, 0xd1 },
|
||||
{ 0xf2, 0xd2 },
|
||||
{ 0xf3, 0xd3 },
|
||||
{ 0xf4, 0xd4 },
|
||||
{ 0xf5, 0xd5 },
|
||||
{ 0xf6, 0xd6 },
|
||||
{ 0xf7, 0xd7 },
|
||||
{ 0xf8, 0xd8 },
|
||||
{ 0xf9, 0xd9 },
|
||||
{ 0xfa, 0xda },
|
||||
{ 0xfb, 0xdb },
|
||||
{ 0xfc, 0xdc },
|
||||
{ 0xfd, 0xdd },
|
||||
{ 0xfe, 0xde },
|
||||
{ 0xff, 0xbe }
|
||||
};
|
||||
|
||||
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
|
||||
@ -296,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
@ -310,11 +281,11 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
iso_8859_16_mbc_to_normalize,
|
||||
iso_8859_16_is_mbc_ambiguous,
|
||||
iso_8859_16_get_all_pair_ambig_codes,
|
||||
mbc_to_normalize,
|
||||
is_mbc_ambiguous,
|
||||
get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
iso_8859_16_is_code_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_2.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_2_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -109,16 +109,6 @@ iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -138,22 +128,6 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -294,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_3.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_3_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -109,16 +109,6 @@ iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -138,22 +128,6 @@ iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -283,8 +257,7 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_4.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_4_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -109,16 +109,6 @@ iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -138,22 +128,6 @@ iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -292,8 +266,7 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_5_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_6_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_7_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -37,18 +37,18 @@ static const unsigned short EncISO_8859_8_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -2,7 +2,7 @@
|
||||
iso8859_9.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncISO_8859_9_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
@ -109,16 +109,6 @@ iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
*lower = 0xdf;
|
||||
(*pp) += 2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -138,22 +128,6 @@ iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 1) {
|
||||
if ((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S'))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p == 0xdf) {
|
||||
(*pp)++;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -272,8 +246,7 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -73,18 +73,18 @@ static const unsigned short EncKOI8_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
|
@ -2,7 +2,7 @@
|
||||
koi8_r.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -73,18 +73,18 @@ static const unsigned short EncKOI8_R_CtypeTable[256] = {
|
||||
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
|
||||
0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
|
||||
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
|
||||
0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
|
||||
0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
|
||||
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
@ -153,6 +153,9 @@ koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
|
||||
const OnigPairAmbigCodes** ccs)
|
||||
{
|
||||
static const OnigPairAmbigCodes cc[] = {
|
||||
{ 0xa3, 0xb3 },
|
||||
{ 0xb3, 0xa3 },
|
||||
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
|
@ -2,7 +2,7 @@
|
||||
mktable.c
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -27,6 +27,7 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define NOT_RUBY
|
||||
@ -614,15 +615,10 @@ static int IsPunct(int enc, int c)
|
||||
if (c >= 0x3c && c <= 0x3e) return 1;
|
||||
}
|
||||
|
||||
if (c >= 0x21 && c <= 0x23) return 1;
|
||||
if (c >= 0x25 && c <= 0x2a) return 1;
|
||||
if (c >= 0x2c && c <= 0x2f) return 1;
|
||||
if (c >= 0x3a && c <= 0x3b) return 1;
|
||||
if (c >= 0x3f && c <= 0x40) return 1;
|
||||
if (c >= 0x5b && c <= 0x5d) return 1;
|
||||
if (c == 0x5f) return 1;
|
||||
if (c == 0x7b) return 1;
|
||||
if (c == 0x7d) return 1;
|
||||
if (c >= 0x21 && c <= 0x2f) return 1;
|
||||
if (c >= 0x3a && c <= 0x40) return 1;
|
||||
if (c >= 0x5b && c <= 0x60) return 1;
|
||||
if (c >= 0x7b && c <= 0x7e) return 1;
|
||||
|
||||
switch (enc) {
|
||||
case ISO_8859_1:
|
||||
|
@ -2,7 +2,7 @@
|
||||
utf16_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -126,18 +126,6 @@ utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
|
||||
if (*p == 0) {
|
||||
p++;
|
||||
if (end > p + 2 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+1) == 0) {
|
||||
*lower++ = '\0';
|
||||
*lower = 0xdf;
|
||||
(*pp) += 4;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower++ = '\0';
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -177,20 +165,6 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
int c, v;
|
||||
|
||||
p++;
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 2 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+1) == 0) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -234,8 +208,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
utf16_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -128,18 +128,6 @@ utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*(p+1) == 0) {
|
||||
if (end > p + 3 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+3) == 0) {
|
||||
*lower++ = 0xdf;
|
||||
*lower = '\0';
|
||||
(*pp) += 4;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*(lower+1) = '\0';
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
@ -176,17 +164,6 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
if (*(p+1) == 0) {
|
||||
int c, v;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 3 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+3) == 0) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -229,8 +206,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
utf32_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -85,20 +85,6 @@ utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
|
||||
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
|
||||
p += 3;
|
||||
if (end > p + 4 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower = 0xdf;
|
||||
(*pp) += 8;
|
||||
return 4;
|
||||
}
|
||||
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
@ -139,20 +125,6 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
int c, v;
|
||||
|
||||
p += 3;
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 4 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
|
||||
(*pp) += 4;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -191,8 +163,7 @@ OnigEncodingType OnigEncodingUTF32_BE = {
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
utf32_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -84,20 +84,6 @@ utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
if (end > p + 7 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
|
||||
*lower++ = 0xdf;
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower = '\0';
|
||||
(*pp) += 8;
|
||||
return 4;
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -137,20 +123,6 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
int c, v;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 7 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
|
||||
(*pp) += 4;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
@ -189,8 +161,7 @@ OnigEncodingType OnigEncodingUTF32_LE = {
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -2,7 +2,7 @@
|
||||
utf8.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -223,17 +223,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
if (end > p + 1 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S')))) {
|
||||
*lower++ = '\303';
|
||||
*lower = '\237';
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
}
|
||||
@ -258,15 +247,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c == (UChar )'\237' &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
*lower++ = '\303';
|
||||
*lower = '\237';
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -288,15 +268,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
if (end > p + 1 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+1) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+1) == 'S')))) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
(*pp)++;
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
|
||||
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
|
||||
@ -318,10 +289,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
else if (c == (UChar )'\237' &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3739,8 +3706,7 @@ OnigEncodingType OnigEncodingUTF8 = {
|
||||
6, /* max byte length */
|
||||
1, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
|
@ -5,53 +5,50 @@
|
||||
</head>
|
||||
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
|
||||
|
||||
<h1>Oniguruma</h1>
|
||||
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
|
||||
|
||||
<p>
|
||||
2006/09/19 (C) K.Kosako
|
||||
(c) K.Kosako, updated at: 2007/08/16
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Oniguruma is a regular expressions library.<br>
|
||||
The characteristic of this library is that a different character encoding
|
||||
<br>for every regular expression object can be specified.
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt><b>Supported character encodings:</b><br>
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
|
||||
Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,<br>
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
<font color="red">
|
||||
(GB 18030 encoding was contributed by KUBO Takehiro)
|
||||
</font>
|
||||
</p>
|
||||
</dl>
|
||||
<p>
|
||||
|
||||
<dl>
|
||||
<font color="orange">
|
||||
<dt><b>What's new</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>Version 4.4.4 released. (2006/09/19)
|
||||
<li>Version 2.5.7 released. (2006/07/28)
|
||||
<li>2007/08/16: Version 4.7.1 released.</li>
|
||||
<li>2007/07/14: Version 5.9.0 released.</li>
|
||||
<li>2007/06/20: Version 2.5.9 released.</li>
|
||||
<li>2007/06/20: Maintainer of 2.x was changed.</li>
|
||||
</ul>
|
||||
</dl>
|
||||
<hr>
|
||||
|
||||
<p>
|
||||
Oniguruma is a regular expressions library.<br>
|
||||
The characteristic of this library is that a different character encoding
|
||||
<br>for every regular expression object can be specified.
|
||||
<br>(supported APIs: GNU regex, POSIX and Oniguruma native)
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt><b>Supported character encodings:</b><br>
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
|
||||
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
<font color="orange">
|
||||
(GB18030 encoding was contributed by KUBO Takehiro)<br>
|
||||
(CP1251 encoding was contributed by Byte)
|
||||
</font>
|
||||
</p>
|
||||
</dl>
|
||||
|
||||
<hr>
|
||||
|
||||
<dl>
|
||||
<dt>There are two ways of using it in this program.
|
||||
<ul>
|
||||
<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
|
||||
<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br>
|
||||
In Ruby 1.9, Oniguruma is already integrated by Kazuo Saito.
|
||||
</ul>
|
||||
</dl>
|
||||
<dt><b>License:</b> BSD license.
|
||||
|
||||
<dl>
|
||||
<dt><b>Platform:</b>
|
||||
@ -62,32 +59,27 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>License:</b><br>
|
||||
When this software is partly used or it is distributed with Ruby,
|
||||
this of Ruby follows the license of Ruby.<br>
|
||||
It follows the BSD license in the case of the one except for it.
|
||||
</p>
|
||||
|
||||
<dt><b>Download:</b>
|
||||
<ul>
|
||||
<li> <a href="archive/onig-4.4.4.tar.gz">Latest release version 4.4.4</a> (2006/09/19) <a href="HISTORY_4X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.4.3.tar.gz">4.4.3</a> (2006/09/15)
|
||||
<li> <a href="archive/onig-4.4.2.tar.gz">4.4.2</a> (2006/09/08)
|
||||
<li> <a href="archive/onig-4.4.1.tar.gz">4.4.1</a> (2006/08/29)
|
||||
<li> <a href="archive/onigd2_5_7.tar.gz">Latest release version 2.5.7</a> (2006/07/28) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
<li> <a href="archive/onigd2_5_6.tar.gz">2.5.6</a> (2006/05/29)
|
||||
<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08)
|
||||
<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14) <a href="HISTORY_5X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04)
|
||||
<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27)
|
||||
<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
|
||||
<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<font color="red">
|
||||
* 4.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
|
||||
* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.[2-4]
|
||||
Maintainer of 2.x was changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br>
|
||||
About 2.x, please contact him.<br>
|
||||
</font>
|
||||
* 5.x supports Unicode Property/Script.<br>
|
||||
* 2.x supports Ruby1.6/1.8.<br>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<dt><b>Documents:</b> (version 4.4.4)
|
||||
<dt><b>Documents:</b> (version 5.9.0)
|
||||
<ul>
|
||||
<li> <a href="doc/RE.txt">Regular Expressions</a>
|
||||
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
|
||||
@ -103,63 +95,66 @@ It follows the BSD license in the case of the one except for it.
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Links:</b>
|
||||
<dt><b>Site Links:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
|
||||
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
|
||||
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
|
||||
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page)
|
||||
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Links:</b>
|
||||
<ul>
|
||||
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://kmaebashi.com/">new script language crowbar</a> (Japanese page)
|
||||
<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page)
|
||||
<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
|
||||
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
|
||||
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
|
||||
<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page)
|
||||
<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
|
||||
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
|
||||
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
|
||||
<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page)
|
||||
<li> <a href="http://monaos.org/">Mona OS</a>
|
||||
<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page)
|
||||
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
|
||||
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
|
||||
<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
|
||||
<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
|
||||
<li> <a href="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
|
||||
<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
|
||||
<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
|
||||
<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
|
||||
<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
|
||||
<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
|
||||
<li> <a href="http://www.ruby-lang.org/">Ruby</a>
|
||||
<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
|
||||
<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm</a>
|
||||
<li> <a href="http://macromates.com/">TextMate (Mac OS X)</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
|
||||
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
|
||||
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
|
||||
<li> <a href="http://www.yokkasoft.net/">YokkaSoft</a>
|
||||
<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>References:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
|
||||
<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
|
||||
<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
|
||||
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
|
||||
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
|
||||
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
|
||||
<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Resources:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
|
||||
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
|
||||
<li> <a href="http://staff.aist.go.jp/tanaka-akira/textprocess/">"Text Processing" Lecture documents (Tanaka Akira)</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
@ -173,11 +168,13 @@ and I'm thankful to Akinori MUSHA.
|
||||
<dt><b>Other Libraries:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
|
||||
<li> <a href="http://www.pcre.org/">PCRE</a>
|
||||
<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
|
||||
<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
|
||||
<li> <a href="http://www.pcre.org/">PCRE</a>
|
||||
<li> <a href="http://re2c.org/">re2c</a>
|
||||
<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
|
||||
<li> <a href="http://laurikari.net/tre/">TRE</a>
|
||||
<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
|
||||
<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
|
||||
<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
|
||||
<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
|
||||
|
@ -4,7 +4,7 @@
|
||||
oniguruma.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -37,8 +37,8 @@ extern "C" {
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 4
|
||||
#define ONIGURUMA_VERSION_MINOR 4
|
||||
#define ONIGURUMA_VERSION_TEENY 4
|
||||
#define ONIGURUMA_VERSION_MINOR 7
|
||||
#define ONIGURUMA_VERSION_TEENY 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
@ -106,17 +106,11 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1)
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */
|
||||
/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */
|
||||
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1)
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30)
|
||||
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_FULL \
|
||||
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
|
||||
( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE )
|
||||
#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag
|
||||
|
||||
|
||||
@ -427,11 +421,11 @@ OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
int onigenc_init P_(());
|
||||
int onigenc_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onigenc_get_default_encoding P_(());
|
||||
OnigEncoding onigenc_get_default_encoding P_((void));
|
||||
ONIG_EXTERN
|
||||
void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
|
||||
ONIG_EXTERN
|
||||
@ -717,6 +711,7 @@ struct re_registers {
|
||||
typedef struct re_registers OnigRegion;
|
||||
|
||||
typedef struct {
|
||||
OnigEncoding enc;
|
||||
OnigUChar* par;
|
||||
OnigUChar* par_end;
|
||||
} OnigErrorInfo;
|
||||
@ -889,7 +884,7 @@ int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint co
|
||||
ONIG_EXTERN
|
||||
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
|
||||
ONIG_EXTERN
|
||||
OnigAmbigType onig_get_default_ambig_flag P_(());
|
||||
OnigAmbigType onig_get_default_ambig_flag P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
|
||||
ONIG_EXTERN
|
||||
|
@ -2,7 +2,7 @@
|
||||
regcomp.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -34,7 +34,7 @@ OnigAmbigType OnigDefaultAmbigFlag =
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
|
||||
|
||||
extern OnigAmbigType
|
||||
onig_get_default_ambig_flag()
|
||||
onig_get_default_ambig_flag(void)
|
||||
{
|
||||
return OnigDefaultAmbigFlag;
|
||||
}
|
||||
@ -47,10 +47,6 @@ onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
|
||||
}
|
||||
|
||||
|
||||
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
||||
#endif
|
||||
|
||||
static UChar*
|
||||
k_strdup(UChar* s, UChar* end)
|
||||
{
|
||||
@ -539,6 +535,8 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
||||
add_length(reg, mbuf->used);
|
||||
return add_bytes(reg, mbuf->p, mbuf->used);
|
||||
#else
|
||||
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
||||
|
||||
int r, pad_size;
|
||||
UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
||||
|
||||
@ -660,7 +658,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
||||
}
|
||||
|
||||
static int
|
||||
compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
|
||||
compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
|
||||
regex_t* reg)
|
||||
{
|
||||
int r;
|
||||
@ -684,7 +682,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
reg->num_call > 0 ||
|
||||
#endif
|
||||
IS_QUALIFIER_IN_REPEAT(qn)) {
|
||||
IS_QUANTIFIER_IN_REPEAT(qn)) {
|
||||
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
||||
}
|
||||
else {
|
||||
@ -696,7 +694,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
|
||||
}
|
||||
|
||||
static int
|
||||
is_anychar_star_qualifier(QualifierNode* qn)
|
||||
is_anychar_star_quantifier(QuantifierNode* qn)
|
||||
{
|
||||
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
||||
NTYPE(qn->target) == N_ANYCHAR)
|
||||
@ -705,13 +703,13 @@ is_anychar_star_qualifier(QualifierNode* qn)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define QUALIFIER_EXPAND_LIMIT_SIZE 50
|
||||
#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
|
||||
#define CKN_ON (ckn > 0)
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
static int
|
||||
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int len, mod_tlen, cklen;
|
||||
int ckn;
|
||||
@ -791,7 +789,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
}
|
||||
|
||||
static int
|
||||
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int r, mod_tlen;
|
||||
int ckn;
|
||||
@ -803,7 +801,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
|
||||
ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
||||
|
||||
if (is_anychar_star_qualifier(qn)) {
|
||||
if (is_anychar_star_quantifier(qn)) {
|
||||
r = compile_tree_n_times(qn->target, qn->lower, reg);
|
||||
if (r) return r;
|
||||
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
|
||||
@ -945,7 +943,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
#else /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
static int
|
||||
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int len, mod_tlen;
|
||||
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
||||
@ -970,8 +968,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
mod_tlen = tlen;
|
||||
|
||||
if (infinite &&
|
||||
(qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
|
||||
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
||||
len = SIZE_OP_JUMP;
|
||||
}
|
||||
else {
|
||||
@ -994,7 +992,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
}
|
||||
else if (!infinite && qn->greedy &&
|
||||
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
||||
<= QUALIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
<= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
len = tlen * qn->lower;
|
||||
len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
||||
}
|
||||
@ -1010,7 +1008,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
}
|
||||
|
||||
static int
|
||||
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int i, r, mod_tlen;
|
||||
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
||||
@ -1019,7 +1017,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
|
||||
if (tlen < 0) return tlen;
|
||||
|
||||
if (is_anychar_star_qualifier(qn)) {
|
||||
if (is_anychar_star_quantifier(qn)) {
|
||||
r = compile_tree_n_times(qn->target, qn->lower, reg);
|
||||
if (r) return r;
|
||||
if (IS_NOT_NULL(qn->next_head_exact)) {
|
||||
@ -1044,8 +1042,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
mod_tlen = tlen;
|
||||
|
||||
if (infinite &&
|
||||
(qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
|
||||
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
||||
if (qn->greedy) {
|
||||
if (IS_NOT_NULL(qn->head_exact))
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
||||
@ -1109,7 +1107,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
}
|
||||
else if (!infinite && qn->greedy &&
|
||||
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
||||
<= QUALIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
<= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
||||
int n = qn->upper - qn->lower;
|
||||
|
||||
r = compile_tree_n_times(qn->target, qn->lower, reg);
|
||||
@ -1227,7 +1225,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
|
||||
|
||||
case EFFECT_STOP_BACKTRACK:
|
||||
if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
|
||||
QualifierNode* qn = &NQUALIFIER(node->target);
|
||||
QuantifierNode* qn = &NQUANTIFIER(node->target);
|
||||
tlen = compile_length_tree(qn->target, reg);
|
||||
if (tlen < 0) return tlen;
|
||||
|
||||
@ -1317,7 +1315,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
|
||||
|
||||
case EFFECT_STOP_BACKTRACK:
|
||||
if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
|
||||
QualifierNode* qn = &NQUALIFIER(node->target);
|
||||
QuantifierNode* qn = &NQUANTIFIER(node->target);
|
||||
r = compile_tree_n_times(qn->target, qn->lower, reg);
|
||||
if (r) return r;
|
||||
|
||||
@ -1540,8 +1538,8 @@ compile_length_tree(Node* node, regex_t* reg)
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg);
|
||||
case N_QUANTIFIER:
|
||||
r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
|
||||
break;
|
||||
|
||||
case N_EFFECT:
|
||||
@ -1703,8 +1701,8 @@ compile_tree(Node* node, regex_t* reg)
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = compile_qualifier_node(&(NQUALIFIER(node)), reg);
|
||||
case N_QUANTIFIER:
|
||||
r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
|
||||
break;
|
||||
|
||||
case N_EFFECT:
|
||||
@ -1741,13 +1739,13 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
Node** ptarget = &(NQUALIFIER(node).target);
|
||||
Node** ptarget = &(NQUANTIFIER(node).target);
|
||||
Node* old = *ptarget;
|
||||
r = noname_disable_map(ptarget, map, counter);
|
||||
if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) {
|
||||
onig_reduce_nested_qualifier(node, *ptarget);
|
||||
if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
|
||||
onig_reduce_nested_quantifier(node, *ptarget);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -1821,8 +1819,8 @@ renumber_by_map(Node* node, GroupNumRemap* map)
|
||||
r = renumber_by_map(NCONS(node).left, map);
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
case N_QUALIFIER:
|
||||
r = renumber_by_map(NQUALIFIER(node).target, map);
|
||||
case N_QUANTIFIER:
|
||||
r = renumber_by_map(NQUANTIFIER(node).target, map);
|
||||
break;
|
||||
case N_EFFECT:
|
||||
r = renumber_by_map(NEFFECT(node).target, map);
|
||||
@ -1851,8 +1849,8 @@ numbered_ref_check(Node* node)
|
||||
r = numbered_ref_check(NCONS(node).left);
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
case N_QUALIFIER:
|
||||
r = numbered_ref_check(NQUALIFIER(node).target);
|
||||
case N_QUANTIFIER:
|
||||
r = numbered_ref_check(NQUANTIFIER(node).target);
|
||||
break;
|
||||
case N_EFFECT:
|
||||
r = numbered_ref_check(NEFFECT(node).target);
|
||||
@ -1933,7 +1931,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
|
||||
|
||||
#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
|
||||
static int
|
||||
qualifiers_memory_node_info(Node* node)
|
||||
quantifiers_memory_node_info(Node* node)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
@ -1943,7 +1941,7 @@ qualifiers_memory_node_info(Node* node)
|
||||
{
|
||||
int v;
|
||||
do {
|
||||
v = qualifiers_memory_node_info(NCONS(node).left);
|
||||
v = quantifiers_memory_node_info(NCONS(node).left);
|
||||
if (v > r) r = v;
|
||||
} while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
}
|
||||
@ -1955,15 +1953,15 @@ qualifiers_memory_node_info(Node* node)
|
||||
return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
|
||||
}
|
||||
else
|
||||
r = qualifiers_memory_node_info(NCALL(node).target);
|
||||
r = quantifiers_memory_node_info(NCALL(node).target);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
if (qn->upper != 0) {
|
||||
r = qualifiers_memory_node_info(qn->target);
|
||||
r = quantifiers_memory_node_info(qn->target);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -1978,7 +1976,7 @@ qualifiers_memory_node_info(Node* node)
|
||||
|
||||
case EFFECT_OPTION:
|
||||
case EFFECT_STOP_BACKTRACK:
|
||||
r = qualifiers_memory_node_info(en->target);
|
||||
r = quantifiers_memory_node_info(en->target);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -2083,9 +2081,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
|
||||
*min = 1;
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
|
||||
if (qn->lower > 0) {
|
||||
r = get_min_match_length(qn->target, min, env);
|
||||
@ -2204,9 +2202,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
|
||||
if (qn->upper != 0) {
|
||||
r = get_max_match_length(qn->target, max, env);
|
||||
@ -2311,9 +2309,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
if (qn->lower == qn->upper) {
|
||||
r = get_char_length_tree1(qn->target, reg, &tlen, level);
|
||||
if (r == 0)
|
||||
@ -2623,9 +2621,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
if (qn->lower > 0) {
|
||||
if (IS_NOT_NULL(qn->head_exact))
|
||||
n = qn->head_exact;
|
||||
@ -2686,8 +2684,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask,
|
||||
case N_QUANTIFIER:
|
||||
r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
|
||||
anchor_mask);
|
||||
break;
|
||||
|
||||
@ -2762,10 +2760,10 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
|
||||
case N_QUANTIFIER:
|
||||
r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
|
||||
if (r == RECURSION_EXIST) {
|
||||
if (NQUALIFIER(node).lower == 0) r = 0;
|
||||
if (NQUANTIFIER(node).lower == 0) r = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -2821,8 +2819,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env);
|
||||
case N_QUANTIFIER:
|
||||
r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
|
||||
break;
|
||||
|
||||
case N_ANCHOR:
|
||||
@ -2876,8 +2874,8 @@ subexp_recursive_check(Node* node)
|
||||
} while (IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = subexp_recursive_check(NQUALIFIER(node).target);
|
||||
case N_QUANTIFIER:
|
||||
r = subexp_recursive_check(NQUANTIFIER(node).target);
|
||||
break;
|
||||
|
||||
case N_ANCHOR:
|
||||
@ -2941,11 +2939,11 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = subexp_recursive_check_trav(NQUALIFIER(node).target, env);
|
||||
if (NQUALIFIER(node).upper == 0) {
|
||||
case N_QUANTIFIER:
|
||||
r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
|
||||
if (NQUANTIFIER(node).upper == 0) {
|
||||
if (r == FOUND_CALLED_NODE)
|
||||
NQUALIFIER(node).is_refered = 1;
|
||||
NQUANTIFIER(node).is_refered = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -3008,8 +3006,8 @@ setup_subexp_call(Node* node, ScanEnv* env)
|
||||
} while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
r = setup_subexp_call(NQUALIFIER(node).target, env);
|
||||
case N_QUANTIFIER:
|
||||
r = setup_subexp_call(NQUANTIFIER(node).target, env);
|
||||
break;
|
||||
case N_EFFECT:
|
||||
r = setup_subexp_call(NEFFECT(node).target, env);
|
||||
@ -3158,10 +3156,10 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
|
||||
|
||||
retry:
|
||||
type = NTYPE(node);
|
||||
if (type == N_QUALIFIER) {
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
if (type == N_QUANTIFIER) {
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
|
||||
#ifdef USE_QUALIFIER_PEEK_NEXT
|
||||
#ifdef USE_QUANTIFIER_PEEK_NEXT
|
||||
qn->next_head_exact = get_head_value_node(next_node, 1, reg);
|
||||
#endif
|
||||
/* automatic posseivation a*b ==> (?>a*)b */
|
||||
@ -3327,11 +3325,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
int child_state = state;
|
||||
int add_state = 0;
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
Node* target = qn->target;
|
||||
int var_num;
|
||||
|
||||
@ -3345,8 +3343,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
||||
if (NTYPE(qn->target) == N_EFFECT) {
|
||||
EffectNode* en = &(NEFFECT(qn->target));
|
||||
if (en->type == EFFECT_MEMORY) {
|
||||
if (NTYPE(en->target) == N_QUALIFIER) {
|
||||
QualifierNode* q = &(NQUALIFIER(en->target));
|
||||
if (NTYPE(en->target) == N_QUANTIFIER) {
|
||||
QuantifierNode* q = &(NQUANTIFIER(en->target));
|
||||
if (IS_REPEAT_INFINITE(q->upper)
|
||||
&& q->greedy == qn->greedy) {
|
||||
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
||||
@ -3509,10 +3507,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
OnigDistance d;
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
Node* target = qn->target;
|
||||
|
||||
if ((state & IN_REPEAT) != 0) {
|
||||
@ -3525,7 +3523,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
||||
if (d == 0) {
|
||||
qn->target_empty_info = NQ_TARGET_IS_EMPTY;
|
||||
#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
|
||||
r = qualifiers_memory_node_info(target);
|
||||
r = quantifiers_memory_node_info(target);
|
||||
if (r < 0) break;
|
||||
if (r > 0) {
|
||||
qn->target_empty_info = r;
|
||||
@ -3567,15 +3565,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
||||
if (r) break;
|
||||
}
|
||||
onig_node_free(target);
|
||||
break; /* break case N_QUALIFIER: */
|
||||
break; /* break case N_QUANTIFIER: */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
||||
if (qn->greedy && (qn->target_empty_info != 0)) {
|
||||
if (NTYPE(target) == N_QUALIFIER) {
|
||||
QualifierNode* tqn = &(NQUALIFIER(target));
|
||||
if (NTYPE(target) == N_QUANTIFIER) {
|
||||
QuantifierNode* tqn = &(NQUANTIFIER(target));
|
||||
if (IS_NOT_NULL(tqn->head_exact)) {
|
||||
qn->head_exact = tqn->head_exact;
|
||||
tqn->head_exact = NULL;
|
||||
@ -3615,8 +3613,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
||||
{
|
||||
Node* target = en->target;
|
||||
r = setup_tree(target, reg, state, env);
|
||||
if (NTYPE(target) == N_QUALIFIER) {
|
||||
QualifierNode* tqn = &(NQUALIFIER(target));
|
||||
if (NTYPE(target) == N_QUANTIFIER) {
|
||||
QuantifierNode* tqn = &(NQUANTIFIER(target));
|
||||
if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
||||
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
||||
int qtype = NTYPE(tqn->target);
|
||||
@ -3645,7 +3643,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
||||
/* allowed node types in look-behind */
|
||||
#define ALLOWED_TYPE_IN_LB \
|
||||
( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
|
||||
N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL )
|
||||
N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
|
||||
|
||||
#define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY )
|
||||
#define ALLOWED_EFFECT_IN_LB_NOT 0
|
||||
@ -4080,7 +4078,14 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
||||
v1 = now->len;
|
||||
v2 = alt->len;
|
||||
|
||||
if (v1 <= 2 && v2 <= 2) {
|
||||
if (v2 == 0) {
|
||||
return ;
|
||||
}
|
||||
else if (v1 == 0) {
|
||||
copy_opt_exact_info(now, alt);
|
||||
return ;
|
||||
}
|
||||
else if (v1 <= 2 && v2 <= 2) {
|
||||
/* ByteValTable[x] is big value --> low price */
|
||||
v2 = map_position_value(enc, now->s[0]);
|
||||
v1 = map_position_value(enc, alt->s[0]);
|
||||
@ -4143,10 +4148,9 @@ static int
|
||||
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
||||
OnigEncoding enc, OnigAmbigType ambig_flag)
|
||||
{
|
||||
int i, j, n, len;
|
||||
int i, n, len;
|
||||
UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
|
||||
OnigCodePoint code, ccode;
|
||||
const OnigCompAmbigCodes* ccs;
|
||||
OnigCodePoint code;
|
||||
const OnigPairAmbigCodes* pccs;
|
||||
OnigAmbigType amb;
|
||||
|
||||
@ -4164,21 +4168,6 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
||||
add_char_opt_map_info(map, buf[0], enc);
|
||||
}
|
||||
}
|
||||
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
|
||||
for (i = 0; i < n; i++) {
|
||||
if (ccs[i].code == code) {
|
||||
for (j = 0; j < ccs[i].n; j++) {
|
||||
ccode = ccs[i].items[j].code[0];
|
||||
len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
|
||||
if (len < 0) return len;
|
||||
add_char_opt_map_info(map, buf[0], enc);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -4572,12 +4561,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{
|
||||
int i;
|
||||
OnigDistance min, max;
|
||||
NodeOptInfo nopt;
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
QuantifierNode* qn = &(NQUANTIFIER(node));
|
||||
|
||||
r = optimize_node_left(qn->target, &nopt, env);
|
||||
if (r) break;
|
||||
@ -4831,6 +4820,38 @@ clear_optimize_info(regex_t* reg)
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
/*
 * Debug helper (ONIG_DEBUG only): write the pattern bytes in [s, end) to fp,
 * framed as "\nPATTERN: /" ... "/\n".
 *
 * When the encoding's minimum character length is greater than one byte,
 * each character is decoded to a code point: values >= 0x80 are printed as
 * " 0x%04x " and smaller values are written as a single byte.  For every
 * other encoding the raw bytes are written unchanged.
 */
static void print_enc_string(FILE* fp, OnigEncoding enc,
			     const UChar *s, const UChar *end)
{
  const UChar *cur = s;

  fprintf(fp, "\nPATTERN: /");

  if (ONIGENC_MBC_MINLEN(enc) <= 1) {
    /* byte-oriented encoding: dump the bytes as they are */
    for (; cur < end; cur++) {
      fputc((int )*cur, fp);
    }
  }
  else {
    /* wide encoding: decode one character per step, then advance by its length */
    for (; cur < end; cur += enc_len(enc, cur)) {
      OnigCodePoint cp = ONIGENC_MBC_TO_CODE(enc, cur, end);

      if (cp < 0x80) {
        fputc((int )cp, fp);
      }
      else {
        fprintf(fp, " 0x%04x ", (int )cp);
      }
    }
  }

  fprintf(fp, "/\n");
}
|
||||
|
||||
static void
|
||||
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
||||
{
|
||||
@ -5122,6 +5143,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
|
||||
reg->state = ONIG_STATE_COMPILING;
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
||||
#endif
|
||||
|
||||
if (reg->alloc == 0) {
|
||||
init_size = (pattern_end - pattern) * 2;
|
||||
if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
||||
@ -5277,6 +5302,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
err:
|
||||
if (IS_NOT_NULL(scan_env.error)) {
|
||||
if (IS_NOT_NULL(einfo)) {
|
||||
einfo->enc = scan_env.enc;
|
||||
einfo->par = scan_env.error;
|
||||
einfo->par_end = scan_env.error_end;
|
||||
}
|
||||
@ -5379,13 +5405,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_init()
|
||||
onig_init(void)
|
||||
{
|
||||
if (onig_inited != 0)
|
||||
return 0;
|
||||
|
||||
onig_inited = 1;
|
||||
|
||||
THREAD_SYSTEM_INIT;
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
onigenc_init();
|
||||
@ -5401,9 +5428,9 @@ onig_init()
|
||||
|
||||
|
||||
extern int
|
||||
onig_end()
|
||||
onig_end(void)
|
||||
{
|
||||
extern int onig_free_shared_cclass_table();
|
||||
extern int onig_free_shared_cclass_table(void);
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
@ -5422,6 +5449,7 @@ onig_end()
|
||||
onig_inited = 0;
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
THREAD_SYSTEM_END;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5470,8 +5498,6 @@ OnigOpInfoType OnigOpInfo[] = {
|
||||
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
||||
{ OP_WORD, "word", ARG_NON },
|
||||
{ OP_NOT_WORD, "not-word", ARG_NON },
|
||||
{ OP_WORD_SB, "word-sb", ARG_NON },
|
||||
{ OP_WORD_MB, "word-mb", ARG_NON },
|
||||
{ OP_WORD_BOUND, "word-bound", ARG_NON },
|
||||
{ OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
||||
{ OP_WORD_BEGIN, "word-begin", ARG_NON },
|
||||
@ -5969,11 +5995,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
||||
break;
|
||||
#endif
|
||||
|
||||
case N_QUALIFIER:
|
||||
fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node,
|
||||
NQUALIFIER(node).lower, NQUALIFIER(node).upper,
|
||||
(NQUALIFIER(node).greedy ? "" : "?"));
|
||||
print_indent_tree(f, NQUALIFIER(node).target, indent + add);
|
||||
case N_QUANTIFIER:
|
||||
fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
|
||||
NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
|
||||
(NQUANTIFIER(node).greedy ? "" : "?"));
|
||||
print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
|
||||
break;
|
||||
|
||||
case N_EFFECT:
|
||||
@ -6002,7 +6028,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
|
||||
break;
|
||||
}
|
||||
|
||||
if (type != N_LIST && type != N_ALT && type != N_QUALIFIER &&
|
||||
if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
|
||||
type != N_EFFECT)
|
||||
fprintf(f, "\n");
|
||||
fflush(f);
|
||||
|
@ -2,7 +2,7 @@
|
||||
regenc.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -32,13 +32,13 @@
|
||||
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
|
||||
|
||||
extern int
|
||||
onigenc_init()
|
||||
onigenc_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern OnigEncoding
|
||||
onigenc_get_default_encoding()
|
||||
onigenc_get_default_encoding(void)
|
||||
{
|
||||
return OnigEncDefaultCharEncoding;
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
regenc.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -2,7 +2,7 @@
|
||||
regerror.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -183,6 +183,48 @@ onig_error_code_to_format(int code)
|
||||
}
|
||||
|
||||
|
||||
/*
 * Render the byte range [s, end) into buf for inclusion in an error message,
 * storing at most buf_size bytes.
 *
 * When the encoding's minimum character length is greater than one byte,
 * each character is decoded: code points >= 0x80 are written as a
 * four-character octal escape ("\ooo") of their low 8 bits, and smaller
 * code points are stored as a single byte.  For every other encoding the
 * raw bytes are copied unchanged.
 *
 * enc      - encoding of the input.
 * s, end   - input range.
 * buf      - output buffer; the returned length is the number of valid bytes
 *            (the result is not a NUL-terminated string).
 * buf_size - capacity of buf in bytes.
 * is_over  - set to 1 when part of the input did not fit, otherwise 0.
 *
 * Returns the number of bytes stored in buf.
 */
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
		    UChar buf[], int buf_size, int *is_over)
{
  int len;
  UChar *p;
  OnigCodePoint code;

  if (ONIGENC_MBC_MINLEN(enc) > 1) {
    p = s;
    len = 0;
    while (p < end) {
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
      if (code >= 0x80) {
        /* sprintf() stores 4 visible characters plus a terminating NUL,
           so 5 bytes of room are required ... */
        if (len + 5 <= buf_size) {
          sprintf((char* )(&(buf[len])), "\\%03o",
                  (unsigned int)(code & 0377));
          /* ... but only the 4 visible characters belong to the output.
             Counting the NUL as well would embed it in the message and
             cut the text short for anything that reads it as a C string. */
          len += 4;
        }
        else {
          /* no room for the escape: stop with p unadvanced so that
             *is_over reports the truncation */
          break;
        }
      }
      else {
        buf[len++] = (UChar )code;
      }

      p += enc_len(enc, p);
      if (len >= buf_size) break;
    }

    *is_over = ((p < end) ? 1 : 0);
  }
  else {
    len = MIN((end - s), buf_size);
    xmemcpy(buf, s, (size_t )len);
    *is_over = ((buf_size < (end - s)) ? 1 : 0);
  }

  return len;
}
|
||||
|
||||
|
||||
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
|
||||
#define MAX_ERROR_PAR_LEN 30
|
||||
|
||||
@ -198,7 +240,8 @@ onig_error_code_to_str(s, code, va_alist)
|
||||
{
|
||||
UChar *p, *q;
|
||||
OnigErrorInfo* einfo;
|
||||
int len;
|
||||
int len, is_over;
|
||||
UChar parbuf[MAX_ERROR_PAR_LEN];
|
||||
va_list vargs;
|
||||
|
||||
va_init_list(vargs, code);
|
||||
@ -212,23 +255,20 @@ onig_error_code_to_str(s, code, va_alist)
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
einfo = va_arg(vargs, OnigErrorInfo*);
|
||||
len = einfo->par_end - einfo->par;
|
||||
len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
|
||||
parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
|
||||
q = onig_error_code_to_format(code);
|
||||
p = s;
|
||||
while (*q != '\0') {
|
||||
if (*q == '%') {
|
||||
q++;
|
||||
if (*q == 'n') { /* '%n': name */
|
||||
if (len > MAX_ERROR_PAR_LEN) {
|
||||
xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
|
||||
p += (MAX_ERROR_PAR_LEN - 3);
|
||||
xmemcpy(p, parbuf, len);
|
||||
p += len;
|
||||
if (is_over != 0) {
|
||||
xmemcpy(p, "...", 3);
|
||||
p += 3;
|
||||
}
|
||||
else {
|
||||
xmemcpy(p, einfo->par, len);
|
||||
p += len;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
else
|
||||
@ -278,9 +318,6 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
||||
|
||||
va_init_list(args, fmt);
|
||||
n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
|
||||
if (n < 0 || n >= bufsize) {
|
||||
n = bufsize - 1;
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
need = (pat_end - pat) * 4 + 4;
|
||||
|
@ -2,7 +2,7 @@
|
||||
regexec.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -76,7 +76,7 @@ history_root_free(OnigRegion* r)
|
||||
}
|
||||
|
||||
static OnigCaptureTreeNode*
|
||||
history_node_new()
|
||||
history_node_new(void)
|
||||
{
|
||||
OnigCaptureTreeNode* node;
|
||||
|
||||
@ -233,7 +233,7 @@ onig_region_init(OnigRegion* region)
|
||||
}
|
||||
|
||||
extern OnigRegion*
|
||||
onig_region_new()
|
||||
onig_region_new(void)
|
||||
{
|
||||
OnigRegion* r;
|
||||
|
||||
@ -371,36 +371,58 @@ typedef struct {
|
||||
OnigOptionType options;
|
||||
OnigRegion* region;
|
||||
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
int best_len; /* for ONIG_OPTION_FIND_LONGEST */
|
||||
UChar* best_s;
|
||||
#endif
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
void* state_check_buff;
|
||||
int state_check_buff_size;
|
||||
#endif
|
||||
} MatchArg;
|
||||
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
|
||||
(msa).stack_p = (void* )0;\
|
||||
(msa).options = (arg_option);\
|
||||
(msa).region = (arg_region);\
|
||||
(msa).start = (arg_start);\
|
||||
(msa).stack_p = (void* )0;\
|
||||
(msa).options = (arg_option);\
|
||||
(msa).region = (arg_region);\
|
||||
(msa).start = (arg_start);\
|
||||
(msa).best_len = ONIG_MISMATCH;\
|
||||
} while (0)
|
||||
#else
|
||||
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
|
||||
(msa).stack_p = (void* )0;\
|
||||
(msa).options = (arg_option);\
|
||||
(msa).region = (arg_region);\
|
||||
(msa).start = (arg_start);\
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
|
||||
|
||||
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \
|
||||
(msa).state_check_buff = (void* )0;\
|
||||
(msa).state_check_buff_size = 0;\
|
||||
#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
|
||||
if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
|
||||
int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\
|
||||
(msa).state_check_buff_size = size; \
|
||||
if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\
|
||||
unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
|
||||
offset = ((offset) * (state_num)) >> 3;\
|
||||
if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
|
||||
if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
|
||||
(msa).state_check_buff = (void* )xmalloc(size);\
|
||||
else \
|
||||
(msa).state_check_buff = (void* )xalloca(size);\
|
||||
xmemset((msa).state_check_buff, 0, (size_t )size);\
|
||||
xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
|
||||
(size_t )(size - (offset))); \
|
||||
(msa).state_check_buff_size = size;\
|
||||
}\
|
||||
else {\
|
||||
(msa).state_check_buff = (void* )0;\
|
||||
(msa).state_check_buff_size = 0;\
|
||||
}\
|
||||
}\
|
||||
else {\
|
||||
(msa).state_check_buff = (void* )0;\
|
||||
(msa).state_check_buff_size = 0;\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
@ -411,7 +433,7 @@ typedef struct {
|
||||
}\
|
||||
} while (0);
|
||||
#else
|
||||
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
|
||||
#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
|
||||
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
|
||||
#endif
|
||||
|
||||
@ -1283,14 +1305,14 @@ static int MaxStackDepth = 0;
|
||||
/*
|
||||
* :nodoc:
|
||||
*/
|
||||
static VALUE onig_stat_print()
|
||||
static VALUE onig_stat_print(void)
|
||||
{
|
||||
onig_print_statistics(stderr);
|
||||
return Qnil;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void onig_statistics_init()
|
||||
extern void onig_statistics_init(void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 256; i++) {
|
||||
@ -1476,8 +1498,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
case OP_END: STAT_OP_IN(OP_END);
|
||||
n = s - sstart;
|
||||
if (n > best_len) {
|
||||
OnigRegion* region = msa->region;
|
||||
OnigRegion* region;
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
if (IS_FIND_LONGEST(option)) {
|
||||
if (n > msa->best_len) {
|
||||
msa->best_len = n;
|
||||
msa->best_s = (UChar* )sstart;
|
||||
}
|
||||
else
|
||||
goto end_best_len;
|
||||
}
|
||||
#endif
|
||||
best_len = n;
|
||||
region = msa->region;
|
||||
if (region) {
|
||||
#ifdef USE_POSIX_REGION_OPTION
|
||||
if (IS_POSIX_REGION(msa->options)) {
|
||||
@ -1553,6 +1586,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
#endif
|
||||
} /* if (region) */
|
||||
} /* n > best_len */
|
||||
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
end_best_len:
|
||||
#endif
|
||||
STAT_OP_OUT;
|
||||
|
||||
if (IS_FIND_CONDITION(option)) {
|
||||
@ -1590,24 +1627,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
ss = s;
|
||||
sp = p;
|
||||
|
||||
exact1_ic_retry:
|
||||
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
while (len-- > 0) {
|
||||
if (*p != *q) {
|
||||
#if 1
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
s = ss;
|
||||
p = sp;
|
||||
goto exact1_ic_retry;
|
||||
}
|
||||
else
|
||||
goto fail;
|
||||
#else
|
||||
goto fail;
|
||||
#endif
|
||||
}
|
||||
p++; q++;
|
||||
}
|
||||
@ -1696,24 +1721,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
ss = s;
|
||||
sp = p;
|
||||
|
||||
exactn_ic_retry:
|
||||
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
|
||||
DATA_ENSURE(0);
|
||||
q = lowbuf;
|
||||
while (len-- > 0) {
|
||||
if (*p != *q) {
|
||||
#if 1
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
s = ss;
|
||||
p = sp;
|
||||
goto exactn_ic_retry;
|
||||
}
|
||||
else
|
||||
goto fail;
|
||||
#else
|
||||
goto fail;
|
||||
#endif
|
||||
}
|
||||
p++; q++;
|
||||
}
|
||||
@ -2949,20 +2962,12 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
|
||||
tsave = t;
|
||||
psave = p;
|
||||
|
||||
retry:
|
||||
while (t < tend) {
|
||||
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
|
||||
q = lowbuf;
|
||||
while (lowlen > 0) {
|
||||
if (*t++ != *q++) {
|
||||
if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
|
||||
t = tsave;
|
||||
p = psave;
|
||||
goto retry;
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
lowlen--;
|
||||
}
|
||||
@ -3262,7 +3267,12 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
|
||||
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, at);
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
{
|
||||
int offset = at - str;
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (region
|
||||
#ifdef USE_POSIX_REGION_OPTION
|
||||
@ -3567,16 +3577,31 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
if (start > end || start < str) goto mismatch_no_msa;
|
||||
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
#define MATCH_AND_RETURN_CHECK \
|
||||
r = match_at(reg, str, end, s, prev, &msa);\
|
||||
if (r != ONIG_MISMATCH) {\
|
||||
if (r >= 0) goto match;\
|
||||
goto finish; /* error */ \
|
||||
if (r >= 0) {\
|
||||
if (! IS_FIND_LONGEST(reg->options)) {\
|
||||
goto match;\
|
||||
}\
|
||||
}\
|
||||
else goto finish; /* error */ \
|
||||
}
|
||||
#else
|
||||
#define MATCH_AND_RETURN_CHECK \
|
||||
r = match_at(reg, str, end, s, prev, &msa);\
|
||||
if (r != ONIG_MISMATCH) {\
|
||||
if (r >= 0) {\
|
||||
goto match;\
|
||||
}\
|
||||
else goto finish; /* error */ \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* anchor optimize: resume search range */
|
||||
if (reg->anchor != 0 && str < end) {
|
||||
UChar* semi_end;
|
||||
UChar *min_semi_end, *max_semi_end;
|
||||
|
||||
if (reg->anchor & ANCHOR_BEGIN_POSITION) {
|
||||
/* search start-position only */
|
||||
@ -3602,49 +3627,58 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
}
|
||||
else if (reg->anchor & ANCHOR_END_BUF) {
|
||||
semi_end = (UChar* )end;
|
||||
min_semi_end = max_semi_end = (UChar* )end;
|
||||
|
||||
end_buf:
|
||||
if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
|
||||
if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
|
||||
goto mismatch_no_msa;
|
||||
|
||||
if (range > start) {
|
||||
if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) {
|
||||
start = semi_end - reg->anchor_dmax;
|
||||
if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
|
||||
start = min_semi_end - reg->anchor_dmax;
|
||||
if (start < end)
|
||||
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
|
||||
else { /* match with empty at end */
|
||||
start = onigenc_get_prev_char_head(reg->enc, str, end);
|
||||
}
|
||||
}
|
||||
if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) {
|
||||
range = semi_end - reg->anchor_dmin + 1;
|
||||
if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
|
||||
range = max_semi_end - reg->anchor_dmin + 1;
|
||||
}
|
||||
|
||||
if (start >= range) goto mismatch_no_msa;
|
||||
}
|
||||
else {
|
||||
if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) {
|
||||
range = semi_end - reg->anchor_dmax;
|
||||
if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
|
||||
range = min_semi_end - reg->anchor_dmax;
|
||||
}
|
||||
if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) {
|
||||
start = semi_end - reg->anchor_dmin;
|
||||
if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
|
||||
start = max_semi_end - reg->anchor_dmin;
|
||||
start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
|
||||
if (range > start) goto mismatch_no_msa;
|
||||
}
|
||||
if (range > start) goto mismatch_no_msa;
|
||||
}
|
||||
}
|
||||
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
|
||||
UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
|
||||
|
||||
max_semi_end = (UChar* )end;
|
||||
if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
|
||||
semi_end = pre_end;
|
||||
if (semi_end > str && start <= semi_end) {
|
||||
min_semi_end = pre_end;
|
||||
|
||||
#ifdef USE_CRNL_AS_LINE_TERMINATOR
|
||||
pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
|
||||
if (IS_NOT_NULL(pre_end) &&
|
||||
ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
|
||||
min_semi_end = pre_end;
|
||||
}
|
||||
#endif
|
||||
if (min_semi_end > str && start <= min_semi_end) {
|
||||
goto end_buf;
|
||||
}
|
||||
}
|
||||
else {
|
||||
semi_end = (UChar* )end;
|
||||
min_semi_end = (UChar* )end;
|
||||
goto end_buf;
|
||||
}
|
||||
}
|
||||
@ -3666,7 +3700,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, start);
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
msa.state_check_buff = (void* )0;
|
||||
msa.state_check_buff = (void* )0;
|
||||
msa.state_check_buff_size = 0;
|
||||
#endif
|
||||
MATCH_AND_RETURN_CHECK;
|
||||
@ -3681,7 +3715,12 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
#endif
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, orig_start);
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
{
|
||||
int offset = (MIN(start, range) - str);
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
|
||||
}
|
||||
#endif
|
||||
|
||||
s = (UChar* )start;
|
||||
if (range > start) { /* forward search */
|
||||
@ -3809,6 +3848,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
}
|
||||
|
||||
mismatch:
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
if (IS_FIND_LONGEST(reg->options)) {
|
||||
if (msa.best_len >= 0) {
|
||||
s = msa.best_s;
|
||||
goto match;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
r = ONIG_MISMATCH;
|
||||
|
||||
finish:
|
||||
|
@ -2,7 +2,7 @@
|
||||
regext.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -2,7 +2,7 @@
|
||||
reggnu.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -4,7 +4,7 @@
|
||||
regint.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -71,7 +71,7 @@
|
||||
/* internal config */
|
||||
#define USE_RECYCLE_NODE
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define USE_QUALIFIER_PEEK_NEXT
|
||||
#define USE_QUANTIFIER_PEEK_NEXT
|
||||
#define USE_ST_HASH_TABLE
|
||||
#define USE_SHARED_CCLASS_TABLE
|
||||
|
||||
@ -86,34 +86,29 @@
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
|
||||
/* #define USE_MULTI_THREAD_SYSTEM */
|
||||
#define THREAD_SYSTEM_INIT /* depend on thread system */
|
||||
#define THREAD_SYSTEM_END /* depend on thread system */
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#define CHECK_INTERRUPT /* depend on application */
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xcalloc calloc
|
||||
#define xfree free
|
||||
#else
|
||||
#include "ruby.h"
|
||||
#include "version.h"
|
||||
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
|
||||
|
||||
#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
|
||||
#define USE_MULTI_THREAD_SYSTEM
|
||||
|
||||
#define THREAD_SYSTEM_INIT
|
||||
#define THREAD_SYSTEM_END
|
||||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
#define CHECK_INTERRUPT do {\
|
||||
if (rb_trap_pending) {\
|
||||
if (! rb_prohibit_interrupt) {\
|
||||
rb_trap_exec();\
|
||||
}\
|
||||
}\
|
||||
} while (0)
|
||||
|
||||
#define DEFAULT_WARN_FUNCTION onig_rb_warn
|
||||
#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning
|
||||
@ -121,7 +116,7 @@
|
||||
#endif /* else NOT_RUBY */
|
||||
|
||||
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
|
||||
#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000
|
||||
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 8
|
||||
#define xmemset memset
|
||||
@ -129,16 +124,13 @@
|
||||
#define xmemmove memmove
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#define xalloca _alloca
|
||||
#ifdef NOT_RUBY
|
||||
#if _MSC_VER < 1500
|
||||
# define vsnprintf _vsnprintf
|
||||
#endif
|
||||
#ifndef vsnprintf
|
||||
#define vsnprintf _vsnprintf
|
||||
#endif
|
||||
#else
|
||||
#define xalloca alloca
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
|
||||
#define ONIG_STATE_INC(reg) (reg)->state++
|
||||
#define ONIG_STATE_DEC(reg) (reg)->state--
|
||||
@ -235,6 +227,10 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __BORLANDC__
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
@ -258,7 +254,8 @@
|
||||
#define NULL_UCHARP ((UChar* )0)
|
||||
|
||||
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#define WORD_ALIGNMENT_SIZE SIZEOF_INT
|
||||
/* sizeof(OnigCodePoint) */
|
||||
#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
|
||||
|
||||
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
|
||||
(pad_size) = WORD_ALIGNMENT_SIZE \
|
||||
@ -586,8 +583,6 @@ enum OpCode {
|
||||
|
||||
OP_WORD,
|
||||
OP_NOT_WORD,
|
||||
OP_WORD_SB,
|
||||
OP_WORD_MB,
|
||||
OP_WORD_BOUND,
|
||||
OP_NOT_WORD_BOUND,
|
||||
OP_WORD_BEGIN,
|
||||
|
@ -2,7 +2,7 @@
|
||||
regparse.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -64,13 +64,13 @@ extern void onig_null_warn(const char* s) { }
|
||||
extern void
|
||||
onig_rb_warn(const char* s)
|
||||
{
|
||||
rb_warn(s);
|
||||
rb_warn("%s", s);
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_rb_warning(const char* s)
|
||||
{
|
||||
rb_warning(s);
|
||||
rb_warning("%s", s);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1051,9 +1051,9 @@ onig_node_free(Node* node)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
if (NQUALIFIER(node).target)
|
||||
onig_node_free(NQUALIFIER(node).target);
|
||||
case N_QUANTIFIER:
|
||||
if (NQUANTIFIER(node).target)
|
||||
onig_node_free(NQUANTIFIER(node).target);
|
||||
break;
|
||||
|
||||
case N_EFFECT:
|
||||
@ -1088,7 +1088,7 @@ onig_node_free(Node* node)
|
||||
|
||||
#ifdef USE_RECYCLE_NODE
|
||||
extern int
|
||||
onig_free_node_list()
|
||||
onig_free_node_list(void)
|
||||
{
|
||||
FreeNode* n;
|
||||
|
||||
@ -1104,7 +1104,7 @@ onig_free_node_list()
|
||||
#endif
|
||||
|
||||
static Node*
|
||||
node_new()
|
||||
node_new(void)
|
||||
{
|
||||
Node* node;
|
||||
|
||||
@ -1133,7 +1133,7 @@ initialize_cclass(CClassNode* cc)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_cclass()
|
||||
node_new_cclass(void)
|
||||
{
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
@ -1202,7 +1202,7 @@ node_new_ctype(int type)
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_anychar()
|
||||
node_new_anychar(void)
|
||||
{
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
@ -1318,25 +1318,25 @@ node_new_call(UChar* name, UChar* name_end)
|
||||
#endif
|
||||
|
||||
static Node*
|
||||
node_new_qualifier(int lower, int upper, int by_number)
|
||||
node_new_quantifier(int lower, int upper, int by_number)
|
||||
{
|
||||
Node* node = node_new();
|
||||
CHECK_NULL_RETURN(node);
|
||||
node->type = N_QUALIFIER;
|
||||
NQUALIFIER(node).state = 0;
|
||||
NQUALIFIER(node).target = NULL;
|
||||
NQUALIFIER(node).lower = lower;
|
||||
NQUALIFIER(node).upper = upper;
|
||||
NQUALIFIER(node).greedy = 1;
|
||||
NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
|
||||
NQUALIFIER(node).head_exact = NULL_NODE;
|
||||
NQUALIFIER(node).next_head_exact = NULL_NODE;
|
||||
NQUALIFIER(node).is_refered = 0;
|
||||
node->type = N_QUANTIFIER;
|
||||
NQUANTIFIER(node).state = 0;
|
||||
NQUANTIFIER(node).target = NULL;
|
||||
NQUANTIFIER(node).lower = lower;
|
||||
NQUANTIFIER(node).upper = upper;
|
||||
NQUANTIFIER(node).greedy = 1;
|
||||
NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
|
||||
NQUANTIFIER(node).head_exact = NULL_NODE;
|
||||
NQUANTIFIER(node).next_head_exact = NULL_NODE;
|
||||
NQUANTIFIER(node).is_refered = 0;
|
||||
if (by_number != 0)
|
||||
NQUALIFIER(node).state |= NST_BY_NUMBER;
|
||||
NQUANTIFIER(node).state |= NST_BY_NUMBER;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
NQUALIFIER(node).comb_exp_check_num = 0;
|
||||
NQUANTIFIER(node).comb_exp_check_num = 0;
|
||||
#endif
|
||||
|
||||
return node;
|
||||
@ -1481,6 +1481,7 @@ onig_node_new_str(const UChar* s, const UChar* end)
|
||||
return node_new_str(s, end);
|
||||
}
|
||||
|
||||
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
|
||||
static Node*
|
||||
node_new_str_raw(UChar* s, UChar* end)
|
||||
{
|
||||
@ -1488,20 +1489,21 @@ node_new_str_raw(UChar* s, UChar* end)
|
||||
NSTRING_SET_RAW(node);
|
||||
return node;
|
||||
}
|
||||
#endif
|
||||
|
||||
static Node*
|
||||
node_new_empty()
|
||||
node_new_empty(void)
|
||||
{
|
||||
return node_new_str(NULL, NULL);
|
||||
}
|
||||
|
||||
static Node*
|
||||
node_new_str_raw_char(UChar c)
|
||||
node_new_str_char(UChar c)
|
||||
{
|
||||
UChar p[1];
|
||||
|
||||
p[0] = c;
|
||||
return node_new_str_raw(p, p + 1);
|
||||
return node_new_str(p, p + 1);
|
||||
}
|
||||
|
||||
static Node*
|
||||
@ -1531,6 +1533,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
|
||||
static int
|
||||
node_str_head_pad(StrNode* sn, int num, UChar val)
|
||||
{
|
||||
UChar buf[NODE_STR_BUF_SIZE];
|
||||
int i, len;
|
||||
|
||||
len = sn->end - sn->s;
|
||||
onig_strcpy(buf, sn->s, sn->end);
|
||||
onig_strcpy(&(sn->s[num]), buf, buf + len);
|
||||
sn->end += num;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
sn->s[i] = val;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
extern int
|
||||
onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
|
||||
{
|
||||
@ -1924,29 +1944,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
|
||||
{
|
||||
BBuf *tbuf;
|
||||
int r;
|
||||
|
||||
if (IS_CCLASS_NOT(cc)) {
|
||||
bitset_invert(cc->bs);
|
||||
|
||||
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
|
||||
r = not_code_range_buf(enc, cc->mbuf, &tbuf);
|
||||
if (r != 0) return r;
|
||||
|
||||
bbuf_free(cc->mbuf);
|
||||
cc->mbuf = tbuf;
|
||||
}
|
||||
|
||||
CCLASS_CLEAR_NOT(cc);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
|
||||
{
|
||||
@ -2089,7 +2086,7 @@ conv_backslash_value(int c, ScanEnv* env)
|
||||
}
|
||||
|
||||
static int
|
||||
is_invalid_qualifier_target(Node* node)
|
||||
is_invalid_quantifier_target(Node* node)
|
||||
{
|
||||
switch (NTYPE(node)) {
|
||||
case N_ANCHOR:
|
||||
@ -2098,19 +2095,19 @@ is_invalid_qualifier_target(Node* node)
|
||||
|
||||
case N_EFFECT:
|
||||
if (NEFFECT(node).type == EFFECT_OPTION)
|
||||
return is_invalid_qualifier_target(NEFFECT(node).target);
|
||||
return is_invalid_quantifier_target(NEFFECT(node).target);
|
||||
break;
|
||||
|
||||
case N_LIST: /* ex. (?:\G\A)* */
|
||||
do {
|
||||
if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;
|
||||
if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
|
||||
} while (IS_NOT_NULL(node = NCONS(node).right));
|
||||
return 0;
|
||||
break;
|
||||
|
||||
case N_ALT: /* ex. (?:abc|\A)* */
|
||||
do {
|
||||
if (is_invalid_qualifier_target(NCONS(node).left)) return 1;
|
||||
if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
|
||||
} while (IS_NOT_NULL(node = NCONS(node).right));
|
||||
break;
|
||||
|
||||
@ -2122,7 +2119,7 @@ is_invalid_qualifier_target(Node* node)
|
||||
|
||||
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
|
||||
static int
|
||||
popular_qualifier_num(QualifierNode* qf)
|
||||
popular_quantifier_num(QuantifierNode* qf)
|
||||
{
|
||||
if (qf->greedy) {
|
||||
if (qf->lower == 0) {
|
||||
@ -2166,15 +2163,15 @@ static enum ReduceType ReduceTypeTable[6][6] = {
|
||||
};
|
||||
|
||||
extern void
|
||||
onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
|
||||
onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
|
||||
{
|
||||
int pnum, cnum;
|
||||
QualifierNode *p, *c;
|
||||
QuantifierNode *p, *c;
|
||||
|
||||
p = &(NQUALIFIER(pnode));
|
||||
c = &(NQUALIFIER(cnode));
|
||||
pnum = popular_qualifier_num(p);
|
||||
cnum = popular_qualifier_num(c);
|
||||
p = &(NQUANTIFIER(pnode));
|
||||
c = &(NQUANTIFIER(cnode));
|
||||
pnum = popular_quantifier_num(p);
|
||||
cnum = popular_quantifier_num(c);
|
||||
|
||||
switch(ReduceTypeTable[cnum][pnum]) {
|
||||
case RQ_DEL:
|
||||
@ -2282,7 +2279,7 @@ typedef struct {
|
||||
|
||||
|
||||
static int
|
||||
fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
|
||||
fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
|
||||
{
|
||||
int low, up, syn_allow, non_low = 0;
|
||||
int r = 0;
|
||||
@ -3035,7 +3032,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
case '{':
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
|
||||
r = fetch_range_qualifier(&p, end, tok, env);
|
||||
r = fetch_range_quantifier(&p, end, tok, env);
|
||||
if (r < 0) return r; /* error */
|
||||
if (r == 0) goto greedy_check;
|
||||
else if (r == 2) { /* {n} */
|
||||
@ -3454,7 +3451,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
|
||||
case '{':
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
|
||||
r = fetch_range_qualifier(&p, end, tok, env);
|
||||
r = fetch_range_quantifier(&p, end, tok, env);
|
||||
if (r < 0) return r; /* error */
|
||||
if (r == 0) goto greedy_check;
|
||||
else if (r == 2) { /* {n} */
|
||||
@ -3512,7 +3509,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
|
||||
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
|
||||
tok->type = TK_ANCHOR;
|
||||
tok->u.subtype = (IS_SINGLELINE(env->option)
|
||||
? ANCHOR_END_BUF : ANCHOR_END_LINE);
|
||||
? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
|
||||
break;
|
||||
|
||||
case '[':
|
||||
@ -4619,11 +4616,11 @@ static const char* ReduceQStr[] = {
|
||||
};
|
||||
|
||||
static int
|
||||
set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
{
|
||||
QualifierNode* qn;
|
||||
QuantifierNode* qn;
|
||||
|
||||
qn = &(NQUALIFIER(qnode));
|
||||
qn = &(NQUANTIFIER(qnode));
|
||||
if (qn->lower == 1 && qn->upper == 1) {
|
||||
return 1;
|
||||
}
|
||||
@ -4642,15 +4639,15 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
case N_QUANTIFIER:
|
||||
{ /* check redundant double repeat. */
|
||||
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
|
||||
QualifierNode* qnt = &(NQUALIFIER(target));
|
||||
int nestq_num = popular_qualifier_num(qn);
|
||||
int targetq_num = popular_qualifier_num(qnt);
|
||||
QuantifierNode* qnt = &(NQUANTIFIER(target));
|
||||
int nestq_num = popular_quantifier_num(qn);
|
||||
int targetq_num = popular_quantifier_num(qnt);
|
||||
|
||||
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
|
||||
if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
|
||||
@ -4686,7 +4683,7 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
#endif
|
||||
if (targetq_num >= 0) {
|
||||
if (nestq_num >= 0) {
|
||||
onig_reduce_nested_qualifier(qnode, target);
|
||||
onig_reduce_nested_quantifier(qnode, target);
|
||||
goto q_exit;
|
||||
}
|
||||
else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
|
||||
@ -4708,61 +4705,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
|
||||
CClassNode* cc, Node** root)
|
||||
{
|
||||
int r, i, j, k, clen, len, ncode, n;
|
||||
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
||||
Node **ptail, *snode = NULL_NODE;
|
||||
const OnigCompAmbigCodes* ccs;
|
||||
const OnigCompAmbigCodeItem* ci;
|
||||
OnigAmbigType amb;
|
||||
|
||||
n = 0;
|
||||
*root = NULL_NODE;
|
||||
ptail = root;
|
||||
|
||||
|
||||
for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
|
||||
if ((amb & ambig_flag) == 0) continue;
|
||||
|
||||
ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
|
||||
for (i = 0; i < ncode; i++) {
|
||||
if (onig_is_code_in_cc(enc, ccs[i].code, cc)) {
|
||||
for (j = 0; j < ccs[i].n; j++) {
|
||||
ci = &(ccs[i].items[j]);
|
||||
if (ci->len > 1) { /* compound only */
|
||||
if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
|
||||
|
||||
clen = ci->len;
|
||||
for (k = 0; k < clen; k++) {
|
||||
len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf);
|
||||
|
||||
if (k == 0) {
|
||||
snode = node_new_str_raw(buf, buf + len);
|
||||
CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
|
||||
}
|
||||
else {
|
||||
r = onig_node_str_cat(snode, buf, buf + len);
|
||||
if (r < 0) return r;
|
||||
}
|
||||
}
|
||||
|
||||
*ptail = node_new_alt(snode, NULL_NODE);
|
||||
CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
|
||||
ptail = &(NCONS(*ptail).right);
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_SHARED_CCLASS_TABLE
|
||||
|
||||
#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
|
||||
@ -4826,11 +4768,11 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_free_shared_cclass_table()
|
||||
onig_free_shared_cclass_table(void)
|
||||
{
|
||||
if (IS_NOT_NULL(OnigTypeCClassTable)) {
|
||||
onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
|
||||
xfree(OnigTypeCClassTable);
|
||||
onig_st_free_table(OnigTypeCClassTable);
|
||||
OnigTypeCClassTable = NULL;
|
||||
}
|
||||
|
||||
@ -4911,23 +4853,36 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
||||
case TK_RAW_BYTE:
|
||||
tk_raw_byte:
|
||||
{
|
||||
*np = node_new_str_raw_char((UChar )tok->u.c);
|
||||
*np = node_new_str_char((UChar )tok->u.c);
|
||||
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
|
||||
len = 1;
|
||||
while (1) {
|
||||
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
|
||||
if (len == enc_len(env->enc, NSTRING(*np).s)) {
|
||||
r = fetch_token(tok, src, end, env);
|
||||
goto string_end;
|
||||
}
|
||||
}
|
||||
|
||||
r = fetch_token(tok, src, end, env);
|
||||
if (r < 0) return r;
|
||||
if (r != TK_RAW_BYTE) {
|
||||
#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
|
||||
if (len >= enc_len(env->enc, NSTRING(*np).s)) {
|
||||
NSTRING_CLEAR_RAW(*np);
|
||||
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
|
||||
int rem;
|
||||
if (len < ONIGENC_MBC_MINLEN(env->enc)) {
|
||||
rem = ONIGENC_MBC_MINLEN(env->enc) - len;
|
||||
(void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
|
||||
if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
|
||||
goto string_end;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
goto string_end;
|
||||
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
|
||||
}
|
||||
|
||||
r = node_str_cat_char(*np, (UChar )tok->u.c);
|
||||
if (r < 0) return r;
|
||||
|
||||
len++;
|
||||
}
|
||||
}
|
||||
@ -5098,24 +5053,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_IGNORECASE(env->option) &&
|
||||
(env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
int res;
|
||||
Node *alt_root, *work;
|
||||
|
||||
res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc,
|
||||
cc, &alt_root);
|
||||
if (res < 0) return res;
|
||||
if (res > 0) {
|
||||
work = node_new_alt(*np, alt_root);
|
||||
if (IS_NULL(work)) {
|
||||
onig_node_free(alt_root);
|
||||
return ONIGERR_MEMORY;
|
||||
}
|
||||
*np = work;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5127,9 +5064,9 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
||||
case TK_ANYCHAR_ANYTIME:
|
||||
*np = node_new_anychar();
|
||||
CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
|
||||
qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
|
||||
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
|
||||
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
|
||||
NQUALIFIER(qn).target = *np;
|
||||
NQUANTIFIER(qn).target = *np;
|
||||
*np = qn;
|
||||
break;
|
||||
|
||||
@ -5185,14 +5122,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
|
||||
|
||||
repeat:
|
||||
if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
|
||||
if (is_invalid_qualifier_target(*targetp))
|
||||
if (is_invalid_quantifier_target(*targetp))
|
||||
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
|
||||
|
||||
qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
|
||||
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
|
||||
(r == TK_INTERVAL ? 1 : 0));
|
||||
CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
|
||||
NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
|
||||
r = set_qualifier(qn, *targetp, group, env);
|
||||
NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
|
||||
r = set_quantifier(qn, *targetp, group, env);
|
||||
if (r < 0) return r;
|
||||
|
||||
if (tok->u.repeat.possessive != 0) {
|
||||
|
@ -4,7 +4,7 @@
|
||||
regparse.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -37,7 +37,7 @@
|
||||
#define N_CTYPE (1<< 2)
|
||||
#define N_ANYCHAR (1<< 3)
|
||||
#define N_BACKREF (1<< 4)
|
||||
#define N_QUALIFIER (1<< 5)
|
||||
#define N_QUANTIFIER (1<< 5)
|
||||
#define N_EFFECT (1<< 6)
|
||||
#define N_ANCHOR (1<< 7)
|
||||
#define N_LIST (1<< 8)
|
||||
@ -52,7 +52,7 @@
|
||||
#define NSTRING(node) ((node)->u.str)
|
||||
#define NCCLASS(node) ((node)->u.cclass)
|
||||
#define NCTYPE(node) ((node)->u.ctype)
|
||||
#define NQUALIFIER(node) ((node)->u.qualifier)
|
||||
#define NQUANTIFIER(node) ((node)->u.quantifier)
|
||||
#define NANCHOR(node) ((node)->u.anchor)
|
||||
#define NBACKREF(node) ((node)->u.backref)
|
||||
#define NEFFECT(node) ((node)->u.effect)
|
||||
@ -131,7 +131,7 @@ typedef struct {
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
|
||||
#endif
|
||||
} QualifierNode;
|
||||
} QuantifierNode;
|
||||
|
||||
/* status bits */
|
||||
#define NST_MIN_FIXED (1<<0)
|
||||
@ -170,8 +170,8 @@ typedef struct {
|
||||
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
|
||||
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
|
||||
#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
|
||||
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
|
||||
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
@ -230,15 +230,15 @@ typedef struct {
|
||||
typedef struct _Node {
|
||||
int type;
|
||||
union {
|
||||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QualifierNode qualifier;
|
||||
EffectNode effect;
|
||||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QuantifierNode quantifier;
|
||||
EffectNode effect;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
CallNode call;
|
||||
CallNode call;
|
||||
#endif
|
||||
BackrefNode backref;
|
||||
AnchorNode anchor;
|
||||
BackrefNode backref;
|
||||
AnchorNode anchor;
|
||||
struct {
|
||||
struct _Node* left;
|
||||
struct _Node* right;
|
||||
@ -306,7 +306,7 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
|
||||
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
||||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
@ -315,7 +315,7 @@ extern Node* onig_node_new_anchor P_((int type));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern void onig_node_str_clear P_((Node* node));
|
||||
extern int onig_free_node_list();
|
||||
extern int onig_free_node_list P_((void));
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
|
||||
|
@ -76,7 +76,7 @@ regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size)
|
||||
s = "";
|
||||
}
|
||||
else {
|
||||
sprintf(tbuf, "undefined error code (%d)", posix_ecode);
|
||||
sprintf(tbuf, "undefined error code (%d)", posix_ecode);
|
||||
s = tbuf;
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
regposix.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -2,7 +2,7 @@
|
||||
regsyntax.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -2,7 +2,7 @@
|
||||
regversion.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -47,7 +47,7 @@ onig_copyright(void)
|
||||
{
|
||||
static char s[58];
|
||||
|
||||
sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako",
|
||||
sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
|
Loading…
Reference in New Issue
Block a user