mirror of
https://github.com/php/php-src.git
synced 2025-01-20 18:53:37 +08:00
Updated bundled oniguruma library (used for multibyte regular expression) to 4.3.1.
This commit is contained in:
parent
560208b1c6
commit
3ea2152839
@ -1,5 +1,61 @@
|
||||
History
|
||||
|
||||
2006/08/21: Version 4.3.1
|
||||
|
||||
2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
|
||||
2006/08/21: [impl] change stack type values
|
||||
and re-define STK_MASK_TO_VOID_TARGET etc...
|
||||
2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite.
|
||||
2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE.
|
||||
2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n}
|
||||
2006/08/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
|
||||
if backreference is not used.
|
||||
2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern
|
||||
in combination explosion check.
|
||||
|
||||
2006/08/17: Version 4.3.0
|
||||
|
||||
2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
|
||||
2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK.
|
||||
check /(.+)*/, /(\s*foo\s*)*/ etc...
|
||||
[API] add num_comb_exp_check member in regex_t.
|
||||
[dist] change LTVERSION value to "1:0:0" in configure.in.
|
||||
2006/08/15: [bug] OP_REPEAT_INC process in match_at().
|
||||
should check repeat-count >= range-upper and
|
||||
range-upper may be infinite.
|
||||
|
||||
2006/08/11: Version 4.2.3
|
||||
|
||||
2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
|
||||
2006/08/10: [impl] remove double call in set_qualifier().
|
||||
2006/08/10: [impl] remove by_number member in QualifierNode.
|
||||
2006/08/09: [impl] remove a comma at the end of enum ReduceType
|
||||
for escape warning on Mac OS X.
|
||||
2006/08/07: [impl] remove warning in regcomp.c.
|
||||
2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
|
||||
|
||||
2006/08/03: Version 4.2.2
|
||||
|
||||
2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
|
||||
2006/08/03: [bug] (thanks Hiroyuki Yamamoto)
|
||||
segmentation fault in regexec(). (POSIX API)
|
||||
2006/08/02: [bug] combination of \G in look-ahead/look-behind and other
|
||||
anchors(\A, \z, \Z) cause invalid result.
|
||||
ex. /(?!\G)a\z/.match("ba")
|
||||
start arg. of MATCH_ARG_INIT() should be original
|
||||
arg. of onig_search().
|
||||
|
||||
2006/07/31: Version 4.2.1
|
||||
|
||||
2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
|
||||
2006/07/31: [bug] (thanks Kimura Minoru)
|
||||
re-implement bm_search_notrev().
|
||||
2006/07/31: [impl] bm_search_notrev() refactoring.
|
||||
2006/07/31: [bug] (thanks Kimura Minoru)
|
||||
fix incomplete multibyte string in exact info.
|
||||
2006/07/31: [impl] (thanks Seiji Masugata)
|
||||
remove cast in va_init_list() for Intel C Compiler.
|
||||
|
||||
2006/07/18: Version 4.2.0
|
||||
|
||||
2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
|
||||
|
@ -8,7 +8,7 @@
|
||||
<h1>Oniguruma</h1>
|
||||
|
||||
<p>
|
||||
2006/07/18 (C) K.Kosako
|
||||
2006/08/21 (C) K.Kosako
|
||||
</p>
|
||||
|
||||
<p>
|
||||
@ -37,8 +37,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
<dt><b>What's new</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>Version 4.2.0 released. (2006/07/18)
|
||||
<li>Version 2.5.6 released. (2006/05/29)
|
||||
<li>Version 4.3.1 released. (2006/08/21)
|
||||
<li>Version 2.5.7 released. (2006/07/28)
|
||||
</ul>
|
||||
</dl>
|
||||
|
||||
@ -70,13 +70,13 @@ It follows the BSD license in the case of the one except for it.
|
||||
|
||||
<dt><b>Download:</b>
|
||||
<ul>
|
||||
<li> <a href="archive/onig-4.2.0.tar.gz">Latest release version 4.2.0</a> (2006/07/18) <a href="HISTORY_4X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.1.2.tar.gz">4.1.2</a> (2006/07/03)
|
||||
<li> <a href="archive/onig-4.1.1.tar.gz">4.1.1</a> (2006/05/22)
|
||||
<li> <a href="archive/onig-4.1.0.tar.gz">4.1.0</a> (2006/05/15)
|
||||
<li> <a href="archive/onigd2_5_6.tar.gz">Latest release version 2.5.6</a> (2006/05/29) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.3.1.tar.gz">Latest release version 4.3.1</a> (2006/08/21) <a href="HISTORY_4X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.3.0.tar.gz">4.3.0</a> (2006/08/17)
|
||||
<li> <a href="archive/onig-4.2.2.tar.gz">4.2.2</a> (2006/08/03)
|
||||
<li> <a href="archive/onig-4.2.1.tar.gz">4.2.1</a> (2006/07/31)
|
||||
<li> <a href="archive/onigd2_5_7.tar.gz">Latest release version 2.5.7</a> (2006/07/28) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
<li> <a href="archive/onigd2_5_6.tar.gz">2.5.6</a> (2006/05/29)
|
||||
<li> <a href="archive/onigd2_5_5.tar.gz">2.5.5</a> (2006/05/08)
|
||||
<li> <a href="archive/onigd2_5_4.tar.gz">2.5.4</a> (2006/02/27)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
@ -87,7 +87,7 @@ It follows the BSD license in the case of the one except for it.
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<dt><b>Documents:</b> (version 4.2.0)
|
||||
<dt><b>Documents:</b> (version 4.3.1)
|
||||
<ul>
|
||||
<li> <a href="doc/RE.txt">Regular Expressions</a>
|
||||
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
|
||||
@ -139,6 +139,7 @@ It follows the BSD license in the case of the one except for it.
|
||||
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
|
||||
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
|
||||
<li> <a href="http://www.yokkasoft.net/">YokkaSoft</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
@ -158,7 +159,6 @@ It follows the BSD license in the case of the one except for it.
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
|
||||
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
|
||||
<li> <a href="http://staff.aist.go.jp/tanaka-akira/textprocess/">"Text Processing" Lecture documents (Tanaka Akira)</a> (Japanese page)
|
||||
<li> <a href="resource/JRC2006_panel.pdf">"Regex library in Ruby 1.9/2.0" Japan Ruby Conference 2006 (K.Kosako)</a> (Japanese)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
|
@ -37,8 +37,8 @@ extern "C" {
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 4
|
||||
#define ONIGURUMA_VERSION_MINOR 2
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
#define ONIGURUMA_VERSION_MINOR 3
|
||||
#define ONIGURUMA_VERSION_TEENY 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
@ -744,6 +744,7 @@ typedef struct re_pattern_buffer {
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* combination explosion check */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
|
@ -186,6 +186,15 @@ add_opcode(regex_t* reg, int opcode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
add_state_check_num(regex_t* reg, int num)
|
||||
{
|
||||
StateCheckNumType n = (StateCheckNumType )num;
|
||||
|
||||
BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
add_rel_addr(regex_t* reg, int addr)
|
||||
{
|
||||
@ -644,7 +653,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
||||
}
|
||||
|
||||
p[id].lower = lower;
|
||||
p[id].upper = upper;
|
||||
p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -684,7 +693,258 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
is_anychar_star_qualifier(QualifierNode* qn)
|
||||
{
|
||||
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
||||
NTYPE(qn->target) == N_ANYCHAR)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define QUALIFIER_EXPAND_LIMIT_SIZE 50
|
||||
#define CKN_ON (ckn > 0)
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
static int
|
||||
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int len, mod_tlen, cklen;
|
||||
int ckn;
|
||||
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
||||
int empty_info = qn->target_empty_info;
|
||||
int tlen = compile_length_tree(qn->target, reg);
|
||||
|
||||
if (tlen < 0) return tlen;
|
||||
|
||||
ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
||||
|
||||
cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
|
||||
|
||||
/* anychar repeat */
|
||||
if (NTYPE(qn->target) == N_ANYCHAR) {
|
||||
if (qn->greedy && infinite) {
|
||||
if (IS_NOT_NULL(qn->next_head_exact))
|
||||
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
|
||||
else
|
||||
return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty_info != 0)
|
||||
mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
||||
else
|
||||
mod_tlen = tlen;
|
||||
|
||||
if (infinite && qn->lower <= 1) {
|
||||
if (qn->greedy) {
|
||||
if (qn->lower == 1)
|
||||
len = SIZE_OP_JUMP;
|
||||
else
|
||||
len = 0;
|
||||
|
||||
len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
|
||||
}
|
||||
else {
|
||||
if (qn->lower == 0)
|
||||
len = SIZE_OP_JUMP;
|
||||
else
|
||||
len = 0;
|
||||
|
||||
len += mod_tlen + SIZE_OP_PUSH + cklen;
|
||||
}
|
||||
}
|
||||
else if (qn->upper == 0) {
|
||||
if (qn->is_refered != 0) /* /(?<n>..){0}/ */
|
||||
len = SIZE_OP_JUMP + tlen;
|
||||
else
|
||||
len = 0;
|
||||
}
|
||||
else if (qn->upper == 1 && qn->greedy) {
|
||||
if (qn->lower == 0) {
|
||||
if (CKN_ON) {
|
||||
len = SIZE_OP_STATE_CHECK_PUSH + tlen;
|
||||
}
|
||||
else {
|
||||
len = SIZE_OP_PUSH + tlen;
|
||||
}
|
||||
}
|
||||
else {
|
||||
len = tlen;
|
||||
}
|
||||
}
|
||||
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
||||
len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
|
||||
}
|
||||
else {
|
||||
len = SIZE_OP_REPEAT_INC
|
||||
+ mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
||||
if (CKN_ON)
|
||||
len += SIZE_OP_STATE_CHECK;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
{
|
||||
int r, mod_tlen;
|
||||
int ckn;
|
||||
int infinite = IS_REPEAT_INFINITE(qn->upper);
|
||||
int empty_info = qn->target_empty_info;
|
||||
int tlen = compile_length_tree(qn->target, reg);
|
||||
|
||||
if (tlen < 0) return tlen;
|
||||
|
||||
ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
||||
|
||||
if (is_anychar_star_qualifier(qn)) {
|
||||
r = compile_tree_n_times(qn->target, qn->lower, reg);
|
||||
if (r) return r;
|
||||
if (IS_NOT_NULL(qn->next_head_exact)) {
|
||||
if (IS_MULTILINE(reg->options))
|
||||
r = add_opcode(reg, (CKN_ON ?
|
||||
OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
|
||||
: OP_ANYCHAR_ML_STAR_PEEK_NEXT));
|
||||
else
|
||||
r = add_opcode(reg, (CKN_ON ?
|
||||
OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT
|
||||
: OP_ANYCHAR_STAR_PEEK_NEXT));
|
||||
if (r) return r;
|
||||
if (CKN_ON) {
|
||||
r = add_state_check_num(reg, ckn);
|
||||
if (r) return r;
|
||||
}
|
||||
|
||||
return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
|
||||
}
|
||||
else {
|
||||
if (IS_MULTILINE(reg->options)) {
|
||||
r = add_opcode(reg, (CKN_ON ?
|
||||
OP_STATE_CHECK_ANYCHAR_ML_STAR
|
||||
: OP_ANYCHAR_ML_STAR));
|
||||
}
|
||||
else {
|
||||
r = add_opcode(reg, (CKN_ON ?
|
||||
OP_STATE_CHECK_ANYCHAR_STAR
|
||||
: OP_ANYCHAR_STAR));
|
||||
}
|
||||
if (r) return r;
|
||||
if (CKN_ON)
|
||||
r = add_state_check_num(reg, ckn);
|
||||
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty_info != 0)
|
||||
mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
||||
else
|
||||
mod_tlen = tlen;
|
||||
|
||||
if (infinite && qn->lower <= 1) {
|
||||
if (qn->greedy) {
|
||||
if (qn->lower == 1) {
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP,
|
||||
(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
|
||||
if (r) return r;
|
||||
}
|
||||
|
||||
if (CKN_ON) {
|
||||
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
||||
if (r) return r;
|
||||
r = add_state_check_num(reg, ckn);
|
||||
if (r) return r;
|
||||
r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
|
||||
}
|
||||
else {
|
||||
r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
||||
}
|
||||
if (r) return r;
|
||||
r = compile_tree_empty_check(qn->target, reg, empty_info);
|
||||
if (r) return r;
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP,
|
||||
-(mod_tlen + (int )SIZE_OP_JUMP
|
||||
+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
|
||||
}
|
||||
else {
|
||||
if (qn->lower == 0) {
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
||||
if (r) return r;
|
||||
}
|
||||
r = compile_tree_empty_check(qn->target, reg, empty_info);
|
||||
if (r) return r;
|
||||
if (CKN_ON) {
|
||||
r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
|
||||
if (r) return r;
|
||||
r = add_state_check_num(reg, ckn);
|
||||
if (r) return r;
|
||||
r = add_rel_addr(reg,
|
||||
-(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
|
||||
}
|
||||
else
|
||||
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
||||
}
|
||||
}
|
||||
else if (qn->upper == 0) {
|
||||
if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
||||
if (r) return r;
|
||||
r = compile_tree(qn->target, reg);
|
||||
}
|
||||
else
|
||||
r = 0;
|
||||
}
|
||||
else if (qn->upper == 1 && qn->greedy) {
|
||||
if (qn->lower == 0) {
|
||||
if (CKN_ON) {
|
||||
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
||||
if (r) return r;
|
||||
r = add_state_check_num(reg, ckn);
|
||||
if (r) return r;
|
||||
r = add_rel_addr(reg, tlen);
|
||||
}
|
||||
else {
|
||||
r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
|
||||
}
|
||||
if (r) return r;
|
||||
}
|
||||
|
||||
r = compile_tree(qn->target, reg);
|
||||
}
|
||||
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
||||
if (CKN_ON) {
|
||||
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
||||
if (r) return r;
|
||||
r = add_state_check_num(reg, ckn);
|
||||
if (r) return r;
|
||||
r = add_rel_addr(reg, SIZE_OP_JUMP);
|
||||
}
|
||||
else {
|
||||
r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
||||
}
|
||||
|
||||
if (r) return r;
|
||||
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
||||
if (r) return r;
|
||||
r = compile_tree(qn->target, reg);
|
||||
}
|
||||
else {
|
||||
r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
||||
if (CKN_ON) {
|
||||
if (r) return r;
|
||||
r = add_opcode(reg, OP_STATE_CHECK);
|
||||
if (r) return r;
|
||||
r = add_state_check_num(reg, ckn);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
#else /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
static int
|
||||
compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
@ -751,16 +1011,6 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
is_anychar_star_qualifier(QualifierNode* qn)
|
||||
{
|
||||
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
||||
NTYPE(qn->target) == N_ANYCHAR)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
{
|
||||
@ -887,6 +1137,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
|
||||
}
|
||||
return r;
|
||||
}
|
||||
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
static int
|
||||
compile_length_option_node(EffectNode* node, regex_t* reg)
|
||||
@ -1435,7 +1686,9 @@ compile_tree(Node* node, regex_t* reg)
|
||||
}
|
||||
if (r) return r;
|
||||
|
||||
#ifdef USE_BACKREF_AT_LEVEL
|
||||
add_bacref_mems:
|
||||
#endif
|
||||
r = add_length(reg, br->back_num);
|
||||
if (r) return r;
|
||||
p = BACKREFS_P(br);
|
||||
@ -3040,6 +3293,146 @@ divide_ambig_string_node(Node* node, regex_t* reg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
#define CEC_THRES_NUM_BIG_REPEAT 512
|
||||
#define CEC_INFINITE_NUM 0x7fffffff
|
||||
|
||||
#define CEC_IN_INFINITE_REPEAT (1<<0)
|
||||
#define CEC_IN_FINITE_REPEAT (1<<1)
|
||||
#define CEC_CONT_BIG_REPEAT (1<<2)
|
||||
|
||||
static int
|
||||
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
||||
{
|
||||
int type;
|
||||
int r = state;
|
||||
|
||||
type = NTYPE(node);
|
||||
switch (type) {
|
||||
case N_LIST:
|
||||
{
|
||||
Node* prev = NULL_NODE;
|
||||
do {
|
||||
r = setup_comb_exp_check(NCONS(node).left, r, env);
|
||||
prev = NCONS(node).left;
|
||||
} while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
}
|
||||
break;
|
||||
|
||||
case N_ALT:
|
||||
{
|
||||
int ret;
|
||||
do {
|
||||
ret = setup_comb_exp_check(NCONS(node).left, state, env);
|
||||
r |= ret;
|
||||
} while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
|
||||
}
|
||||
break;
|
||||
|
||||
case N_QUALIFIER:
|
||||
{
|
||||
int child_state = state;
|
||||
int add_state = 0;
|
||||
QualifierNode* qn = &(NQUALIFIER(node));
|
||||
Node* target = qn->target;
|
||||
int var_num;
|
||||
|
||||
if (! IS_REPEAT_INFINITE(qn->upper)) {
|
||||
if (qn->upper > 1) {
|
||||
/* {0,1}, {1,1} are allowed */
|
||||
child_state |= CEC_IN_FINITE_REPEAT;
|
||||
|
||||
/* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
|
||||
if (env->backrefed_mem == 0) {
|
||||
if (NTYPE(qn->target) == N_EFFECT) {
|
||||
EffectNode* en = &(NEFFECT(qn->target));
|
||||
if (en->type == EFFECT_MEMORY) {
|
||||
if (NTYPE(en->target) == N_QUALIFIER) {
|
||||
QualifierNode* q = &(NQUALIFIER(en->target));
|
||||
if (IS_REPEAT_INFINITE(q->upper)
|
||||
&& q->greedy == qn->greedy) {
|
||||
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
||||
if (qn->upper == 1)
|
||||
child_state = state;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (state & CEC_IN_FINITE_REPEAT) {
|
||||
qn->comb_exp_check_num = -1;
|
||||
}
|
||||
else {
|
||||
if (IS_REPEAT_INFINITE(qn->upper)) {
|
||||
var_num = CEC_INFINITE_NUM;
|
||||
child_state |= CEC_IN_INFINITE_REPEAT;
|
||||
}
|
||||
else {
|
||||
var_num = qn->upper - qn->lower;
|
||||
}
|
||||
|
||||
if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
|
||||
add_state |= CEC_CONT_BIG_REPEAT;
|
||||
|
||||
if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
|
||||
((state & CEC_CONT_BIG_REPEAT) != 0 &&
|
||||
var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
|
||||
if (qn->comb_exp_check_num == 0) {
|
||||
env->num_comb_exp_check++;
|
||||
qn->comb_exp_check_num = env->num_comb_exp_check;
|
||||
if (env->curr_max_regnum > env->comb_exp_max_regnum)
|
||||
env->comb_exp_max_regnum = env->curr_max_regnum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r = setup_comb_exp_check(target, child_state, env);
|
||||
r |= add_state;
|
||||
}
|
||||
break;
|
||||
|
||||
case N_EFFECT:
|
||||
{
|
||||
EffectNode* en = &(NEFFECT(node));
|
||||
|
||||
switch (en->type) {
|
||||
case EFFECT_MEMORY:
|
||||
{
|
||||
if (env->curr_max_regnum < en->regnum)
|
||||
env->curr_max_regnum = en->regnum;
|
||||
|
||||
r = setup_comb_exp_check(en->target, state, env);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
r = setup_comb_exp_check(en->target, state, env);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
case N_CALL:
|
||||
if (IS_CALL_RECURSION(&(NCALL(node))))
|
||||
env->has_recursion = 1;
|
||||
else
|
||||
r = setup_comb_exp_check(NCALL(node).target, state, env);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define IN_ALT (1<<0)
|
||||
#define IN_NOT (1<<1)
|
||||
#define IN_REPEAT (1<<2)
|
||||
@ -3600,9 +3993,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
|
||||
}
|
||||
|
||||
static void
|
||||
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
|
||||
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
|
||||
{
|
||||
int i, n;
|
||||
int i, j, len;
|
||||
UChar *p, *end;
|
||||
OptAncInfo tanc;
|
||||
|
||||
if (! to->ignore_case && add->ignore_case) {
|
||||
@ -3611,11 +4005,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
|
||||
to->ignore_case = 1;
|
||||
}
|
||||
|
||||
for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
|
||||
to->s[i] = add->s[n];
|
||||
p = add->s;
|
||||
end = p + add->len;
|
||||
for (i = to->len; p < end; ) {
|
||||
len = enc_len(enc, p);
|
||||
if (i + len > OPT_EXACT_MAXLEN) break;
|
||||
for (j = 0; j < len && p < end; j++)
|
||||
to->s[i++] = *p++;
|
||||
}
|
||||
|
||||
to->len = i;
|
||||
to->reach_end = (n == add->len ? add->reach_end : 0);
|
||||
to->reach_end = (p == end ? add->reach_end : 0);
|
||||
|
||||
concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
||||
if (! to->reach_end) tanc.right_anchor = 0;
|
||||
@ -3630,15 +4030,10 @@ concat_opt_exact_info_str(OptExactInfo* to,
|
||||
UChar *p;
|
||||
|
||||
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
||||
if (raw) {
|
||||
len = enc_len(enc, p);
|
||||
if (i + len > OPT_EXACT_MAXLEN) break;
|
||||
for (j = 0; j < len && p < end; j++)
|
||||
to->s[i++] = *p++;
|
||||
}
|
||||
else {
|
||||
len = enc_len(enc, p);
|
||||
if (i + len > OPT_EXACT_MAXLEN) break;
|
||||
for (j = 0; j < len; j++)
|
||||
to->s[i++] = *p++;
|
||||
}
|
||||
}
|
||||
|
||||
to->len = i;
|
||||
@ -3903,11 +4298,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
||||
|
||||
if (add->exb.len > 0) {
|
||||
if (exb_reach) {
|
||||
concat_opt_exact_info(&to->exb, &add->exb);
|
||||
concat_opt_exact_info(&to->exb, &add->exb, enc);
|
||||
clear_opt_exact_info(&add->exb);
|
||||
}
|
||||
else if (exm_reach) {
|
||||
concat_opt_exact_info(&to->exm, &add->exb);
|
||||
concat_opt_exact_info(&to->exm, &add->exb, enc);
|
||||
clear_opt_exact_info(&add->exb);
|
||||
}
|
||||
}
|
||||
@ -4206,7 +4601,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
||||
if (nopt.exb.reach_end) {
|
||||
for (i = 2; i < qn->lower &&
|
||||
! is_full_opt_exact_info(&opt->exb); i++) {
|
||||
concat_opt_exact_info(&opt->exb, &nopt.exb);
|
||||
concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
|
||||
}
|
||||
if (i < qn->lower) {
|
||||
opt->exb.reach_end = 0;
|
||||
@ -4744,6 +5139,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
reg->num_null_check = 0;
|
||||
reg->repeat_range_alloc = 0;
|
||||
reg->repeat_range = (OnigRepeatRange* )NULL;
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
reg->num_comb_exp_check = 0;
|
||||
#endif
|
||||
|
||||
r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
|
||||
if (r != 0) goto err;
|
||||
@ -4797,6 +5195,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
reg->bt_mem_end |= reg->capture_history;
|
||||
}
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
if (scan_env.backrefed_mem == 0
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
|| scan_env.num_call == 0
|
||||
#endif
|
||||
) {
|
||||
setup_comb_exp_check(root, 0, &scan_env);
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
if (scan_env.has_recursion != 0) {
|
||||
scan_env.num_comb_exp_check = 0;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (scan_env.comb_exp_max_regnum > 0) {
|
||||
int i;
|
||||
for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
|
||||
if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
|
||||
scan_env.num_comb_exp_check = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reg->num_comb_exp_check = scan_env.num_comb_exp_check;
|
||||
#endif
|
||||
|
||||
clear_optimize_info(reg);
|
||||
#ifndef ONIG_DONT_OPTIMIZE
|
||||
r = set_optimize_info_from_tree(root, reg, &scan_env);
|
||||
@ -5006,6 +5431,16 @@ onig_end()
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
/* arguments type */
|
||||
#define ARG_SPECIAL -1
|
||||
#define ARG_NON 0
|
||||
#define ARG_RELADDR 1
|
||||
#define ARG_ABSADDR 2
|
||||
#define ARG_LENGTH 3
|
||||
#define ARG_MEMNUM 4
|
||||
#define ARG_OPTION 5
|
||||
#define ARG_STATE_CHECK 6
|
||||
|
||||
OnigOpInfoType OnigOpInfo[] = {
|
||||
{ OP_FINISH, "finish", ARG_NON },
|
||||
{ OP_END, "end", ARG_NON },
|
||||
@ -5036,63 +5471,73 @@ OnigOpInfoType OnigOpInfo[] = {
|
||||
{ OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
|
||||
{ OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
|
||||
{ OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
||||
{ OP_WORD, "word", ARG_NON },
|
||||
{ OP_NOT_WORD, "not-word", ARG_NON },
|
||||
{ OP_WORD_SB, "word-sb", ARG_NON },
|
||||
{ OP_WORD_MB, "word-mb", ARG_NON },
|
||||
{ OP_WORD_BOUND, "word-bound", ARG_NON },
|
||||
{ OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
||||
{ OP_WORD_BEGIN, "word-begin", ARG_NON },
|
||||
{ OP_WORD_END, "word-end", ARG_NON },
|
||||
{ OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
||||
{ OP_END_BUF, "end-buf", ARG_NON },
|
||||
{ OP_BEGIN_LINE, "begin-line", ARG_NON },
|
||||
{ OP_END_LINE, "end-line", ARG_NON },
|
||||
{ OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
||||
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
||||
{ OP_BACKREF1, "backref1", ARG_NON },
|
||||
{ OP_BACKREF2, "backref2", ARG_NON },
|
||||
{ OP_BACKREF3, "backref3", ARG_NON },
|
||||
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
||||
{ OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
||||
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
||||
{ OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
||||
{ OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
|
||||
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
||||
{ OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
||||
{ OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
||||
{ OP_SET_OPTION, "set-option", ARG_OPTION },
|
||||
{ OP_FAIL, "fail", ARG_NON },
|
||||
{ OP_JUMP, "jump", ARG_RELADDR },
|
||||
{ OP_PUSH, "push", ARG_RELADDR },
|
||||
{ OP_POP, "pop", ARG_NON },
|
||||
{ OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
||||
{ OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
||||
{ OP_REPEAT, "repeat", ARG_SPECIAL },
|
||||
{ OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
||||
{ OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
||||
{ OP_PUSH_POS, "push-pos", ARG_NON },
|
||||
{ OP_POP_POS, "pop-pos", ARG_NON },
|
||||
{ OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
||||
{ OP_FAIL_POS, "fail-pos", ARG_NON },
|
||||
{ OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
||||
{ OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
||||
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
||||
{ OP_WORD, "word", ARG_NON },
|
||||
{ OP_NOT_WORD, "not-word", ARG_NON },
|
||||
{ OP_WORD_SB, "word-sb", ARG_NON },
|
||||
{ OP_WORD_MB, "word-mb", ARG_NON },
|
||||
{ OP_WORD_BOUND, "word-bound", ARG_NON },
|
||||
{ OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
||||
{ OP_WORD_BEGIN, "word-begin", ARG_NON },
|
||||
{ OP_WORD_END, "word-end", ARG_NON },
|
||||
{ OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
||||
{ OP_END_BUF, "end-buf", ARG_NON },
|
||||
{ OP_BEGIN_LINE, "begin-line", ARG_NON },
|
||||
{ OP_END_LINE, "end-line", ARG_NON },
|
||||
{ OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
||||
{ OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
||||
{ OP_BACKREF1, "backref1", ARG_NON },
|
||||
{ OP_BACKREF2, "backref2", ARG_NON },
|
||||
{ OP_BACKREF3, "backref3", ARG_NON },
|
||||
{ OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
||||
{ OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
||||
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
||||
{ OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
||||
{ OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
|
||||
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
||||
{ OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
||||
{ OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
||||
{ OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
||||
{ OP_SET_OPTION, "set-option", ARG_OPTION },
|
||||
{ OP_FAIL, "fail", ARG_NON },
|
||||
{ OP_JUMP, "jump", ARG_RELADDR },
|
||||
{ OP_PUSH, "push", ARG_RELADDR },
|
||||
{ OP_POP, "pop", ARG_NON },
|
||||
{ OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
||||
{ OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
||||
{ OP_REPEAT, "repeat", ARG_SPECIAL },
|
||||
{ OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
||||
{ OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
||||
{ OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
||||
{ OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
||||
{ OP_PUSH_POS, "push-pos", ARG_NON },
|
||||
{ OP_POP_POS, "pop-pos", ARG_NON },
|
||||
{ OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
||||
{ OP_FAIL_POS, "fail-pos", ARG_NON },
|
||||
{ OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
||||
{ OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
||||
{ OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
||||
{ OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
|
||||
{ OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
|
||||
{ OP_CALL, "call", ARG_ABSADDR },
|
||||
{ OP_RETURN, "return", ARG_NON },
|
||||
{ OP_CALL, "call", ARG_ABSADDR },
|
||||
{ OP_RETURN, "return", ARG_NON },
|
||||
{ OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
|
||||
{ OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
|
||||
{ OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
|
||||
{ OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
|
||||
{ OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
||||
"state-check-anychar-ml*", ARG_STATE_CHECK },
|
||||
{ OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
|
||||
"state-check-anychar*-peek-next", ARG_SPECIAL },
|
||||
{ OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT,
|
||||
"state-check-anychar-ml*-peek-next", ARG_SPECIAL },
|
||||
{ -1, "", ARG_NON }
|
||||
};
|
||||
|
||||
@ -5151,6 +5596,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
||||
RelAddrType addr;
|
||||
LengthType len;
|
||||
MemNumType mem;
|
||||
StateCheckNumType scn;
|
||||
OnigCodePoint code;
|
||||
UChar *q;
|
||||
|
||||
@ -5185,6 +5631,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
||||
fprintf(f, ":%d", option);
|
||||
}
|
||||
break;
|
||||
|
||||
case ARG_STATE_CHECK:
|
||||
scn = *((StateCheckNumType* )bp);
|
||||
bp += SIZE_STATE_CHECK_NUM;
|
||||
fprintf(f, ":%d", scn);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -5362,6 +5814,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
||||
fprintf(f, ":%d:(%d)", len, addr);
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_PUSH:
|
||||
case OP_STATE_CHECK_PUSH_OR_JUMP:
|
||||
scn = *((StateCheckNumType* )bp);
|
||||
bp += SIZE_STATE_CHECK_NUM;
|
||||
addr = *((RelAddrType* )bp);
|
||||
bp += SIZE_RELADDR;
|
||||
fprintf(f, ":%d:(%d)", scn, addr);
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
|
||||
case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
|
||||
scn = *((StateCheckNumType* )bp);
|
||||
bp += SIZE_STATE_CHECK_NUM;
|
||||
fprintf(f, ":%d", scn);
|
||||
p_string(f, 1, bp);
|
||||
bp += 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
|
||||
*--bp);
|
||||
|
@ -306,6 +306,9 @@ typedef struct _StackType {
|
||||
UChar *pcode; /* byte code position */
|
||||
UChar *pstr; /* string position */
|
||||
UChar *pstr_prev; /* previous char position of pstr */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
unsigned int state_check;
|
||||
#endif
|
||||
} state;
|
||||
struct {
|
||||
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
|
||||
@ -339,29 +342,28 @@ typedef struct _StackType {
|
||||
/* stack type */
|
||||
/* used by normal-POP */
|
||||
#define STK_ALT 0x0001
|
||||
#define STK_LOOK_BEHIND_NOT 0x0003
|
||||
#define STK_POS_NOT 0x0005
|
||||
/* avoided by normal-POP, but value should be small */
|
||||
#define STK_NULL_CHECK_START 0x0100
|
||||
#define STK_LOOK_BEHIND_NOT 0x0002
|
||||
#define STK_POS_NOT 0x0003
|
||||
/* handled by normal-POP */
|
||||
#define STK_MEM_START 0x0200
|
||||
#define STK_MEM_END 0x0300
|
||||
#define STK_REPEAT_INC 0x0400
|
||||
#define STK_MEM_START 0x0100
|
||||
#define STK_MEM_END 0x8200
|
||||
#define STK_REPEAT_INC 0x0300
|
||||
#define STK_STATE_CHECK_MARK 0x1000
|
||||
/* avoided by normal-POP */
|
||||
#define STK_NULL_CHECK_START 0x3000
|
||||
#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
|
||||
#define STK_MEM_END_MARK 0x8400
|
||||
#define STK_POS 0x0500 /* used when POP-POS */
|
||||
#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
|
||||
#define STK_REPEAT 0x0700
|
||||
#define STK_CALL_FRAME 0x0800
|
||||
#define STK_RETURN 0x0900
|
||||
#define STK_MEM_END_MARK 0x0a00
|
||||
#define STK_VOID 0x0b00 /* for fill a blank */
|
||||
#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */
|
||||
#define STK_VOID 0x0a00 /* for fill a blank */
|
||||
|
||||
/* stack type check mask */
|
||||
#define STK_MASK_POP_USED 0x00ff
|
||||
#define IS_TO_VOID_TARGET(stk) \
|
||||
(((stk)->type & STK_MASK_POP_USED) || \
|
||||
(stk)->type == STK_NULL_CHECK_START || (stk)->type == STK_NULL_CHECK_END)
|
||||
#define STK_MASK_POP_USED 0x00ff
|
||||
#define STK_MASK_TO_VOID_TARGET 0x10ff
|
||||
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
|
||||
|
||||
typedef struct {
|
||||
void* stack_p;
|
||||
@ -369,6 +371,10 @@ typedef struct {
|
||||
OnigOptionType options;
|
||||
OnigRegion* region;
|
||||
const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
void* state_check_buff;
|
||||
int state_check_buff_size;
|
||||
#endif
|
||||
} MatchArg;
|
||||
|
||||
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
|
||||
@ -378,7 +384,36 @@ typedef struct {
|
||||
(msa).start = (arg_start);\
|
||||
} while (0)
|
||||
|
||||
#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
|
||||
|
||||
/*
 * STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
 *
 * Prepare the combination-explosion bitmap of a MatchArg.
 *   msa       : MatchArg lvalue; state_check_buff and state_check_buff_size
 *               are always written, so they are never left indeterminate.
 *   str_len   : length of the subject string (any integer expression).
 *   state_num : number of state-check points of the regex.
 *
 * A bitmap of ((str_len + 1) * state_num) bits, rounded up to bytes, is
 * allocated and zero-filled only when state_num > 0, str_len reaches
 * STATE_CHECK_STRING_THRESHOLD_LEN and the byte size stays below
 * STATE_CHECK_BUFF_MAX_SIZE.  Sizes of at least
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE come from xmalloc(), smaller ones
 * from xalloca().  In every other case (including allocation failure) the
 * buffer stays NULL and the recorded size stays 0.
 */
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num) do { \
  (msa).state_check_buff      = (void* )0;\
  (msa).state_check_buff_size = 0;\
  /* The third test bounds (str_len + 1) * state_num before it is computed, */\
  /* so the int arithmetic below cannot overflow; it is equivalent to       */\
  /* "size < STATE_CHECK_BUFF_MAX_SIZE" for every non-overflowing input.    */\
  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN &&\
      (str_len) < (STATE_CHECK_BUFF_MAX_SIZE * 8 - 8) / (state_num)) {\
    int size = ((int )((str_len) + 1) * (state_num) + 7) / 8;\
    if (size > 0 && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
        (msa).state_check_buff = (void* )xmalloc(size);\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      /* Only record and clear the buffer when the allocation succeeded. */\
      if ((msa).state_check_buff != (void* )0) {\
        (msa).state_check_buff_size = size;\
        xmemset((msa).state_check_buff, 0, (size_t )size);\
      }\
    }\
  }\
} while (0)
|
||||
|
||||
/*
 * MATCH_ARG_FREE(msa)
 *
 * Release the heap resources attached to a MatchArg: the match stack
 * (stack_p) and, when it was heap-allocated, the state-check bitmap.
 * The bitmap is passed to xfree() only when it is non-NULL and its recorded
 * size is at least STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE; smaller bitmaps
 * are obtained with xalloca() and must not be freed.  The pointer is tested
 * first so the size field is not read when no bitmap exists.
 *
 * Expands to a single statement (no trailing ';' in the definition) so it
 * can be used safely as the body of an unbraced if/else.
 */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff && \
      (msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    xfree((msa).state_check_buff);\
  }\
} while (0)
|
||||
#else
|
||||
#define STATE_CHECK_BUFF_INIT(msa, str_len, state_num)
|
||||
/*
 * MATCH_ARG_FREE(msa) -- variant used when USE_COMBINATION_EXPLOSION_CHECK
 * is not defined: releases the match stack (stack_p) if one was allocated.
 * Wrapped in do { } while (0) so that a following "else" cannot bind to the
 * macro's internal "if".
 */
#define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while (0)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
|
||||
@ -472,6 +507,73 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
#define STACK_AT(index) (stk_base + (index))
|
||||
#define GET_STACK_INDEX(stk) ((stk) - stk_base)
|
||||
|
||||
/* Push a stack entry that carries only a type tag: invokes STACK_ENSURE(1),
   stores stack_type into stk->type, then STACK_INC.  No other field of the
   entry is written.  Relies on `stk` being in scope at the expansion site. */
#define STACK_PUSH_TYPE(stack_type) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = (stack_type);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
/* Nonzero when the entry's type has at least one bit of
   STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
/* Bit index in the state-check bitmap for string position s and check
   number snum: (offset of s from `str`) * num_comb_exp_check + (snum - 1).
   `str` and `num_comb_exp_check` are taken from the expansion site, not
   passed as arguments. */
#define STATE_CHECK_POS(s,snum) \
|
||||
(((s) - str) * num_comb_exp_check + ((snum) - 1))
|
||||
/* Read the bitmap bit for check number snum at the current position into v.
   v receives the masked byte value (nonzero when the bit is set, not
   normalised to 1), or 0 when state_check_buff is NULL.
   NOTE: the position used is the identifier `s` from the expansion site;
   it is not a macro parameter.  `state_check_buff` is likewise taken from
   the enclosing scope. */
#define STATE_CHECK_VAL(v,snum) do {\
|
||||
if (state_check_buff != NULL) {\
|
||||
int x = STATE_CHECK_POS(s,snum);\
|
||||
(v) = state_check_buff[x/8] & (1<<(x%8));\
|
||||
}\
|
||||
else (v) = 0;\
|
||||
} while(0)
|
||||
|
||||
|
||||
/*
 * ELSE_IF_STATE_CHECK_MARK(stk)
 *
 * Expands to an "else if" clause, so it must directly follow an if-block.
 * When the entry is a STK_STATE_CHECK_MARK, the bit addressed by the entry's
 * saved string position (u.state.pstr) and check number
 * (u.state.state_check) is set in state_check_buff.
 * The argument is parenthesized at every use so that any pointer expression
 * (not only a plain identifier) expands correctly.
 * `state_check_buff` and the names used by STATE_CHECK_POS come from the
 * expansion site.
 */
#define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    int x = STATE_CHECK_POS((stk)->u.state.pstr, (stk)->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }
|
||||
|
||||
/* STACK_PUSH variant compiled under USE_COMBINATION_EXPLOSION_CHECK.
   Invokes STACK_ENSURE(1), then fills the entry at `stk` with the given
   type, byte-code position (pat), string position (s) and previous-char
   position (sprev), sets u.state.state_check to 0, and finally STACK_INC. */
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = (stack_type);\
|
||||
stk->u.state.pcode = (pat);\
|
||||
stk->u.state.pstr = (s);\
|
||||
stk->u.state.pstr_prev = (sprev);\
|
||||
stk->u.state.state_check = 0;\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
/* Push an entry holding only a type and a byte-code position, with
   u.state.state_check set to 0.  Unlike STACK_PUSH it does not invoke
   STACK_ENSURE and does not write pstr / pstr_prev.
   NOTE(review): presumably the caller has already reserved the slot --
   confirm at the call sites. */
#define STACK_PUSH_ENSURED(stack_type,pat) do {\
|
||||
stk->type = (stack_type);\
|
||||
stk->u.state.pcode = (pat);\
|
||||
stk->u.state.state_check = 0;\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
/* Push a STK_ALT entry (pat, s, sprev) that also remembers a state-check
   number: u.state.state_check receives snum when state_check_buff is
   non-NULL and 0 otherwise.  `state_check_buff` is taken from the expansion
   site. */
#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = STK_ALT;\
|
||||
stk->u.state.pcode = (pat);\
|
||||
stk->u.state.pstr = (s);\
|
||||
stk->u.state.pstr_prev = (sprev);\
|
||||
stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
/* Push a STK_STATE_CHECK_MARK entry recording string position s and check
   number snum.  Nothing is pushed when state_check_buff is NULL.  Only
   type, u.state.pstr and u.state.state_check are written; pcode and
   pstr_prev of the entry are left untouched. */
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
|
||||
if (state_check_buff != NULL) {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = STK_STATE_CHECK_MARK;\
|
||||
stk->u.state.pstr = (s);\
|
||||
stk->u.state.state_check = (snum);\
|
||||
STACK_INC;\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
#else /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
#define ELSE_IF_STATE_CHECK_MARK(stk)
|
||||
|
||||
#define STACK_PUSH(stack_type,pat,s,sprev) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = (stack_type);\
|
||||
@ -486,12 +588,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
stk->u.state.pcode = (pat);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
|
||||
#define STACK_PUSH_TYPE(stack_type) do {\
|
||||
STACK_ENSURE(1);\
|
||||
stk->type = (stack_type);\
|
||||
STACK_INC;\
|
||||
} while(0)
|
||||
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
|
||||
#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
|
||||
@ -551,7 +648,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
k = stk;\
|
||||
while (k > stk_base) {\
|
||||
k--;\
|
||||
if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \
|
||||
if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
|
||||
&& k->u.mem.num == (mnum)) {\
|
||||
level++;\
|
||||
}\
|
||||
@ -631,6 +728,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
stk--;\
|
||||
STACK_BASE_CHECK(stk, "STACK_POP"); \
|
||||
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
|
||||
ELSE_IF_STATE_CHECK_MARK(stk);\
|
||||
}\
|
||||
break;\
|
||||
case STACK_POP_LEVEL_MEM_START:\
|
||||
@ -642,6 +740,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
|
||||
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
|
||||
}\
|
||||
ELSE_IF_STATE_CHECK_MARK(stk);\
|
||||
}\
|
||||
break;\
|
||||
default:\
|
||||
@ -660,6 +759,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
|
||||
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
|
||||
}\
|
||||
ELSE_IF_STATE_CHECK_MARK(stk);\
|
||||
}\
|
||||
break;\
|
||||
}\
|
||||
@ -681,6 +781,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
|
||||
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
|
||||
}\
|
||||
ELSE_IF_STATE_CHECK_MARK(stk);\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
@ -700,6 +801,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
|
||||
mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
|
||||
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
|
||||
}\
|
||||
ELSE_IF_STATE_CHECK_MARK(stk);\
|
||||
}\
|
||||
} while(0)
|
||||
|
||||
@ -947,6 +1049,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
|
||||
is_fail = 0; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define ON_STR_BEGIN(s) ((s) == str)
|
||||
#define ON_STR_END(s) ((s) == end)
|
||||
#define IS_EMPTY_STR (str == end)
|
||||
@ -1314,6 +1417,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
StackIndex si;
|
||||
StackIndex *repeat_stk;
|
||||
StackIndex *mem_start_stk, *mem_end_stk;
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int scv;
|
||||
unsigned char* state_check_buff = msa->state_check_buff;
|
||||
int num_comb_exp_check = reg->num_comb_exp_check;
|
||||
#endif
|
||||
n = reg->num_repeat + reg->num_mem * 2;
|
||||
|
||||
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
|
||||
@ -1924,6 +2032,94 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
while (s < end) {
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
|
||||
n = enc_len(encode, s);
|
||||
DATA_ENSURE(n);
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
sprev = s;
|
||||
s += n;
|
||||
}
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_ANYCHAR_ML_STAR:
|
||||
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
|
||||
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
while (s < end) {
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
|
||||
n = enc_len(encode, s);
|
||||
if (n > 1) {
|
||||
DATA_ENSURE(n);
|
||||
sprev = s;
|
||||
s += n;
|
||||
}
|
||||
else {
|
||||
sprev = s;
|
||||
s++;
|
||||
}
|
||||
}
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT:
|
||||
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT);
|
||||
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
while (s < end) {
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
if (*p == *s) {
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
|
||||
}
|
||||
n = enc_len(encode, s);
|
||||
DATA_ENSURE(n);
|
||||
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
|
||||
sprev = s;
|
||||
s += n;
|
||||
}
|
||||
p++;
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT:
|
||||
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT);
|
||||
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
while (s < end) {
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
if (*p == *s) {
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p + 1, s, sprev, mem);
|
||||
}
|
||||
n = enc_len(encode, s);
|
||||
if (n >1) {
|
||||
DATA_ENSURE(n);
|
||||
sprev = s;
|
||||
s += n;
|
||||
}
|
||||
else {
|
||||
sprev = s;
|
||||
s++;
|
||||
}
|
||||
}
|
||||
p++;
|
||||
STAT_OP_OUT;
|
||||
break;
|
||||
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
case OP_WORD: STAT_OP_IN(OP_WORD);
|
||||
DATA_ENSURE(1);
|
||||
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
|
||||
@ -2451,6 +2647,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
continue;
|
||||
break;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH);
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
GET_RELADDR_INC(addr, p);
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
|
||||
STAT_OP_OUT;
|
||||
continue;
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
GET_RELADDR_INC(addr, p);
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) {
|
||||
p += addr;
|
||||
}
|
||||
else {
|
||||
STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
|
||||
}
|
||||
STAT_OP_OUT;
|
||||
continue;
|
||||
break;
|
||||
|
||||
case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK);
|
||||
GET_STATE_CHECK_NUM_INC(mem, p);
|
||||
STATE_CHECK_VAL(scv, mem);
|
||||
if (scv) goto fail;
|
||||
|
||||
STACK_PUSH_STATE_CHECK(s, mem);
|
||||
STAT_OP_OUT;
|
||||
continue;
|
||||
break;
|
||||
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
||||
|
||||
case OP_POP: STAT_OP_IN(OP_POP);
|
||||
STACK_POP_ONE;
|
||||
STAT_OP_OUT;
|
||||
@ -2525,7 +2758,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
|
||||
repeat_inc:
|
||||
stkp->u.repeat.count++;
|
||||
if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
|
||||
if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
|
||||
/* end of repeat. Nothing to do. */
|
||||
}
|
||||
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
|
||||
@ -2555,8 +2788,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
|
||||
repeat_inc_ng:
|
||||
stkp->u.repeat.count++;
|
||||
if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
|
||||
IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
|
||||
if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
|
||||
if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
|
||||
UChar* pcode = stkp->u.repeat.pcode;
|
||||
|
||||
@ -2685,6 +2917,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
|
||||
p = stk->u.state.pcode;
|
||||
s = stk->u.state.pstr;
|
||||
sprev = stk->u.state.pstr_prev;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
if (stk->u.state.state_check != 0) {
|
||||
stk->type = STK_STATE_CHECK_MARK;
|
||||
stk++;
|
||||
}
|
||||
#endif
|
||||
|
||||
STAT_OP_OUT;
|
||||
continue;
|
||||
break;
|
||||
@ -2869,66 +3109,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
|
||||
const UChar* text, const UChar* text_end,
|
||||
const UChar* text_range)
|
||||
{
|
||||
const UChar *s, *t, *p, *end;
|
||||
const UChar *s, *se, *t, *p, *end;
|
||||
const UChar *tail;
|
||||
int skip;
|
||||
int skip, tlen1;
|
||||
|
||||
#ifdef ONIG_DEBUG_SEARCH
|
||||
fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
|
||||
(int )text, (int )text_end, (int )text_range);
|
||||
#endif
|
||||
|
||||
end = text_range + (target_end - target) - 1;
|
||||
tlen1 = (target_end - target) - 1;
|
||||
end = text_range + tlen1;
|
||||
if (end > text_end)
|
||||
end = text_end;
|
||||
|
||||
tail = target_end - 1;
|
||||
s = text;
|
||||
while ((s - text) < target_end - target) {
|
||||
s += enc_len(reg->enc, s);
|
||||
}
|
||||
s--; /* set to text check tail position. */
|
||||
|
||||
if (IS_NULL(reg->int_map)) {
|
||||
while (s < end) {
|
||||
p = s;
|
||||
p = se = s + tlen1;
|
||||
t = tail;
|
||||
while (t >= target && *p == *t) {
|
||||
p--; t--;
|
||||
while (*p == *t && t >= target) {
|
||||
p--; t--;
|
||||
}
|
||||
if (t < target) return (UChar* )(p + 1);
|
||||
if (t < target) return (UChar* )s;
|
||||
|
||||
skip = reg->map[*s];
|
||||
p = s + 1;
|
||||
if (p >= text_end) return (UChar* )NULL;
|
||||
t = p;
|
||||
skip = reg->map[*se];
|
||||
t = s;
|
||||
do {
|
||||
p += enc_len(reg->enc, p);
|
||||
} while ((p - t) < skip && p < text_end);
|
||||
|
||||
s += (p - t);
|
||||
s += enc_len(reg->enc, s);
|
||||
} while ((s - t) < skip && s < end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (s < end) {
|
||||
p = s;
|
||||
p = se = s + tlen1;
|
||||
t = tail;
|
||||
while (t >= target && *p == *t) {
|
||||
p--; t--;
|
||||
while (*p == *t && t >= target) {
|
||||
p--; t--;
|
||||
}
|
||||
if (t < target) return (UChar* )(p + 1);
|
||||
if (t < target) return (UChar* )s;
|
||||
|
||||
skip = reg->int_map[*s];
|
||||
p = s + 1;
|
||||
if (p >= text_end) return (UChar* )NULL;
|
||||
t = p;
|
||||
skip = reg->int_map[*se];
|
||||
t = s;
|
||||
do {
|
||||
p += enc_len(reg->enc, p);
|
||||
} while ((p - t) < skip && p < text_end);
|
||||
|
||||
s += (p - t);
|
||||
s += enc_len(reg->enc, s);
|
||||
} while ((s - t) < skip && s < end);
|
||||
}
|
||||
}
|
||||
|
||||
return (UChar* )NULL;
|
||||
}
|
||||
|
||||
@ -3083,6 +3313,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
|
||||
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, at);
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
|
||||
|
||||
if (region
|
||||
#ifdef USE_POSIX_REGION_OPTION
|
||||
@ -3343,6 +3574,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
int r;
|
||||
UChar *s, *prev;
|
||||
MatchArg msa;
|
||||
const UChar *orig_start = start;
|
||||
|
||||
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
|
||||
start:
|
||||
@ -3484,6 +3716,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
prev = (UChar* )NULL;
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, start);
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
msa.state_check_buff = (void* )0;
|
||||
#endif
|
||||
MATCH_AND_RETURN_CHECK;
|
||||
goto mismatch;
|
||||
}
|
||||
@ -3495,7 +3730,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
(int )(end - str), (int )(start - str), (int )(range - str));
|
||||
#endif
|
||||
|
||||
MATCH_ARG_INIT(msa, option, region, start);
|
||||
MATCH_ARG_INIT(msa, option, region, orig_start);
|
||||
STATE_CHECK_BUFF_INIT(msa, end - str, reg->num_comb_exp_check);
|
||||
|
||||
s = (UChar* )start;
|
||||
if (range > start) { /* forward search */
|
||||
|
@ -59,7 +59,7 @@
|
||||
/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
|
||||
#define USE_NAMED_GROUP
|
||||
#define USE_SUBEXP_CALL
|
||||
#define USE_BACKREF_AT_LEVEL
|
||||
#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */
|
||||
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
@ -82,6 +82,7 @@
|
||||
/* interface to external system */
|
||||
#ifdef NOT_RUBY /* given from Makefile */
|
||||
#include "config.h"
|
||||
#define USE_BACKREF_AT_LEVEL
|
||||
#define USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
@ -117,6 +118,9 @@
|
||||
|
||||
#endif /* else NOT_RUBY */
|
||||
|
||||
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
|
||||
#define STATE_CHECK_BUFF_MAX_SIZE 0x08000000
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 8
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
@ -639,34 +643,35 @@ enum OpCode {
|
||||
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
|
||||
|
||||
OP_CALL, /* \g<name> */
|
||||
OP_RETURN
|
||||
};
|
||||
OP_RETURN,
|
||||
|
||||
/* arguments type */
|
||||
#define ARG_SPECIAL -1
|
||||
#define ARG_NON 0
|
||||
#define ARG_RELADDR 1
|
||||
#define ARG_ABSADDR 2
|
||||
#define ARG_LENGTH 3
|
||||
#define ARG_MEMNUM 4
|
||||
#define ARG_OPTION 5
|
||||
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
|
||||
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
|
||||
OP_STATE_CHECK, /* check only */
|
||||
OP_STATE_CHECK_ANYCHAR_STAR,
|
||||
OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
||||
OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT,
|
||||
OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT
|
||||
};
|
||||
|
||||
typedef int RelAddrType;
|
||||
typedef int AbsAddrType;
|
||||
typedef int LengthType;
|
||||
typedef int RepeatNumType;
|
||||
typedef short int MemNumType;
|
||||
typedef short int StateCheckNumType;
|
||||
typedef void* PointerType;
|
||||
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR sizeof(RelAddrType)
|
||||
#define SIZE_ABSADDR sizeof(AbsAddrType)
|
||||
#define SIZE_LENGTH sizeof(LengthType)
|
||||
#define SIZE_MEMNUM sizeof(MemNumType)
|
||||
#define SIZE_REPEATNUM sizeof(RepeatNumType)
|
||||
#define SIZE_OPTION sizeof(OnigOptionType)
|
||||
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
|
||||
#define SIZE_POINTER sizeof(PointerType)
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR sizeof(RelAddrType)
|
||||
#define SIZE_ABSADDR sizeof(AbsAddrType)
|
||||
#define SIZE_LENGTH sizeof(LengthType)
|
||||
#define SIZE_MEMNUM sizeof(MemNumType)
|
||||
#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
|
||||
#define SIZE_REPEATNUM sizeof(RepeatNumType)
|
||||
#define SIZE_OPTION sizeof(OnigOptionType)
|
||||
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
|
||||
#define SIZE_POINTER sizeof(PointerType)
|
||||
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
@ -692,6 +697,7 @@ typedef void* PointerType;
|
||||
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
|
||||
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
|
||||
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
|
||||
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
|
||||
|
||||
/* code point's address must be aligned address. */
|
||||
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
|
||||
@ -734,6 +740,13 @@ typedef void* PointerType;
|
||||
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
|
||||
#define SIZE_OP_RETURN SIZE_OPCODE
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1 + SIZE_STATE_CHECK_NUM)
|
||||
#endif
|
||||
|
||||
#define MC_ESC(enc) (enc)->meta_char_table.esc
|
||||
#define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar
|
||||
|
@ -940,6 +940,13 @@ scan_env_clear(ScanEnv* env)
|
||||
|
||||
for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
|
||||
env->mem_nodes_static[i] = NULL_NODE;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
env->num_comb_exp_check = 0;
|
||||
env->comb_exp_max_regnum = 0;
|
||||
env->curr_max_regnum = 0;
|
||||
env->has_recursion = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1321,11 +1328,17 @@ node_new_qualifier(int lower, int upper, int by_number)
|
||||
NQUALIFIER(node).lower = lower;
|
||||
NQUALIFIER(node).upper = upper;
|
||||
NQUALIFIER(node).greedy = 1;
|
||||
NQUALIFIER(node).by_number = by_number;
|
||||
NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
|
||||
NQUALIFIER(node).head_exact = NULL_NODE;
|
||||
NQUALIFIER(node).next_head_exact = NULL_NODE;
|
||||
NQUALIFIER(node).is_refered = 0;
|
||||
if (by_number != 0)
|
||||
NQUALIFIER(node).state |= NST_BY_NUMBER;
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
NQUALIFIER(node).comb_exp_check_num = 0;
|
||||
#endif
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -2140,7 +2153,7 @@ enum ReduceType {
|
||||
RQ_AQ, /* to '*?' */
|
||||
RQ_QQ, /* to '??' */
|
||||
RQ_P_QQ, /* to '+)??' */
|
||||
RQ_PQ_Q, /* to '+?)?' */
|
||||
RQ_PQ_Q /* to '+?)?' */
|
||||
};
|
||||
|
||||
static enum ReduceType ReduceTypeTable[6][6] = {
|
||||
@ -4633,16 +4646,14 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
{ /* check redundant double repeat. */
|
||||
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
|
||||
QualifierNode* qnt = &(NQUALIFIER(target));
|
||||
int nestq_num = popular_qualifier_num(qn);
|
||||
int targetq_num = popular_qualifier_num(qnt);
|
||||
|
||||
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
if (qn->by_number == 0 && qnt->by_number == 0 &&
|
||||
if (!IS_QUALIFIER_BY_NUMBER(qn) && !IS_QUALIFIER_BY_NUMBER(qnt) &&
|
||||
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
|
||||
int nestq_num, targetq_num;
|
||||
UChar buf[WARN_BUFSIZE];
|
||||
|
||||
nestq_num = popular_qualifier_num(qn);
|
||||
targetq_num = popular_qualifier_num(qnt);
|
||||
|
||||
switch(ReduceTypeTable[targetq_num][nestq_num]) {
|
||||
case RQ_ASIS:
|
||||
break;
|
||||
@ -4673,9 +4684,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
|
||||
|
||||
warn_exit:
|
||||
#endif
|
||||
if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
|
||||
onig_reduce_nested_qualifier(qnode, target);
|
||||
goto q_exit;
|
||||
if (targetq_num >= 0) {
|
||||
if (nestq_num >= 0) {
|
||||
onig_reduce_nested_qualifier(qnode, target);
|
||||
goto q_exit;
|
||||
}
|
||||
else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
|
||||
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
|
||||
if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
|
||||
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -124,11 +124,13 @@ typedef struct {
|
||||
int lower;
|
||||
int upper;
|
||||
int greedy;
|
||||
int by_number; /* {n,m} */
|
||||
int target_empty_info;
|
||||
struct _Node* head_exact;
|
||||
struct _Node* next_head_exact;
|
||||
int is_refered; /* include called node. don't eliminate even if {0} */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
|
||||
#endif
|
||||
} QualifierNode;
|
||||
|
||||
/* status bits */
|
||||
@ -146,6 +148,7 @@ typedef struct {
|
||||
#define NST_NAME_REF (1<<11)
|
||||
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
|
||||
#define NST_NEST_LEVEL (1<<13)
|
||||
#define NST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
|
||||
#define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
|
||||
#define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
|
||||
@ -168,6 +171,7 @@ typedef struct {
|
||||
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
|
||||
#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
|
||||
#define IS_QUALIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
|
||||
|
||||
typedef struct {
|
||||
int state;
|
||||
@ -277,6 +281,12 @@ typedef struct {
|
||||
int mem_alloc;
|
||||
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
|
||||
Node** mem_nodes_dynamic;
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int num_comb_exp_check;
|
||||
int comb_exp_max_regnum;
|
||||
int curr_max_regnum;
|
||||
int has_recursion;
|
||||
#endif
|
||||
} ScanEnv;
|
||||
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
regposix.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -192,7 +192,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
||||
end = (UChar* )(str + len);
|
||||
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
||||
(OnigRegion* )pmatch, options);
|
||||
(OnigRegion* )pm, options);
|
||||
|
||||
if (r >= 0) {
|
||||
r = 0; /* Match */
|
||||
@ -212,6 +212,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
if (pm != pmatch && pm != NULL)
|
||||
xfree(pm);
|
||||
|
||||
#if 0
|
||||
if (reg->re_nsub > nmatch - 1)
|
||||
reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user