mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-26 04:25:51 +08:00
regex.h: Add friend classes.
2013-09-18 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Add friend classes. (match_results<>::position, regex_iterator<>::operator++): Implement position specification in regex_iterator. (regex_match<>, regex_search<>): Move match_results initializations to these function. Remove `todo`. * include/bits/regex_compiler.tcc: (_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching. * include/bits/regex_constants.h: Fix indentation. Change match_flag_type to enum type. * include/bits/regex_executor.h: Merge identical code to the base class _Executor. Support flags in regex_constants. * include/bits/regex_executor.tcc: Likewise. * include/bits/regex_scanner.h: Add comments. * include/bits/regex_scanner.tcc: Same. * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: Add a testcase. * testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New. * testsuite/28_regex/iterators/regex_iterator/char/ string_position_01.cc: Remove `xfail`. * testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: Remove `xfail` and make the case really work. From-SVN: r202706
This commit is contained in:
parent
64bc8861e9
commit
b21abceec3
@ -1,3 +1,28 @@
|
||||
2013-09-18 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h: Add friend classes.
|
||||
(match_results<>::position, regex_iterator<>::operator++):
|
||||
Implement position specification in regex_iterator.
|
||||
(regex_match<>, regex_search<>):
|
||||
Move match_results initializations to these function. Remove `todo`.
|
||||
* include/bits/regex_compiler.tcc:
|
||||
(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
|
||||
* include/bits/regex_constants.h:
|
||||
Fix indentation. Change match_flag_type to enum type.
|
||||
* include/bits/regex_executor.h:
|
||||
Merge identical code to the base class _Executor.
|
||||
Support flags in regex_constants.
|
||||
* include/bits/regex_executor.tcc: Likewise.
|
||||
* include/bits/regex_scanner.h: Add comments.
|
||||
* include/bits/regex_scanner.tcc: Same.
|
||||
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
|
||||
Add a testcase.
|
||||
* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
|
||||
* testsuite/28_regex/iterators/regex_iterator/char/
|
||||
string_position_01.cc: Remove `xfail`.
|
||||
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
|
||||
Remove `xfail` and make the case really work.
|
||||
|
||||
2013-09-18 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
* testsuite/performance/25_algorithms/search_n.cc: Fix typo.
|
||||
|
@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
const basic_regex<_Cp, _Rp>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_Executor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_DFSExecutor;
|
||||
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_BFSExecutor;
|
||||
|
||||
flag_type _M_flags;
|
||||
_Rx_traits _M_traits;
|
||||
_AutomatonPtr _M_automaton;
|
||||
@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
*/
|
||||
explicit
|
||||
match_results(const _Alloc& __a = _Alloc())
|
||||
: _Base_type(__a)
|
||||
: _Base_type(__a), _M_in_iterator(false)
|
||||
{ }
|
||||
|
||||
/**
|
||||
* @brief Copy constructs a %match_results.
|
||||
*/
|
||||
match_results(const match_results& __rhs)
|
||||
: _Base_type(__rhs)
|
||||
: _Base_type(__rhs), _M_in_iterator(false)
|
||||
{ }
|
||||
|
||||
/**
|
||||
* @brief Move constructs a %match_results.
|
||||
*/
|
||||
match_results(match_results&& __rhs) noexcept
|
||||
: _Base_type(std::move(__rhs))
|
||||
: _Base_type(std::move(__rhs)), _M_in_iterator(false)
|
||||
{ }
|
||||
|
||||
/**
|
||||
@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
difference_type
|
||||
position(size_type __sub = 0) const
|
||||
{
|
||||
return __sub < size() ? std::distance(this->prefix().first,
|
||||
(*this)[__sub].first) : -1;
|
||||
// [28.12.1.4.5]
|
||||
if (_M_in_iterator)
|
||||
return __sub < size() ? std::distance(_M_begin,
|
||||
(*this)[__sub].first) : -1;
|
||||
else
|
||||
return __sub < size() ? std::distance(this->prefix().first,
|
||||
(*this)[__sub].first) : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
template<typename, typename, typename, typename>
|
||||
friend class __detail::_BFSExecutor;
|
||||
|
||||
template<typename, typename, typename>
|
||||
friend class regex_iterator;
|
||||
|
||||
template<typename _Bp, typename _Ap,
|
||||
typename _Ch_type, typename _Rx_traits>
|
||||
friend bool
|
||||
@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
const basic_regex<_Ch_type,
|
||||
_Rx_traits>&,
|
||||
regex_constants::match_flag_type);
|
||||
|
||||
_Bi_iter _M_begin;
|
||||
bool _M_in_iterator;
|
||||
};
|
||||
|
||||
typedef match_results<const char*> cmatch;
|
||||
@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
* @retval false Otherwise.
|
||||
*
|
||||
* @throws an exception of type regex_error.
|
||||
*
|
||||
* @todo Implement this function.
|
||||
*/
|
||||
template<typename _Bi_iter, typename _Alloc,
|
||||
typename _Ch_type, typename _Rx_traits>
|
||||
@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{
|
||||
if (__re._M_automaton == nullptr)
|
||||
return false;
|
||||
|
||||
auto __size = __re._M_automaton->_M_sub_count();
|
||||
__size += 2;
|
||||
__m.resize(__size);
|
||||
for (decltype(__size) __i = 0; __i < __size; ++__i)
|
||||
__m.at(__i).matched = false;
|
||||
|
||||
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
|
||||
{
|
||||
for (auto __it : __m)
|
||||
@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
* undefined.
|
||||
*
|
||||
* @throws an exception of type regex_error.
|
||||
*
|
||||
* @todo Implement this function.
|
||||
*/
|
||||
template<typename _Bi_iter, typename _Alloc,
|
||||
typename _Ch_type, typename _Rx_traits>
|
||||
@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{
|
||||
if (__re._M_automaton == nullptr)
|
||||
return false;
|
||||
|
||||
auto __size = __re._M_automaton->_M_sub_count();
|
||||
__size += 2;
|
||||
__m.resize(__size);
|
||||
for (decltype(__size) __i = 0; __i < __size; ++__i)
|
||||
__m.at(__i).matched = false;
|
||||
|
||||
if (__detail::__get_executor(__first, __last, __m, __re, __flags)
|
||||
->_M_search())
|
||||
{
|
||||
@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
|
||||
operator++()
|
||||
{
|
||||
// FIXME: In all cases in which the call to regex_search returns true,
|
||||
// In all cases in which the call to regex_search returns true,
|
||||
// match.prefix().first shall be equal to the previous value of
|
||||
// match[0].second, and for each index i in the half-open range
|
||||
// [0, match.size()) for which match[i].matched is true,
|
||||
@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
|
||||
| regex_constants::match_not_null
|
||||
| regex_constants::match_continuous))
|
||||
return *this;
|
||||
{
|
||||
_M_match._M_in_iterator = true;
|
||||
_M_match._M_begin = _M_begin;
|
||||
return *this;
|
||||
}
|
||||
else
|
||||
++__start;
|
||||
}
|
||||
_M_flags |= regex_constants::match_prev_avail;
|
||||
if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
|
||||
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
|
||||
{
|
||||
_M_match._M_in_iterator = true;
|
||||
_M_match._M_begin = _M_begin;
|
||||
}
|
||||
else
|
||||
_M_match = value_type();
|
||||
}
|
||||
return *this;
|
||||
|
@ -28,7 +28,7 @@
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// TODO make comments doxygen format.
|
||||
// FIXME make comments doxygen format.
|
||||
|
||||
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
|
||||
// (http://swtch.com/~rsc/regexp/regexp1.html"),
|
||||
@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
if (__n < 0)
|
||||
__throw_regex_error(regex_constants::error_badbrace);
|
||||
auto __end = _M_nfa._M_insert_dummy();
|
||||
// _M_alt is the "match more" branch, and _M_next is the
|
||||
// "match less" one. Switch _M_alt and _M_next of all created
|
||||
// nodes. This is a hacking but IMO works well.
|
||||
std::stack<_StateIdT> __stack;
|
||||
for (int __i = 0; __i < __n; ++__i)
|
||||
{
|
||||
auto __tmp = __r._M_clone();
|
||||
__e._M_append
|
||||
(_StateSeqT(_M_nfa,
|
||||
_M_nfa._M_insert_alt(__tmp._M_start,
|
||||
__end, __neg),
|
||||
__tmp._M_end));
|
||||
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
|
||||
__end, __neg);
|
||||
__stack.push(__alt);
|
||||
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
|
||||
}
|
||||
__e._M_append(__end);
|
||||
while (!__stack.empty())
|
||||
{
|
||||
auto& __tmp = _M_nfa[__stack.top()];
|
||||
__stack.pop();
|
||||
swap(__tmp._M_next, __tmp._M_alt);
|
||||
}
|
||||
}
|
||||
else // {3,}
|
||||
{
|
||||
|
@ -52,19 +52,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
*/
|
||||
//@{
|
||||
enum __syntax_option
|
||||
{
|
||||
_S_icase,
|
||||
_S_nosubs,
|
||||
_S_optimize,
|
||||
_S_collate,
|
||||
_S_ECMAScript,
|
||||
_S_basic,
|
||||
_S_extended,
|
||||
_S_awk,
|
||||
_S_grep,
|
||||
_S_egrep,
|
||||
_S_syntax_last
|
||||
};
|
||||
{
|
||||
_S_icase,
|
||||
_S_nosubs,
|
||||
_S_optimize,
|
||||
_S_collate,
|
||||
_S_ECMAScript,
|
||||
_S_basic,
|
||||
_S_extended,
|
||||
_S_awk,
|
||||
_S_grep,
|
||||
_S_egrep,
|
||||
_S_syntax_last
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This is a bitmask type indicating how to interpret the regex.
|
||||
@ -211,20 +211,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
//@{
|
||||
|
||||
enum __match_flag
|
||||
{
|
||||
_S_not_bol,
|
||||
_S_not_eol,
|
||||
_S_not_bow,
|
||||
_S_not_eow,
|
||||
_S_any,
|
||||
_S_not_null,
|
||||
_S_continuous,
|
||||
_S_prev_avail,
|
||||
_S_sed,
|
||||
_S_no_copy,
|
||||
_S_first_only,
|
||||
_S_match_flag_last
|
||||
};
|
||||
{
|
||||
_S_not_bol,
|
||||
_S_not_eol,
|
||||
_S_not_bow,
|
||||
_S_not_eow,
|
||||
_S_any,
|
||||
_S_not_null,
|
||||
_S_continuous,
|
||||
_S_prev_avail,
|
||||
_S_sed,
|
||||
_S_no_copy,
|
||||
_S_first_only,
|
||||
_S_match_flag_last
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This is a bitmask type indicating regex matching rules.
|
||||
@ -233,110 +233,148 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
* perform bitwise operations on these values and expect the right thing to
|
||||
* happen.
|
||||
*/
|
||||
typedef std::bitset<_S_match_flag_last> match_flag_type;
|
||||
enum match_flag_type : unsigned int
|
||||
{
|
||||
/**
|
||||
* The default matching rules.
|
||||
*/
|
||||
match_default = 0,
|
||||
|
||||
/**
|
||||
* The default matching rules.
|
||||
*/
|
||||
constexpr match_flag_type match_default = 0;
|
||||
/**
|
||||
* The first character in the sequence [first, last) is treated as though it
|
||||
* is not at the beginning of a line, so the character (^) in the regular
|
||||
* expression shall not match [first, first).
|
||||
*/
|
||||
match_not_bol = 1 << _S_not_bol,
|
||||
|
||||
/**
|
||||
* The first character in the sequence [first, last) is treated as though it
|
||||
* is not at the beginning of a line, so the character (^) in the regular
|
||||
* expression shall not match [first, first).
|
||||
*/
|
||||
constexpr match_flag_type match_not_bol = 1 << _S_not_bol;
|
||||
/**
|
||||
* The last character in the sequence [first, last) is treated as though it
|
||||
* is not at the end of a line, so the character ($) in the regular
|
||||
* expression shall not match [last, last).
|
||||
*/
|
||||
match_not_eol = 1 << _S_not_eol,
|
||||
|
||||
/**
|
||||
* The last character in the sequence [first, last) is treated as though it
|
||||
* is not at the end of a line, so the character ($) in the regular
|
||||
* expression shall not match [last, last).
|
||||
*/
|
||||
constexpr match_flag_type match_not_eol = 1 << _S_not_eol;
|
||||
/**
|
||||
* The expression \\b is not matched against the sub-sequence
|
||||
* [first,first).
|
||||
*/
|
||||
match_not_bow = 1 << _S_not_bow,
|
||||
|
||||
/**
|
||||
* The expression \\b is not matched against the sub-sequence
|
||||
* [first,first).
|
||||
*/
|
||||
constexpr match_flag_type match_not_bow = 1 << _S_not_bow;
|
||||
/**
|
||||
* The expression \\b should not be matched against the sub-sequence
|
||||
* [last,last).
|
||||
*/
|
||||
match_not_eow = 1 << _S_not_eow,
|
||||
|
||||
/**
|
||||
* The expression \\b should not be matched against the sub-sequence
|
||||
* [last,last).
|
||||
*/
|
||||
constexpr match_flag_type match_not_eow = 1 << _S_not_eow;
|
||||
/**
|
||||
* If more than one match is possible then any match is an acceptable
|
||||
* result.
|
||||
*/
|
||||
match_any = 1 << _S_any,
|
||||
|
||||
/**
|
||||
* If more than one match is possible then any match is an acceptable
|
||||
* result.
|
||||
*/
|
||||
constexpr match_flag_type match_any = 1 << _S_any;
|
||||
/**
|
||||
* The expression does not match an empty sequence.
|
||||
*/
|
||||
match_not_null = 1 << _S_not_null,
|
||||
|
||||
/**
|
||||
* The expression does not match an empty sequence.
|
||||
*/
|
||||
constexpr match_flag_type match_not_null = 1 << _S_not_null;
|
||||
/**
|
||||
* The expression only matches a sub-sequence that begins at first .
|
||||
*/
|
||||
match_continuous = 1 << _S_continuous,
|
||||
|
||||
/**
|
||||
* The expression only matches a sub-sequence that begins at first .
|
||||
*/
|
||||
constexpr match_flag_type match_continuous = 1 << _S_continuous;
|
||||
/**
|
||||
* --first is a valid iterator position. When this flag is set then the
|
||||
* flags match_not_bol and match_not_bow are ignored by the regular
|
||||
* expression algorithms 28.11 and iterators 28.12.
|
||||
*/
|
||||
match_prev_avail = 1 << _S_prev_avail,
|
||||
|
||||
/**
|
||||
* --first is a valid iterator position. When this flag is set then the
|
||||
* flags match_not_bol and match_not_bow are ignored by the regular
|
||||
* expression algorithms 28.11 and iterators 28.12.
|
||||
*/
|
||||
constexpr match_flag_type match_prev_avail = 1 << _S_prev_avail;
|
||||
/**
|
||||
* When a regular expression match is to be replaced by a new string, the
|
||||
* new string is constructed using the rules used by the ECMAScript replace
|
||||
* function in ECMA- 262 [Ecma International, ECMAScript Language
|
||||
* Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
|
||||
* String.prototype.replace. In addition, during search and replace
|
||||
* operations all non-overlapping occurrences of the regular expression
|
||||
* are located and replaced, and sections of the input that did not match
|
||||
* the expression are copied unchanged to the output string.
|
||||
*
|
||||
* Format strings (from ECMA-262 [15.5.4.11]):
|
||||
* @li $$ The dollar-sign itself ($)
|
||||
* @li $& The matched substring.
|
||||
* @li $` The portion of @a string that precedes the matched substring.
|
||||
* This would be match_results::prefix().
|
||||
* @li $' The portion of @a string that follows the matched substring.
|
||||
* This would be match_results::suffix().
|
||||
* @li $n The nth capture, where n is in [1,9] and $n is not followed by a
|
||||
* decimal digit. If n <= match_results::size() and the nth capture
|
||||
* is undefined, use the empty string instead. If n >
|
||||
* match_results::size(), the result is implementation-defined.
|
||||
* @li $nn The nnth capture, where nn is a two-digit decimal number on
|
||||
* [01, 99]. If nn <= match_results::size() and the nth capture is
|
||||
* undefined, use the empty string instead. If
|
||||
* nn > match_results::size(), the result is implementation-defined.
|
||||
*/
|
||||
format_default = 0,
|
||||
|
||||
/**
|
||||
* When a regular expression match is to be replaced by a new string, the
|
||||
* new string is constructed using the rules used by the ECMAScript replace
|
||||
* function in ECMA- 262 [Ecma International, ECMAScript Language
|
||||
* Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
|
||||
* String.prototype.replace. In addition, during search and replace
|
||||
* operations all non-overlapping occurrences of the regular expression
|
||||
* are located and replaced, and sections of the input that did not match
|
||||
* the expression are copied unchanged to the output string.
|
||||
*
|
||||
* Format strings (from ECMA-262 [15.5.4.11]):
|
||||
* @li $$ The dollar-sign itself ($)
|
||||
* @li $& The matched substring.
|
||||
* @li $` The portion of @a string that precedes the matched substring.
|
||||
* This would be match_results::prefix().
|
||||
* @li $' The portion of @a string that follows the matched substring.
|
||||
* This would be match_results::suffix().
|
||||
* @li $n The nth capture, where n is in [1,9] and $n is not followed by a
|
||||
* decimal digit. If n <= match_results::size() and the nth capture
|
||||
* is undefined, use the empty string instead. If n >
|
||||
* match_results::size(), the result is implementation-defined.
|
||||
* @li $nn The nnth capture, where nn is a two-digit decimal number on
|
||||
* [01, 99]. If nn <= match_results::size() and the nth capture is
|
||||
* undefined, use the empty string instead. If
|
||||
* nn > match_results::size(), the result is implementation-defined.
|
||||
*/
|
||||
constexpr match_flag_type format_default = 0;
|
||||
/**
|
||||
* When a regular expression match is to be replaced by a new string, the
|
||||
* new string is constructed using the rules used by the POSIX sed utility
|
||||
* in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
|
||||
* Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
|
||||
*/
|
||||
format_sed = 1 << _S_sed,
|
||||
|
||||
/**
|
||||
* When a regular expression match is to be replaced by a new string, the
|
||||
* new string is constructed using the rules used by the POSIX sed utility
|
||||
* in IEEE Std 1003.1- 2001 [IEEE, Information Technology -- Portable
|
||||
* Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
|
||||
*/
|
||||
constexpr match_flag_type format_sed = 1 << _S_sed;
|
||||
/**
|
||||
* During a search and replace operation, sections of the character
|
||||
* container sequence being searched that do not match the regular
|
||||
* expression shall not be copied to the output string.
|
||||
*/
|
||||
format_no_copy = 1 << _S_no_copy,
|
||||
|
||||
/**
|
||||
* During a search and replace operation, sections of the character
|
||||
* container sequence being searched that do not match the regular
|
||||
* expression shall not be copied to the output string.
|
||||
*/
|
||||
constexpr match_flag_type format_no_copy = 1 << _S_no_copy;
|
||||
/**
|
||||
* When specified during a search and replace operation, only the first
|
||||
* occurrence of the regular expression shall be replaced.
|
||||
*/
|
||||
format_first_only = 1 << _S_first_only,
|
||||
};
|
||||
|
||||
/**
|
||||
* When specified during a search and replace operation, only the first
|
||||
* occurrence of the regular expression shall be replaced.
|
||||
*/
|
||||
constexpr match_flag_type format_first_only = 1 << _S_first_only;
|
||||
constexpr inline match_flag_type
|
||||
operator&(match_flag_type __a, match_flag_type __b)
|
||||
{
|
||||
return (match_flag_type)(static_cast<unsigned int>(__a)
|
||||
& static_cast<unsigned int>(__b));
|
||||
}
|
||||
|
||||
constexpr inline match_flag_type
|
||||
operator|(match_flag_type __a, match_flag_type __b)
|
||||
{
|
||||
return (match_flag_type)(static_cast<unsigned int>(__a)
|
||||
| static_cast<unsigned int>(__b));
|
||||
}
|
||||
|
||||
constexpr inline match_flag_type
|
||||
operator^(match_flag_type __a, match_flag_type __b)
|
||||
{
|
||||
return (match_flag_type)(static_cast<unsigned int>(__a)
|
||||
^ static_cast<unsigned int>(__b));
|
||||
}
|
||||
|
||||
constexpr inline match_flag_type
|
||||
operator~(match_flag_type __a)
|
||||
{ return (match_flag_type)(~static_cast<unsigned int>(__a)); }
|
||||
|
||||
inline match_flag_type&
|
||||
operator&=(match_flag_type& __a, match_flag_type __b)
|
||||
{ return __a = __a & __b; }
|
||||
|
||||
inline match_flag_type&
|
||||
operator|=(match_flag_type& __a, match_flag_type __b)
|
||||
{ return __a = __a | __b; }
|
||||
|
||||
inline match_flag_type&
|
||||
operator^=(match_flag_type& __a, match_flag_type __b)
|
||||
{ return __a = __a ^ __b; }
|
||||
|
||||
//@}
|
||||
|
||||
|
@ -28,7 +28,11 @@
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// TODO: convert comments to doxygen format.
|
||||
// FIXME convert comments to doxygen format.
|
||||
|
||||
// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
|
||||
// much more similar. Also, make grouping seperated. The
|
||||
// regex_constants::nosubs enables much more simpler execution.
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
@ -57,55 +61,107 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
class _Executor
|
||||
{
|
||||
public:
|
||||
typedef basic_regex<_CharT, _TraitsT> _RegexT;
|
||||
typedef match_results<_BiIter, _Alloc> _ResultsT;
|
||||
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
virtual
|
||||
~_Executor()
|
||||
{ }
|
||||
|
||||
// Set matched when string exactly match the pattern.
|
||||
virtual bool
|
||||
_M_match() = 0;
|
||||
|
||||
// Set matched when some prefix of the string matches the pattern.
|
||||
virtual bool
|
||||
_M_search() = 0;
|
||||
|
||||
protected:
|
||||
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
|
||||
typedef typename _TraitsT::char_class_type _ClassT;
|
||||
|
||||
public:
|
||||
_Executor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
_FlagT __flags,
|
||||
_SizeT __size,
|
||||
const _TraitsT& __traits)
|
||||
: _M_current(__begin), _M_begin(__begin), _M_end(__end),
|
||||
_M_results(__results), _M_flags(__flags), _M_traits(__traits)
|
||||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _M_begin(__begin),
|
||||
_M_end(__end),
|
||||
_M_results(__results),
|
||||
_M_re(__re),
|
||||
_M_flags(__flags)
|
||||
{ }
|
||||
|
||||
// Set matched when string exactly match the pattern.
|
||||
bool
|
||||
_M_match()
|
||||
{
|
||||
__size += 2;
|
||||
_M_results.resize(__size);
|
||||
for (_SizeT __i = 0; __i < __size; ++__i)
|
||||
_M_results[__i].matched = false;
|
||||
_M_match_mode = true;
|
||||
_M_init(_M_begin);
|
||||
return _M_main();
|
||||
}
|
||||
|
||||
// Set matched when some prefix of the string matches the pattern.
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(_M_begin);
|
||||
return _M_main();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_is_word(_CharT __ch)
|
||||
_M_search()
|
||||
{
|
||||
if (_M_flags & regex_constants::match_continuous)
|
||||
return _M_search_from_first();
|
||||
auto __cur = _M_begin;
|
||||
do
|
||||
{
|
||||
_M_match_mode = false;
|
||||
_M_init(__cur);
|
||||
if (_M_main())
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
while (__cur++ != _M_end);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
_M_is_word(_CharT __ch) const
|
||||
{
|
||||
static const _CharT __s = 'w';
|
||||
return _M_traits.isctype(__ch,
|
||||
_M_traits.lookup_classname(&__s, &__s+1));
|
||||
return _M_re._M_traits.isctype
|
||||
(__ch, _M_re._M_traits.lookup_classname(&__s, &__s+1));
|
||||
}
|
||||
|
||||
bool
|
||||
_M_at_begin() const
|
||||
{
|
||||
return _M_current == _M_begin
|
||||
&& !(_M_flags & (regex_constants::match_not_bol
|
||||
| regex_constants::match_prev_avail));
|
||||
}
|
||||
|
||||
bool
|
||||
_M_at_end() const
|
||||
{
|
||||
return _M_current == _M_end
|
||||
&& !(_M_flags & regex_constants::match_not_eol);
|
||||
}
|
||||
|
||||
bool
|
||||
_M_word_boundry(_State<_CharT, _TraitsT> __state) const;
|
||||
|
||||
bool
|
||||
_M_lookahead(_State<_CharT, _TraitsT> __state) const;
|
||||
|
||||
public:
|
||||
virtual void
|
||||
_M_init(_BiIter __cur) = 0;
|
||||
|
||||
virtual void
|
||||
_M_set_start(_StateIdT __start) = 0;
|
||||
|
||||
virtual bool
|
||||
_M_main() = 0;
|
||||
|
||||
_BiIter _M_current;
|
||||
const _BiIter _M_begin;
|
||||
const _BiIter _M_end;
|
||||
_ResultsVec& _M_results;
|
||||
const _TraitsT& _M_traits;
|
||||
_FlagT _M_flags;
|
||||
const _RegexT& _M_re;
|
||||
_ResultsT& _M_results;
|
||||
const _FlagT _M_flags;
|
||||
bool _M_match_mode;
|
||||
};
|
||||
|
||||
// A _DFSExecutor perform a DFS on given NFA and input string. At the very
|
||||
@ -128,61 +184,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef _NFA<_CharT, _TraitsT> _NFAT;
|
||||
typedef typename _BaseT::_RegexT _RegexT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
typedef typename _BaseT::_FlagT _FlagT;
|
||||
|
||||
public:
|
||||
_DFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
const _RegexT& __nfa,
|
||||
const _TraitsT& __traits,
|
||||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
|
||||
__traits),
|
||||
_M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results),
|
||||
_M_start_state(__nfa._M_start())
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton)),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
{ }
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{
|
||||
this->_M_current = this->_M_begin;
|
||||
return _M_dfs<true>(_M_start_state);
|
||||
}
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{
|
||||
this->_M_current = this->_M_begin;
|
||||
return _M_dfs<false>(_M_start_state);
|
||||
}
|
||||
|
||||
bool
|
||||
_M_search()
|
||||
{
|
||||
auto __cur = this->_M_begin;
|
||||
do
|
||||
{
|
||||
this->_M_current = __cur;
|
||||
if (_M_dfs<false>(_M_start_state))
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
while (__cur++ != this->_M_end);
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_dfs(_StateIdT __i);
|
||||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
_M_cur_results.resize(_M_nfa._M_sub_count() + 2);
|
||||
this->_M_current = __cur;
|
||||
}
|
||||
|
||||
void
|
||||
_M_set_start(_StateIdT __start)
|
||||
{ _M_start_state = __start; }
|
||||
|
||||
bool
|
||||
_M_main()
|
||||
{ return _M_dfs(this->_M_start_state); }
|
||||
|
||||
bool
|
||||
_M_dfs(_StateIdT __start);
|
||||
|
||||
// To record current solution.
|
||||
_ResultsVec _M_cur_results;
|
||||
const _TraitsT& _M_traits;
|
||||
const _RegexT& _M_nfa;
|
||||
const _NFAT& _M_nfa;
|
||||
_StateIdT _M_start_state;
|
||||
};
|
||||
|
||||
@ -206,47 +247,57 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
{
|
||||
public:
|
||||
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
|
||||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef _NFA<_CharT, _TraitsT> _NFAT;
|
||||
typedef typename _BaseT::_RegexT _RegexT;
|
||||
typedef typename _BaseT::_ResultsT _ResultsT;
|
||||
typedef typename _BaseT::_ResultsVec _ResultsVec;
|
||||
typedef typename _BaseT::_FlagT _FlagT;
|
||||
// Here's a solution for greedy/ungreedy mode in BFS approach. We need to
|
||||
// carefully work out how to compare to conflict matching states.
|
||||
//
|
||||
// A matching state is a pair(where, when); `where` is a NFA node; `when`
|
||||
// is a _BiIter, indicating which char is the next to be mathed one. Two
|
||||
// matching states conflict means that they have equivalent `where` and
|
||||
// `when`.
|
||||
// is a _BiIter, indicating which char is the next to be matched. Two
|
||||
// matching states conflict if they have equivalent `where` and `when`.
|
||||
//
|
||||
// Now since we need to drop one and keep another, because at most one of
|
||||
// them could be the final optimal solution. This behavior is affected by
|
||||
// Now we need to drop one and keep another, because at most one of them
|
||||
// could be the final optimal solution. This behavior is affected by
|
||||
// greedy policy.
|
||||
//
|
||||
// The definition of `greedy`:
|
||||
// For the sequence of quantifiers in NFA sorted by there start position,
|
||||
// now maintain a vector in a matching state, with equal length to
|
||||
// now maintain a vector in every matching state, with equal length to
|
||||
// quantifier seq, recording repeating times of every quantifier. Now to
|
||||
// compare two matching states, we just lexically compare these two
|
||||
// vectors. To win the compare(to survive), one matching state needs to
|
||||
// make its greedy quantifier count larger, and ungreedy quantifiers
|
||||
// count smaller.
|
||||
//
|
||||
// In the implementation, we recorded negtive numbers for greedy
|
||||
// quantifiers and positive numbers of ungreedy ones. Now a simple
|
||||
// In the implementation, we recorded negtive counts for greedy
|
||||
// quantifiers and positive counts of ungreedy ones. Now the implicit
|
||||
// operator<() for lexicographical_compare will emit the answer.
|
||||
//
|
||||
// When two vectors equal, it means the `where`, `when` and quantifier
|
||||
// counts are identical, it indicates the same answer, so just return
|
||||
// counts are identical, and indicates the same solution; so just return
|
||||
// false.
|
||||
struct _ResultsEntry
|
||||
: private _BaseT::_ResultsVec
|
||||
: private _ResultsVec
|
||||
{
|
||||
public:
|
||||
_ResultsEntry(unsigned int __res_sz, unsigned int __sz)
|
||||
: _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz)
|
||||
: _ResultsVec(__res_sz), _M_quant_keys(__sz)
|
||||
{ }
|
||||
|
||||
void
|
||||
resize(unsigned int __n)
|
||||
{ _ResultsVec::resize(__n); }
|
||||
|
||||
unsigned int
|
||||
size()
|
||||
{ return _ResultsVec::size(); }
|
||||
|
||||
sub_match<_BiIter>&
|
||||
operator[](unsigned int __idx)
|
||||
{ return this->_BaseT::_ResultsVec::operator[](__idx); }
|
||||
{ return _ResultsVec::operator[](__idx); }
|
||||
|
||||
bool
|
||||
operator<(const _ResultsEntry& __rhs) const
|
||||
@ -263,75 +314,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_M_inc(unsigned int __idx, bool __neg)
|
||||
{ _M_quant_keys[__idx] += __neg ? 1 : -1; }
|
||||
|
||||
typename _BaseT::_ResultsVec
|
||||
_ResultsVec
|
||||
_M_get()
|
||||
{ return *this; }
|
||||
|
||||
public:
|
||||
std::vector<int> _M_quant_keys;
|
||||
};
|
||||
|
||||
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
|
||||
typedef regex_constants::match_flag_type _FlagT;
|
||||
|
||||
public:
|
||||
_BFSExecutor(_BiIter __begin,
|
||||
_BiIter __end,
|
||||
_ResultsT& __results,
|
||||
const _RegexT& __nfa,
|
||||
const _TraitsT& __traits,
|
||||
const _RegexT& __re,
|
||||
_FlagT __flags)
|
||||
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
|
||||
__traits),
|
||||
_M_nfa(__nfa),
|
||||
_M_cur_results(nullptr),
|
||||
_M_start_state(__nfa._M_start())
|
||||
: _BaseT(__begin, __end, __results, __re, __flags),
|
||||
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton)),
|
||||
_M_start_state(_M_nfa._M_start())
|
||||
{ }
|
||||
|
||||
bool
|
||||
_M_match()
|
||||
{
|
||||
_M_init(this->_M_begin);
|
||||
return _M_main_loop<true>();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_search_from_first()
|
||||
{
|
||||
_M_init(this->_M_begin);
|
||||
return _M_main_loop<false>();
|
||||
}
|
||||
|
||||
bool
|
||||
_M_search()
|
||||
{
|
||||
auto __cur = this->_M_begin;
|
||||
do
|
||||
{
|
||||
_M_init(__cur);
|
||||
if (_M_main_loop<false>())
|
||||
return true;
|
||||
}
|
||||
// Continue when __cur == _M_end
|
||||
while (__cur++ != this->_M_end);
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
void
|
||||
_M_init(_BiIter __cur)
|
||||
{
|
||||
_GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id);
|
||||
_GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
|
||||
this->_M_current = __cur;
|
||||
_M_covered.clear();
|
||||
_M_covered[_M_start_state] =
|
||||
_ResultsPtr(new _ResultsEntry(this->_M_results.size(),
|
||||
_ResultsVec& __res(this->_M_results);
|
||||
_M_covered[this->_M_start_state] =
|
||||
_ResultsPtr(new _ResultsEntry(__res.size(),
|
||||
_M_nfa._M_quant_count));
|
||||
_M_e_closure();
|
||||
}
|
||||
|
||||
template<bool __match_mode>
|
||||
bool
|
||||
_M_main_loop();
|
||||
void
|
||||
_M_set_start(_StateIdT __start)
|
||||
{ _M_start_state = __start; }
|
||||
|
||||
bool
|
||||
_M_main();
|
||||
|
||||
void
|
||||
_M_e_closure();
|
||||
@ -345,10 +368,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
std::map<_StateIdT, _ResultsPtr> _M_covered;
|
||||
// To record global optimal solution.
|
||||
_ResultsPtr _M_cur_results;
|
||||
const _RegexT& _M_nfa;
|
||||
const _NFAT& _M_nfa;
|
||||
_StateIdT _M_start_state;
|
||||
};
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
__get_executor(_BiIter __b,
|
||||
_BiIter __e,
|
||||
match_results<_BiIter, _Alloc>& __m,
|
||||
const basic_regex<_CharT, _TraitsT>& __re,
|
||||
regex_constants::match_flag_type __flags);
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
|
@ -36,7 +36,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
template<bool __match_mode>
|
||||
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_dfs(_StateIdT __i)
|
||||
{
|
||||
@ -44,9 +43,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
// This is not that certain. Need deeper investigate.
|
||||
return false;
|
||||
auto& __current = this->_M_current;
|
||||
auto& __begin = this->_M_begin;
|
||||
auto& __end = this->_M_end;
|
||||
auto& __results = _M_cur_results;
|
||||
const auto& __state = _M_nfa[__i];
|
||||
bool __ret = false;
|
||||
switch (__state._M_opcode)
|
||||
@ -54,129 +50,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
case _S_opcode_alternative:
|
||||
// Greedy or not, this is a question ;)
|
||||
if (!__state._M_neg)
|
||||
__ret = _M_dfs<__match_mode>(__state._M_alt)
|
||||
|| _M_dfs<__match_mode>(__state._M_next);
|
||||
__ret = _M_dfs(__state._M_alt)
|
||||
|| _M_dfs(__state._M_next);
|
||||
else
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next)
|
||||
|| _M_dfs<__match_mode>(__state._M_alt);
|
||||
__ret = _M_dfs(__state._M_next)
|
||||
|| _M_dfs(__state._M_alt);
|
||||
break;
|
||||
case _S_opcode_subexpr_begin:
|
||||
// Here's the critical part: if there's nothing changed since last
|
||||
// visit, do NOT continue. This prevents the executor from get into
|
||||
// infinite loop when use "()*" to match "".
|
||||
//
|
||||
// Every change on __results will be roll back after the recursion
|
||||
// step finished.
|
||||
if (!__results[__state._M_subexpr].matched
|
||||
|| __results[__state._M_subexpr].first != __current)
|
||||
// Every change on _M_cur_results will be roll back after the
|
||||
// recursion step finished.
|
||||
if (!_M_cur_results[__state._M_subexpr].matched
|
||||
|| _M_cur_results[__state._M_subexpr].first != __current)
|
||||
{
|
||||
auto __back = __current;
|
||||
__results[__state._M_subexpr].first = __current;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__results[__state._M_subexpr].first = __back;
|
||||
_M_cur_results[__state._M_subexpr].first = __current;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_cur_results[__state._M_subexpr].first = __back;
|
||||
}
|
||||
break;
|
||||
case _S_opcode_subexpr_end:
|
||||
if (__results[__state._M_subexpr].second != __current
|
||||
|| __results[__state._M_subexpr].matched != true)
|
||||
if (_M_cur_results[__state._M_subexpr].second != __current
|
||||
|| _M_cur_results[__state._M_subexpr].matched != true)
|
||||
{
|
||||
auto __back = __results[__state._M_subexpr];
|
||||
__results[__state._M_subexpr].second = __current;
|
||||
__results[__state._M_subexpr].matched = true;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__results[__state._M_subexpr] = __back;
|
||||
auto __back = _M_cur_results[__state._M_subexpr];
|
||||
_M_cur_results[__state._M_subexpr].second = __current;
|
||||
_M_cur_results[__state._M_subexpr].matched = true;
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
_M_cur_results[__state._M_subexpr] = __back;
|
||||
}
|
||||
else
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_begin_assertion:
|
||||
if (__current == __begin)
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
if (this->_M_at_begin())
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_end_assertion:
|
||||
if (__current == __end)
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
if (this->_M_at_end())
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
break;
|
||||
// By definition.
|
||||
case _S_opcode_word_boundry:
|
||||
{
|
||||
bool __ans = false;
|
||||
if (__current == __begin && this->_M_is_word(*__current))
|
||||
__ans = true;
|
||||
else if (__current == __end && this->_M_is_word(*__current))
|
||||
__ans = true;
|
||||
else
|
||||
{
|
||||
auto __pre = __current;
|
||||
--__pre;
|
||||
if (this->_M_is_word(*__current)
|
||||
!= this->_M_is_word(*__pre))
|
||||
__ans = true;
|
||||
}
|
||||
if (__ans == !__state._M_neg)
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
}
|
||||
if (this->_M_word_boundry(__state) == !__state._M_neg)
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
break;
|
||||
// Here __state._M_alt offers a single start node for a sub-NFA.
|
||||
// We recursivly invoke our algorithm to match the sub-NFA.
|
||||
case _S_opcode_subexpr_lookahead:
|
||||
{
|
||||
_ResultsT __m;
|
||||
// FIXME Here's not necessarily a DFSExecutor. But we need to
|
||||
// refactor the whole NFA to a recursive tree structure first.
|
||||
_DFSExecutor __sub(this->_M_current,
|
||||
this->_M_end,
|
||||
__m,
|
||||
this->_M_nfa,
|
||||
this->_M_traits,
|
||||
this->_M_flags);
|
||||
__sub._M_start_state = __state._M_alt;
|
||||
if (__sub._M_search_from_first() == !__state._M_neg)
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
}
|
||||
if (this->_M_lookahead(__state) == !__state._M_neg)
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
if (__current != __end && __state._M_matches(*__current))
|
||||
if (__current != this->_M_end && __state._M_matches(*__current))
|
||||
{
|
||||
++__current;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
--__current;
|
||||
}
|
||||
break;
|
||||
// First fetch the matched result from __results as __submatch;
|
||||
// First fetch the matched result from _M_cur_results as __submatch;
|
||||
// then compare it with
|
||||
// (__current, __current + (__submatch.second - __submatch.first))
|
||||
// If matched, keep going; else just return to try another state.
|
||||
case _S_opcode_backref:
|
||||
{
|
||||
auto& __submatch = __results[__state._M_backref_index];
|
||||
auto& __submatch = _M_cur_results[__state._M_backref_index];
|
||||
if (!__submatch.matched)
|
||||
break;
|
||||
auto __last = __current;
|
||||
for (auto __tmp = __submatch.first;
|
||||
__last != __end && __tmp != __submatch.second;
|
||||
__last != this->_M_end && __tmp != __submatch.second;
|
||||
++__tmp)
|
||||
++__last;
|
||||
if (_M_traits.transform(__submatch.first, __submatch.second)
|
||||
== _M_traits.transform(__current, __last))
|
||||
if (this->_M_re._M_traits.transform(__submatch.first,
|
||||
__submatch.second)
|
||||
== this->_M_re._M_traits.transform(__current, __last))
|
||||
if (__last != __current)
|
||||
{
|
||||
auto __backup = __current;
|
||||
__current = __last;
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
__current = __backup;
|
||||
}
|
||||
else
|
||||
__ret = _M_dfs<__match_mode>(__state._M_next);
|
||||
__ret = _M_dfs(__state._M_next);
|
||||
}
|
||||
break;
|
||||
case _S_opcode_accept:
|
||||
if (__match_mode)
|
||||
__ret = __current == __end;
|
||||
if (this->_M_match_mode)
|
||||
__ret = __current == this->_M_end;
|
||||
else
|
||||
__ret = true;
|
||||
if (__current == this->_M_begin
|
||||
&& (this->_M_flags & regex_constants::match_not_null))
|
||||
__ret = false;
|
||||
if (__ret)
|
||||
this->_M_results = __results;
|
||||
{
|
||||
_ResultsVec& __res(this->_M_results);
|
||||
if (this->_M_re.flags() & regex_constants::nosubs)
|
||||
{
|
||||
_M_cur_results.resize(3); // truncate
|
||||
__res.resize(3);
|
||||
}
|
||||
for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i)
|
||||
if (_M_cur_results[__i].matched)
|
||||
__res[__i] = _M_cur_results[__i];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
@ -186,23 +168,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
template<bool __match_mode>
|
||||
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_main_loop()
|
||||
_M_main()
|
||||
{
|
||||
bool __ret = false;
|
||||
if (!this->_M_match_mode
|
||||
&& !(this->_M_flags & regex_constants::match_not_null))
|
||||
__ret = _M_includes_some() || __ret;
|
||||
while (this->_M_current != this->_M_end)
|
||||
{
|
||||
if (!__match_mode)
|
||||
// To keep regex_search greedy, no "return true" here.
|
||||
__ret = _M_includes_some() || __ret;
|
||||
_M_move();
|
||||
++this->_M_current;
|
||||
_M_e_closure();
|
||||
if (!this->_M_match_mode)
|
||||
// To keep regex_search greedy, no "return true" here.
|
||||
__ret = _M_includes_some() || __ret;
|
||||
}
|
||||
__ret = _M_includes_some() || __ret;
|
||||
if (this->_M_match_mode)
|
||||
__ret = _M_includes_some();
|
||||
if (__ret)
|
||||
this->_M_results = _M_cur_results->_M_get();
|
||||
{
|
||||
_ResultsVec& __res(this->_M_results);
|
||||
if (this->_M_re.flags() & regex_constants::nosubs)
|
||||
{
|
||||
// truncate
|
||||
_M_cur_results->resize(3);
|
||||
__res.resize(3);
|
||||
}
|
||||
for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i)
|
||||
if ((*_M_cur_results)[__i].matched)
|
||||
__res[__i] = (*_M_cur_results)[__i];
|
||||
}
|
||||
return __ret;
|
||||
}
|
||||
|
||||
@ -211,11 +207,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_e_closure()
|
||||
{
|
||||
auto& __current = this->_M_current;
|
||||
std::queue<_StateIdT> __q;
|
||||
std::vector<bool> __in_q(_M_nfa.size(), false);
|
||||
auto& __begin = this->_M_begin;
|
||||
auto& __end = this->_M_end;
|
||||
auto& __current = this->_M_current;
|
||||
|
||||
for (auto& __it : _M_covered)
|
||||
{
|
||||
@ -292,46 +286,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
}
|
||||
break;
|
||||
case _S_opcode_line_begin_assertion:
|
||||
if (__current == __begin)
|
||||
if (this->_M_at_begin())
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_line_end_assertion:
|
||||
if (__current == __end)
|
||||
if (this->_M_at_end())
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_word_boundry:
|
||||
{
|
||||
bool __ans = false;
|
||||
if (__current == __begin && this->_M_is_word(*__current))
|
||||
__ans = true;
|
||||
else if (__current == __end && this->_M_is_word(*__current))
|
||||
__ans = true;
|
||||
else
|
||||
{
|
||||
auto __pre = __current;
|
||||
--__pre;
|
||||
if (this->_M_is_word(*__current)
|
||||
!= this->_M_is_word(*__pre))
|
||||
__ans = true;
|
||||
}
|
||||
if (__ans == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
}
|
||||
if (this->_M_word_boundry(__state) == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_subexpr_lookahead:
|
||||
{
|
||||
_ResultsT __m;
|
||||
// Same comment as in DFS.
|
||||
_BFSExecutor __sub(this->_M_current,
|
||||
this->_M_end,
|
||||
__m,
|
||||
this->_M_nfa,
|
||||
this->_M_traits,
|
||||
this->_M_flags);
|
||||
__sub._M_start_state = __state._M_alt;
|
||||
if (__sub._M_search_from_first() == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
}
|
||||
if (this->_M_lookahead(__state) == !__state._M_neg)
|
||||
__add_visited_state(__state._M_next);
|
||||
break;
|
||||
case _S_opcode_match:
|
||||
break;
|
||||
@ -395,6 +363,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
return __succ;
|
||||
}
|
||||
|
||||
// Return whether now is at some word boundry.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_word_boundry(_State<_CharT, _TraitsT> __state) const
|
||||
{
|
||||
// By definition.
|
||||
bool __ans = false;
|
||||
auto __pre = _M_current;
|
||||
--__pre;
|
||||
if (!(_M_at_begin() && _M_at_end()))
|
||||
if (_M_at_begin())
|
||||
__ans = _M_is_word(*_M_current)
|
||||
&& !(_M_flags & regex_constants::match_not_bow);
|
||||
else if (_M_at_end())
|
||||
__ans = _M_is_word(*__pre)
|
||||
&& !(_M_flags & regex_constants::match_not_eow);
|
||||
else
|
||||
__ans = _M_is_word(*_M_current)
|
||||
!= _M_is_word(*__pre);
|
||||
return __ans;
|
||||
}
|
||||
|
||||
// Return whether now match the given sub-NFA.
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
|
||||
_M_lookahead(_State<_CharT, _TraitsT> __state) const
|
||||
{
|
||||
auto __sub = __get_executor(this->_M_current,
|
||||
this->_M_end,
|
||||
this->_M_results,
|
||||
this->_M_re,
|
||||
this->_M_flags);
|
||||
__sub->_M_set_start(__state._M_alt);
|
||||
return __sub->_M_search_from_first();
|
||||
}
|
||||
|
||||
template<typename _BiIter, typename _Alloc,
|
||||
typename _CharT, typename _TraitsT>
|
||||
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
|
||||
@ -411,10 +417,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
|
||||
(__re._M_automaton);
|
||||
if (__p->_M_has_backref)
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p,
|
||||
__re._M_traits, __flags));
|
||||
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, *__p,
|
||||
__re._M_traits, __flags));
|
||||
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
|
@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_S_token_backref,
|
||||
_S_token_subexpr_begin,
|
||||
_S_token_subexpr_no_group_begin,
|
||||
_S_token_subexpr_lookahead_begin,
|
||||
_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
|
||||
_S_token_subexpr_end,
|
||||
_S_token_bracket_begin,
|
||||
_S_token_bracket_neg_begin,
|
||||
@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_S_token_ungreedy,
|
||||
_S_token_line_begin,
|
||||
_S_token_line_end,
|
||||
_S_token_word_bound,
|
||||
_S_token_word_bound, // neg if _M_value[0] == 'n'
|
||||
_S_token_comma,
|
||||
_S_token_dup_count,
|
||||
_S_token_eof,
|
||||
@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
_StringT _M_value;
|
||||
bool _M_at_bracket_start;
|
||||
public:
|
||||
// TODO: make them static when this file is stable.
|
||||
// FIXME: make them static when this file is stable.
|
||||
const std::map<char, _TokenT> _M_token_map;
|
||||
const std::map<char, char> _M_ecma_escape_map;
|
||||
const std::map<char, char> _M_awk_escape_map;
|
||||
|
@ -28,7 +28,7 @@
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// TODO make comments doxygen format.
|
||||
// FIXME make comments doxygen format.
|
||||
|
||||
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
|
||||
// and awk
|
||||
|
@ -1,5 +1,4 @@
|
||||
// { dg-options "-std=gnu++11" }
|
||||
// { dg-do run { xfail *-*-* } }
|
||||
|
||||
//
|
||||
// 2013-09-14 Tim Shen <timshen91@gmail.com>
|
||||
@ -54,22 +53,37 @@ test01()
|
||||
string sol[] =
|
||||
{
|
||||
"This",
|
||||
"",
|
||||
"is",
|
||||
"",
|
||||
"a",
|
||||
"",
|
||||
"regular",
|
||||
"",
|
||||
"expression",
|
||||
"",
|
||||
};
|
||||
|
||||
regex re("\\b\\w*\\b");
|
||||
int i = 0;
|
||||
for (auto it = sregex_iterator(s.begin(), s.end(), re);
|
||||
it != sregex_iterator() && i < 5;
|
||||
it != sregex_iterator();
|
||||
++it)
|
||||
{
|
||||
string s((*it)[0].first, (*it)[0].second);
|
||||
VERIFY(s == sol[i++]);
|
||||
}
|
||||
VERIFY(i == 5);
|
||||
VERIFY(i == 10);
|
||||
|
||||
{
|
||||
cmatch m;
|
||||
regex re("(?=(as)df)as(df)");
|
||||
regex_search("asdf", m, re);
|
||||
VERIFY(m.size() == 3);
|
||||
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
|
||||
VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
|
||||
VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -0,0 +1,71 @@
|
||||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-09-18 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.3 regex_search
|
||||
// Tests ECMAScript flags.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
cmatch m;
|
||||
regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
|
||||
VERIFY(regex_search("asdf", m, re));
|
||||
VERIFY(m.size() == 1);
|
||||
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
|
||||
|
||||
VERIFY( regex_search("a", regex("^a")));
|
||||
VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
|
||||
VERIFY( regex_search("a", regex("a$")));
|
||||
VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
|
||||
VERIFY( regex_search("a", regex("\\ba")));
|
||||
VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
|
||||
VERIFY( regex_search("a", regex("a\\b")));
|
||||
VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
|
||||
VERIFY( regex_search("", regex("")));
|
||||
VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
|
||||
VERIFY( regex_search("", regex("^$")));
|
||||
VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
|
||||
VERIFY( regex_search("aaa", m, regex("a*?"),
|
||||
regex_constants::match_not_null));
|
||||
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
|
||||
VERIFY( regex_search("asdf", regex("sdf")));
|
||||
VERIFY(!regex_search("asdf", regex("sdf"),
|
||||
regex_constants::match_continuous));
|
||||
VERIFY( regex_search(" a"+1, regex("\\ba"),
|
||||
regex_constants::match_prev_avail));
|
||||
VERIFY( regex_search("ba"+1, regex("\\Ba"),
|
||||
regex_constants::match_prev_avail));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
@ -1,5 +1,4 @@
|
||||
// { dg-options "-std=gnu++11" }
|
||||
// { dg-do run { xfail *-*-* } }
|
||||
|
||||
//
|
||||
// 2013-07-25 Tim Shen <timshen91@gmail.com>
|
||||
|
@ -1,6 +1,5 @@
|
||||
// { dg-options "-std=gnu++11" }
|
||||
// { dg-require-namedlocale "en_US.UTF-8" }
|
||||
// { dg-do run { xfail *-*-* } }
|
||||
|
||||
//
|
||||
// 2013-09-05 Tim Shen <timshen91@gmail.com>
|
||||
@ -42,13 +41,19 @@ test01()
|
||||
|
||||
re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
|
||||
|
||||
std::wsregex_iterator p(str2.begin(), str2.end(), re2);
|
||||
auto a = p;
|
||||
++p;
|
||||
VERIFY(a != p);
|
||||
//for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
|
||||
// p != std::wsregex_iterator{}; ++p)
|
||||
// std::wcout << (*p)[1] << std::endl;
|
||||
std::wstring sol[] =
|
||||
{
|
||||
L"ä\u2009Ä",
|
||||
L"\u2009",
|
||||
L"ö\u2009Ö",
|
||||
L"\u2009",
|
||||
L"ü\u2009Ü",
|
||||
L"",
|
||||
};
|
||||
int i = 0;
|
||||
for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
|
||||
p != std::wsregex_iterator{}; ++p)
|
||||
VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
|
||||
}
|
||||
|
||||
int
|
||||
|
Loading…
Reference in New Issue
Block a user