summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libstdc++-v3/ChangeLog8
-rw-r--r--libstdc++-v3/include/bits/regex_executor.h68
-rw-r--r--libstdc++-v3/include/bits/regex_executor.tcc126
-rw-r--r--libstdc++-v3/testsuite/performance/28_regex/split.cc115
4 files changed, 234 insertions, 83 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 377ac0e7e9f..4d6906d5ef4 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,11 @@
+2013-10-08 Tim Shen <timshen91@gmail.com>
+
+ * include/bits/regex_executor.h: Add _TodoList class.
+ * include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
+ _M_match_stack and _M_stack to make everything faster. Break if
+ _M_stack is empty, to reduce unnecessary idling.
+ * testsuite/performance/28_regex/split.cc: New.
+
2013-10-06 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator):
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 2770098152b..462643779f2 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
bool
- _M_search()
- {
- if (_M_flags & regex_constants::match_continuous)
- return _M_search_from_first();
- auto __cur = _M_begin;
- do
- {
- _M_match_mode = false;
- _M_init(__cur);
- if (_M_main())
- return true;
- }
- // Continue when __cur == _M_end
- while (__cur++ != _M_end);
- return false;
- }
+ _M_search();
bool
_M_is_word(_CharT __ch) const
@@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
};
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
+ class _TodoList
+ {
+ public:
+ explicit
+ _TodoList(size_t __sz)
+ : _M_states(), _M_exists(__sz, false)
+ { }
+
+ void _M_push(_StateIdT __u)
+ {
+ _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
+ if (!_M_exists[__u])
+ {
+ _M_exists[__u] = true;
+ _M_states.push_back(__u);
+ }
+ }
+
+ _StateIdT _M_pop()
+ {
+ auto __ret = _M_states.back();
+ _M_states.pop_back();
+ _M_exists[__ret] = false;
+ return __ret;
+ }
+
+ bool _M_empty() const
+ { return _M_states.empty(); }
+
+ void _M_clear()
+ {
+ _M_states.clear();
+ _M_exists.assign(_M_exists.size(), false);
+ }
+
+ private:
+ std::vector<_StateIdT> _M_states;
+ std::vector<bool> _M_exists;
+ };
+
public:
_BFSExecutor(_BiIter __begin,
_BiIter __end,
@@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _BaseT(__begin, __end, __results, __re, __flags),
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
(__re._M_automaton)),
+ _M_match_stack(_M_nfa.size()),
+ _M_stack(_M_nfa.size()),
_M_start_state(_M_nfa._M_start())
{ }
@@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_init(_BiIter __cur)
{
- _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
this->_M_current = __cur;
_M_covered.clear();
_ResultsVec& __res(this->_M_results);
_M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(),
_M_nfa._M_quant_count));
- _M_e_closure();
+ _M_stack._M_push(this->_M_start_state);
}
void
@@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
this->_M_flags));
}
+ const _NFAT& _M_nfa;
std::map<_StateIdT, _ResultsPtr> _M_covered;
+ _TodoList _M_match_stack;
+ _TodoList _M_stack;
+ _StateIdT _M_start_state;
// To record global optimal solution.
_ResultsPtr _M_cur_results;
- const _NFAT& _M_nfa;
- _StateIdT _M_start_state;
};
//@} regex-detail
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 3b1fcbc9e38..59b082e2ac3 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -36,12 +36,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
+ bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+ _M_search()
+ {
+ if (_M_flags & regex_constants::match_continuous)
+ return _M_search_from_first();
+ auto __cur = _M_begin;
+ do
+ {
+ _M_match_mode = false;
+ _M_init(__cur);
+ if (_M_main())
+ return true;
+ }
+ // Continue when __cur == _M_end
+ while (__cur++ != _M_end);
+ return false;
+ }
+
+ template<typename _BiIter, typename _Alloc,
+ typename _CharT, typename _TraitsT>
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_dfs(_StateIdT __i)
{
- if (__i == _S_invalid_state_id)
- // This is not that certain. Need deeper investigate.
- return false;
auto& __current = this->_M_current;
const auto& __state = _M_nfa[__i];
bool __ret = false;
@@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main()
{
+ _M_e_closure();
bool __ret = false;
if (!this->_M_match_mode
&& !(this->_M_flags & regex_constants::match_not_null))
@@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
_M_move();
++this->_M_current;
+ if (_M_stack._M_empty())
+ break;
_M_e_closure();
if (!this->_M_match_mode)
// To keep regex_search greedy, no "return true" here.
@@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__ret = _M_includes_some();
if (__ret)
this->_M_set_results(_M_cur_results->_M_get());
+ _M_match_stack._M_clear();
+ _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
return __ret;
}
@@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_e_closure()
{
- std::queue<_StateIdT> __q;
- std::vector<bool> __in_q(_M_nfa.size(), false);
auto& __current = this->_M_current;
- for (auto& __it : _M_covered)
- {
- __in_q[__it.first] = true;
- __q.push(__it.first);
- }
- while (!__q.empty())
+ while (!_M_stack._M_empty())
{
- auto __u = __q.front();
- __q.pop();
- __in_q[__u] = false;
+ auto __u = _M_stack._M_pop();
+ _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
const auto& __state = _M_nfa[__u];
// Can be implemented using method, but there will be too many
// arguments. I would use macro function before C++11, but lambda is
// a better choice, since hopefully compiler can inline it.
- auto __add_visited_state = [&](_StateIdT __v)
+ auto __add_visited_state = [=](_StateIdT __v)
{
- if (__v == _S_invalid_state_id)
- return;
- if (_M_covered.count(__u) != 0
- && (_M_covered.count(__v) == 0
- || *_M_covered[__u] < *_M_covered[__v]))
+ if (_M_covered.count(__v) == 0)
{
_M_covered[__v] =
_ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
+ _M_stack._M_push(__v);
+ return;
+ }
+ auto& __cu = _M_covered[__u];
+ auto& __cv = _M_covered[__v];
+ if (*__cu < *__cv)
+ {
+ __cv = _ResultsPtr(new _ResultsEntry(*__cu));
// if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue.
- if (!__in_q[__v])
- {
- __in_q[__v] = true;
- __q.push(__v);
- }
+ _M_stack._M_push(__v);
}
};
@@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_alternative:
{
__add_visited_state(__state._M_next);
- auto __back =
- _M_covered[__u]->_M_quant_keys[__state._M_quant_index];
- _M_covered[__u]->_M_inc(__state._M_quant_index,
- __state._M_neg);
+ auto& __cu = *_M_covered[__u];
+ auto __back = __cu._M_quant_keys[__state._M_quant_index];
+ __cu._M_inc(__state._M_quant_index, __state._M_neg);
__add_visited_state(__state._M_alt);
- _M_covered[__u]->_M_quant_keys[__state._M_quant_index]
- = __back;
+ __cu._M_quant_keys[__state._M_quant_index] = __back;
}
break;
case _S_opcode_subexpr_begin:
@@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__add_visited_state(__state._M_next);
break;
case _S_opcode_match:
+ _M_match_stack._M_push(__u);
break;
case _S_opcode_accept:
break;
@@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_move()
{
decltype(_M_covered) __next;
- for (auto& __it : _M_covered)
+ while (!_M_match_stack._M_empty())
{
- const auto& __state = _M_nfa[__it.first];
- if (__state._M_opcode == _S_opcode_match
- && __state._M_matches(*this->_M_current))
- if (__state._M_next != _S_invalid_state_id)
- if (__next.count(__state._M_next) == 0
- || *__it.second < *__next[__state._M_next])
- __next[__state._M_next] = move(__it.second);
+ auto __u = _M_match_stack._M_pop();
+ const auto& __state = _M_nfa[__u];
+ auto& __cu = _M_covered[__u];
+ if (__state._M_matches(*this->_M_current)
+ && (__next.count(__state._M_next) == 0
+ || *__cu < *__next[__state._M_next]))
+ {
+ __next[__state._M_next] = std::move(__cu);
+ _M_stack._M_push(__state._M_next);
+ }
}
_M_covered = move(__next);
}
@@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_includes_some()
{
- auto& __s = _M_nfa._M_final_states();
- auto& __t = _M_covered;
bool __succ = false;
- if (__s.size() > 0 && __t.size() > 0)
- {
- auto __first = __s.begin();
- auto __second = __t.begin();
- while (__first != __s.end() && __second != __t.end())
- {
- if (*__first < __second->first)
- ++__first;
- else if (*__first > __second->first)
- ++__second;
- else
- {
- if (_M_cur_results == nullptr
- || *__second->second < *_M_cur_results)
- _M_cur_results =
- _ResultsPtr(new _ResultsEntry(*__second->second));
- __succ = true;
- ++__first;
- ++__second;
- }
- }
- }
+ for (auto __u : _M_nfa._M_final_states())
+ if (_M_covered.count(__u))
+ {
+ __succ = true;
+ auto& __cu = _M_covered[__u];
+ if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
+ _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
+ }
return __succ;
}
diff --git a/libstdc++-v3/testsuite/performance/28_regex/split.cc b/libstdc++-v3/testsuite/performance/28_regex/split.cc
new file mode 100644
index 00000000000..5b972e43696
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/28_regex/split.cc
@@ -0,0 +1,115 @@
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 2013-10-08 Tim Shen <timshen91@gmail.com>
+
+#include <testsuite_performance.h>
+#include <regex>
+
+using namespace __gnu_test;
+using namespace std;
+
+void split(string s)
+{
+ regex re("\\s+");
+ for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
+ it != sregex_token_iterator();
+ ++it)
+ {
+ }
+}
+
+int main()
+{
+ string source = "\
+// Copyright (C) 2013 Free Software Foundation, Inc.\n\
+//\n\
+// This file is part of the GNU ISO C++ Library. This library is free\n\
+// software; you can redistribute it and/or modify it under the\n\
+// terms of the GNU General Public License as published by the\n\
+// Free Software Foundation; either version 3, or (at your option)\n\
+// any later version.\n\
+\n\
+// This library is distributed in the hope that it will be useful,\n\
+// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
+// GNU General Public License for more details.\n\
+\n\
+// You should have received a copy of the GNU General Public License along\n\
+// with this library; see the file COPYING3. If not see\n\
+// <http://www.gnu.org/licenses/>.\n\
+\n\
+// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
+\n\
+#include <testsuite_performance.h>\n\
+#include <regex>\n\
+\n\
+using namespace __gnu_test;\n\
+using namespace std;\n\
+\n\
+void split(string s)\n\
+{\n\
+ regex re(\"\\s+\");\n\
+ for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
+ it != sregex_token_iterator();\n\
+ ++it)\n\
+ {\n\
+ }\n\
+}\n\
+\n\
+int main()\n\
+{\n\
+ string source = \"\";\n\
+ time_counter time;\n\
+ resource_counter resource;\n\
+\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+\n\
+ start_counters(time, resource);\n\
+ split(source);\n\
+ stop_counters(time, resource);\n\
+ report_performance(__FILE__, \"\", time, resource);\n\
+\n\
+ return 0;\n\
+}\n";
+
+ time_counter time;
+ resource_counter resource;
+
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+
+ start_counters(time, resource);
+ split(source);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "", time, resource);
+
+ return 0;
+}