diff options
-rw-r--r-- | libstdc++-v3/ChangeLog | 8 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.h | 68 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.tcc | 126 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/performance/28_regex/split.cc | 115 |
4 files changed, 234 insertions, 83 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 377ac0e7e9f..4d6906d5ef4 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,11 @@ +2013-10-08 Tim Shen <timshen91@gmail.com> + + * include/bits/regex_executor.h: Add _TodoList class. + * include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add + _M_match_stack and _M_stack to make everything faster. Break if + _M_stack is empty, to reduce unnecessary idling. + * testsuite/performance/28_regex/split.cc: New. + 2013-10-06 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator): diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 2770098152b..462643779f2 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } bool - _M_search() - { - if (_M_flags & regex_constants::match_continuous) - return _M_search_from_first(); - auto __cur = _M_begin; - do - { - _M_match_mode = false; - _M_init(__cur); - if (_M_main()) - return true; - } - // Continue when __cur == _M_end - while (__cur++ != _M_end); - return false; - } + _M_search(); bool _M_is_word(_CharT __ch) const @@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; typedef std::unique_ptr<_ResultsEntry> _ResultsPtr; + class _TodoList + { + public: + explicit + _TodoList(size_t __sz) + : _M_states(), _M_exists(__sz, false) + { } + + void _M_push(_StateIdT __u) + { + _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size()); + if (!_M_exists[__u]) + { + _M_exists[__u] = true; + _M_states.push_back(__u); + } + } + + _StateIdT _M_pop() + { + auto __ret = _M_states.back(); + _M_states.pop_back(); + _M_exists[__ret] = false; + return __ret; + } + + bool _M_empty() const + { return _M_states.empty(); } + + void _M_clear() + { + _M_states.clear(); + _M_exists.assign(_M_exists.size(), false); + } + + private: + std::vector<_StateIdT> _M_states; + std::vector<bool> _M_exists; + }; + public: _BFSExecutor(_BiIter __begin, _BiIter __end, @@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION : _BaseT(__begin, __end, __results, __re, __flags), _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>> (__re._M_automaton)), + _M_match_stack(_M_nfa.size()), + _M_stack(_M_nfa.size()), _M_start_state(_M_nfa._M_start()) { } @@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_init(_BiIter __cur) { - _GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id); this->_M_current = __cur; _M_covered.clear(); _ResultsVec& __res(this->_M_results); _M_covered[this->_M_start_state] = _ResultsPtr(new _ResultsEntry(__res.size(), _M_nfa._M_quant_count)); - _M_e_closure(); + _M_stack._M_push(this->_M_start_state); } void @@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION this->_M_flags)); } + const _NFAT& _M_nfa; std::map<_StateIdT, _ResultsPtr> _M_covered; + _TodoList _M_match_stack; + _TodoList _M_stack; + _StateIdT _M_start_state; // To record global optimal solution. _ResultsPtr _M_cur_results; - const _NFAT& _M_nfa; - _StateIdT _M_start_state; }; //@} regex-detail diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 3b1fcbc9e38..59b082e2ac3 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -36,12 +36,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _BiIter, typename _Alloc, typename _CharT, typename _TraitsT> + bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: + _M_search() + { + if (_M_flags & regex_constants::match_continuous) + return _M_search_from_first(); + auto __cur = _M_begin; + do + { + _M_match_mode = false; + _M_init(__cur); + if (_M_main()) + return true; + } + // Continue when __cur == _M_end + while (__cur++ != _M_end); + return false; + } + + template<typename _BiIter, typename _Alloc, + typename _CharT, typename _TraitsT> bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: _M_dfs(_StateIdT __i) { - if (__i == _S_invalid_state_id) - // This is not that certain. Need deeper investigate. - return false; auto& __current = this->_M_current; const auto& __state = _M_nfa[__i]; bool __ret = false; @@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: _M_main() { + _M_e_closure(); bool __ret = false; if (!this->_M_match_mode && !(this->_M_flags & regex_constants::match_not_null)) @@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { _M_move(); ++this->_M_current; + if (_M_stack._M_empty()) + break; _M_e_closure(); if (!this->_M_match_mode) // To keep regex_search greedy, no "return true" here. @@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __ret = _M_includes_some(); if (__ret) this->_M_set_results(_M_cur_results->_M_get()); + _M_match_stack._M_clear(); + _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty()); return __ret; } @@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: _M_e_closure() { - std::queue<_StateIdT> __q; - std::vector<bool> __in_q(_M_nfa.size(), false); auto& __current = this->_M_current; - for (auto& __it : _M_covered) - { - __in_q[__it.first] = true; - __q.push(__it.first); - } - while (!__q.empty()) + while (!_M_stack._M_empty()) { - auto __u = __q.front(); - __q.pop(); - __in_q[__u] = false; + auto __u = _M_stack._M_pop(); + _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u)); const auto& __state = _M_nfa[__u]; // Can be implemented using method, but there will be too many // arguments. I would use macro function before C++11, but lambda is // a better choice, since hopefully compiler can inline it. - auto __add_visited_state = [&](_StateIdT __v) + auto __add_visited_state = [=](_StateIdT __v) { - if (__v == _S_invalid_state_id) - return; - if (_M_covered.count(__u) != 0 - && (_M_covered.count(__v) == 0 - || *_M_covered[__u] < *_M_covered[__v])) + if (_M_covered.count(__v) == 0) { _M_covered[__v] = _ResultsPtr(new _ResultsEntry(*_M_covered[__u])); + _M_stack._M_push(__v); + return; + } + auto& __cu = _M_covered[__u]; + auto& __cv = _M_covered[__v]; + if (*__cu < *__cv) + { + __cv = _ResultsPtr(new _ResultsEntry(*__cu)); // if a state is updated, it's outgoing neighbors should be // reconsidered too. Push them to the queue. - if (!__in_q[__v]) - { - __in_q[__v] = true; - __q.push(__v); - } + _M_stack._M_push(__v); } }; @@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION case _S_opcode_alternative: { __add_visited_state(__state._M_next); - auto __back = - _M_covered[__u]->_M_quant_keys[__state._M_quant_index]; - _M_covered[__u]->_M_inc(__state._M_quant_index, - __state._M_neg); + auto& __cu = *_M_covered[__u]; + auto __back = __cu._M_quant_keys[__state._M_quant_index]; + __cu._M_inc(__state._M_quant_index, __state._M_neg); __add_visited_state(__state._M_alt); - _M_covered[__u]->_M_quant_keys[__state._M_quant_index] - = __back; + __cu._M_quant_keys[__state._M_quant_index] = __back; } break; case _S_opcode_subexpr_begin: @@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __add_visited_state(__state._M_next); break; case _S_opcode_match: + _M_match_stack._M_push(__u); break; case _S_opcode_accept: break; @@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_move() { decltype(_M_covered) __next; - for (auto& __it : _M_covered) + while (!_M_match_stack._M_empty()) { - const auto& __state = _M_nfa[__it.first]; - if (__state._M_opcode == _S_opcode_match - && __state._M_matches(*this->_M_current)) - if (__state._M_next != _S_invalid_state_id) - if (__next.count(__state._M_next) == 0 - || *__it.second < *__next[__state._M_next]) - __next[__state._M_next] = move(__it.second); + auto __u = _M_match_stack._M_pop(); + const auto& __state = _M_nfa[__u]; + auto& __cu = _M_covered[__u]; + if (__state._M_matches(*this->_M_current) + && (__next.count(__state._M_next) == 0 + || *__cu < *__next[__state._M_next])) + { + __next[__state._M_next] = std::move(__cu); + _M_stack._M_push(__state._M_next); + } } _M_covered = move(__next); } @@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: _M_includes_some() { - auto& __s = _M_nfa._M_final_states(); - auto& __t = _M_covered; bool __succ = false; - if (__s.size() > 0 && __t.size() > 0) - { - auto __first = __s.begin(); - auto __second = __t.begin(); - while (__first != __s.end() && __second != __t.end()) - { - if (*__first < __second->first) - ++__first; - else if (*__first > __second->first) - ++__second; - else - { - if (_M_cur_results == nullptr - || *__second->second < *_M_cur_results) - _M_cur_results = - _ResultsPtr(new _ResultsEntry(*__second->second)); - __succ = true; - ++__first; - ++__second; - } - } - } + for (auto __u : _M_nfa._M_final_states()) + if (_M_covered.count(__u)) + { + __succ = true; + auto& __cu = _M_covered[__u]; + if (_M_cur_results == nullptr || *__cu < *_M_cur_results) + _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu)); + } return __succ; } diff --git a/libstdc++-v3/testsuite/performance/28_regex/split.cc b/libstdc++-v3/testsuite/performance/28_regex/split.cc new file mode 100644 index 00000000000..5b972e43696 --- /dev/null +++ b/libstdc++-v3/testsuite/performance/28_regex/split.cc @@ -0,0 +1,115 @@ +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 2013-10-08 Tim Shen <timshen91@gmail.com> + +#include <testsuite_performance.h> +#include <regex> + +using namespace __gnu_test; +using namespace std; + +void split(string s) +{ + regex re("\\s+"); + for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1); + it != sregex_token_iterator(); + ++it) + { + } +} + +int main() +{ + string source = "\ +// Copyright (C) 2013 Free Software Foundation, Inc.\n\ +//\n\ +// This file is part of the GNU ISO C++ Library. This library is free\n\ +// software; you can redistribute it and/or modify it under the\n\ +// terms of the GNU General Public License as published by the\n\ +// Free Software Foundation; either version 3, or (at your option)\n\ +// any later version.\n\ +\n\ +// This library is distributed in the hope that it will be useful,\n\ +// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ +// GNU General Public License for more details.\n\ +\n\ +// You should have received a copy of the GNU General Public License along\n\ +// with this library; see the file COPYING3. If not see\n\ +// <http://www.gnu.org/licenses/>.\n\ +\n\ +// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\ +\n\ +#include <testsuite_performance.h>\n\ +#include <regex>\n\ +\n\ +using namespace __gnu_test;\n\ +using namespace std;\n\ +\n\ +void split(string s)\n\ +{\n\ + regex re(\"\\s+\");\n\ + for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\ + it != sregex_token_iterator();\n\ + ++it)\n\ + {\n\ + }\n\ +}\n\ +\n\ +int main()\n\ +{\n\ + string source = \"\";\n\ + time_counter time;\n\ + resource_counter resource;\n\ +\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ +\n\ + start_counters(time, resource);\n\ + split(source);\n\ + stop_counters(time, resource);\n\ + report_performance(__FILE__, \"\", time, resource);\n\ +\n\ + return 0;\n\ +}\n"; + + time_counter time; + resource_counter resource; + + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + + start_counters(time, resource); + split(source); + stop_counters(time, resource); + report_performance(__FILE__, "", time, resource); + + return 0; +} |