1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 // See below __regex_algo_impl to get what this is talking about. The default
32 // value 1 indicated a conservative optimization without giving up worst case
34 #ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
35 #define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
38 namespace std _GLIBCXX_VISIBILITY(default)
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44 // Result of merging regex_match and regex_search.
46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47 // the other one if possible, for test purpose).
49 // That __match_mode is true means regex_match, else regex_search.
50 template<typename _BiIter, typename _Alloc,
51 typename _CharT, typename _TraitsT,
52 _RegexExecutorPolicy __policy,
55 __regex_algo_impl(_BiIter __s,
57 match_results<_BiIter, _Alloc>& __m,
58 const basic_regex<_CharT, _TraitsT>& __re,
59 regex_constants::match_flag_type __flags)
61 if (__re._M_automaton == nullptr)
64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
66 __res.resize(__re._M_automaton->_M_sub_count() + 2);
67 for (auto& __it : __res)
70 // This function decide which executor to use under given circumstances.
71 // The _S_auto policy now is the following: if a NFA has no
72 // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
73 // quantifiers (*, +, ?), the BFS executor will be used, other wise
74 // DFS executor. This is because DFS executor has a exponential upper
75 // bound, but better best-case performace. Meanwhile, BFS executor can
76 // effectively prevent from exponential-long time matching (which must
77 // contains many quantifiers), but it's slower in average.
79 // For simple regex, BFS executor could be 2 or more times slower than
82 // Of course, BFS executor cannot handle back-references.
84 if (!__re._M_automaton->_M_has_backref
85 && (__policy == _RegexExecutorPolicy::_S_alternate
86 || __re._M_automaton->_M_quant_count
87 > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
89 _Executor<_BiIter, _Alloc, _TraitsT, false>
90 __executor(__s, __e, __m, __re, __flags);
92 __ret = __executor._M_match();
94 __ret = __executor._M_search();
98 _Executor<_BiIter, _Alloc, _TraitsT, true>
99 __executor(__s, __e, __m, __re, __flags);
101 __ret = __executor._M_match();
103 __ret = __executor._M_search();
107 for (auto __it : __res)
109 __it.first = __it.second = __e;
110 auto& __pre = __res[__res.size()-2];
111 auto& __suf = __res[__res.size()-1];
114 __pre.matched = false;
117 __suf.matched = false;
124 __pre.second = __res[0].first;
125 __pre.matched = (__pre.first != __pre.second);
126 __suf.first = __res[0].second;
128 __suf.matched = (__suf.first != __suf.second);
134 _GLIBCXX_END_NAMESPACE_VERSION
137 _GLIBCXX_BEGIN_NAMESPACE_VERSION
139 template<typename _Ch_type>
140 template<typename _Fwd_iter>
141 typename regex_traits<_Ch_type>::string_type
142 regex_traits<_Ch_type>::
143 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
145 typedef std::ctype<char_type> __ctype_type;
146 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
148 static const char* __collatenames[] =
241 "left-square-bracket",
243 "right-square-bracket",
273 "left-curly-bracket",
275 "right-curly-bracket",
281 for (; __first != __last; ++__first)
282 __s += __fctyp.narrow(*__first, 0);
284 for (const auto& __it : __collatenames)
286 return string_type(1, __fctyp.widen(
287 static_cast<char>(&__it - __collatenames)));
289 // TODO Add digraph support:
290 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
292 return string_type();
295 template<typename _Ch_type>
296 template<typename _Fwd_iter>
297 typename regex_traits<_Ch_type>::char_class_type
298 regex_traits<_Ch_type>::
299 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
301 typedef std::ctype<char_type> __ctype_type;
302 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
304 // Mappings from class name to class mask.
305 static const pair<const char*, char_class_type> __classnames[] =
307 {"d", ctype_base::digit},
308 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
309 {"s", ctype_base::space},
310 {"alnum", ctype_base::alnum},
311 {"alpha", ctype_base::alpha},
312 {"blank", {0, _RegexMask::_S_blank}},
313 {"cntrl", ctype_base::cntrl},
314 {"digit", ctype_base::digit},
315 {"graph", ctype_base::graph},
316 {"lower", ctype_base::lower},
317 {"print", ctype_base::print},
318 {"punct", ctype_base::punct},
319 {"space", ctype_base::space},
320 {"upper", ctype_base::upper},
321 {"xdigit", ctype_base::xdigit},
325 for (; __first != __last; ++__first)
326 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
328 for (const auto& __it : __classnames)
329 if (__s == __it.first)
333 & (ctype_base::lower | ctype_base::upper)) != 0))
334 return ctype_base::alpha;
340 template<typename _Ch_type>
342 regex_traits<_Ch_type>::
343 isctype(_Ch_type __c, char_class_type __f) const
345 typedef std::ctype<char_type> __ctype_type;
346 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
348 return __fctyp.is(__f._M_base, __c)
350 || ((__f._M_extended & _RegexMask::_S_under)
351 && __c == __fctyp.widen('_'))
353 || ((__f._M_extended & _RegexMask::_S_blank)
354 && (__c == __fctyp.widen(' ')
355 || __c == __fctyp.widen('\t')));
358 template<typename _Ch_type>
360 regex_traits<_Ch_type>::
361 value(_Ch_type __ch, int __radix) const
363 std::basic_istringstream<char_type> __is(string_type(1, __ch));
367 else if (__radix == 16)
370 return __is.fail() ? -1 : __v;
373 template<typename _Bi_iter, typename _Alloc>
374 template<typename _Out_iter>
375 _Out_iter match_results<_Bi_iter, _Alloc>::
376 format(_Out_iter __out,
377 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
378 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
379 match_flag_type __flags) const
381 _GLIBCXX_DEBUG_ASSERT( ready() );
382 regex_traits<char_type> __traits;
383 typedef std::ctype<char_type> __ctype_type;
385 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
387 auto __output = [&](size_t __idx)
389 auto& __sub = _Base_type::operator[](__idx);
391 __out = std::copy(__sub.first, __sub.second, __out);
394 if (__flags & regex_constants::format_sed)
396 for (; __fmt_first != __fmt_last;)
397 if (*__fmt_first == '&')
402 else if (*__fmt_first == '\\')
404 if (++__fmt_first != __fmt_last
405 && __fctyp.is(__ctype_type::digit, *__fmt_first))
406 __output(__traits.value(*__fmt_first++, 10));
411 *__out++ = *__fmt_first++;
417 auto __next = std::find(__fmt_first, __fmt_last, '$');
418 if (__next == __fmt_last)
421 __out = std::copy(__fmt_first, __next, __out);
423 auto __eat = [&](char __ch) -> bool
433 if (++__next == __fmt_last)
440 __output(_Base_type::size()-2);
441 else if (__eat('\''))
442 __output(_Base_type::size()-1);
443 else if (__fctyp.is(__ctype_type::digit, *__next))
445 long __num = __traits.value(*__next, 10);
446 if (++__next != __fmt_last
447 && __fctyp.is(__ctype_type::digit, *__next))
450 __num += __traits.value(*__next++, 10);
452 if (0 <= __num && __num < this->size())
457 __fmt_first = __next;
459 __out = std::copy(__fmt_first, __fmt_last, __out);
464 template<typename _Out_iter, typename _Bi_iter,
465 typename _Rx_traits, typename _Ch_type>
467 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
468 const basic_regex<_Ch_type, _Rx_traits>& __e,
469 const _Ch_type* __fmt,
470 regex_constants::match_flag_type __flags)
472 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
473 _IterT __i(__first, __last, __e, __flags);
477 if (!(__flags & regex_constants::format_no_copy))
478 __out = std::copy(__first, __last, __out);
482 sub_match<_Bi_iter> __last;
483 auto __len = char_traits<_Ch_type>::length(__fmt);
484 for (; __i != __end; ++__i)
486 if (!(__flags & regex_constants::format_no_copy))
487 __out = std::copy(__i->prefix().first, __i->prefix().second,
489 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
490 __last = __i->suffix();
491 if (__flags & regex_constants::format_first_only)
494 if (!(__flags & regex_constants::format_no_copy))
495 __out = std::copy(__last.first, __last.second, __out);
500 template<typename _Bi_iter,
504 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
505 operator==(const regex_iterator& __rhs) const
507 return (_M_match.empty() && __rhs._M_match.empty())
508 || (_M_begin == __rhs._M_begin
509 && _M_end == __rhs._M_end
510 && _M_pregex == __rhs._M_pregex
511 && _M_flags == __rhs._M_flags
512 && _M_match[0] == __rhs._M_match[0]);
515 template<typename _Bi_iter,
518 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
519 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
522 // In all cases in which the call to regex_search returns true,
523 // match.prefix().first shall be equal to the previous value of
524 // match[0].second, and for each index i in the half-open range
525 // [0, match.size()) for which match[i].matched is true,
526 // match[i].position() shall return distance(begin, match[i].first).
528 if (_M_match[0].matched)
530 auto __start = _M_match[0].second;
531 auto __prefix_first = _M_match[0].second;
532 if (_M_match[0].first == _M_match[0].second)
534 if (__start == _M_end)
536 _M_match = value_type();
541 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
543 | regex_constants::match_not_null
544 | regex_constants::match_continuous))
546 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
547 auto& __prefix = _M_match.at(_M_match.size());
548 __prefix.first = __prefix_first;
549 __prefix.matched = __prefix.first != __prefix.second;
551 _M_match._M_begin = _M_begin;
558 _M_flags |= regex_constants::match_prev_avail;
559 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
561 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
562 auto& __prefix = _M_match.at(_M_match.size());
563 __prefix.first = __prefix_first;
564 __prefix.matched = __prefix.first != __prefix.second;
566 _M_match._M_begin = _M_begin;
569 _M_match = value_type();
574 template<typename _Bi_iter,
577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
578 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
579 operator=(const regex_token_iterator& __rhs)
581 _M_position = __rhs._M_position;
582 _M_subs = __rhs._M_subs;
584 _M_suffix = __rhs._M_suffix;
585 _M_has_m1 = __rhs._M_has_m1;
586 _M_normalize_result();
590 template<typename _Bi_iter,
594 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
595 operator==(const regex_token_iterator& __rhs) const
597 if (_M_end_of_seq() && __rhs._M_end_of_seq())
599 if (_M_suffix.matched && __rhs._M_suffix.matched
600 && _M_suffix == __rhs._M_suffix)
602 if (_M_end_of_seq() || _M_suffix.matched
603 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
605 return _M_position == __rhs._M_position
606 && _M_n == __rhs._M_n
607 && _M_subs == __rhs._M_subs;
610 template<typename _Bi_iter,
613 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
614 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
617 _Position __prev = _M_position;
618 if (_M_suffix.matched)
619 *this = regex_token_iterator();
620 else if (_M_n + 1 < _M_subs.size())
623 _M_result = &_M_current_match();
629 if (_M_position != _Position())
630 _M_result = &_M_current_match();
631 else if (_M_has_m1 && __prev->suffix().length() != 0)
633 _M_suffix.matched = true;
634 _M_suffix.first = __prev->suffix().first;
635 _M_suffix.second = __prev->suffix().second;
636 _M_result = &_M_suffix;
639 *this = regex_token_iterator();
644 template<typename _Bi_iter,
648 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
649 _M_init(_Bi_iter __a, _Bi_iter __b)
652 for (auto __it : _M_subs)
658 if (_M_position != _Position())
659 _M_result = &_M_current_match();
662 _M_suffix.matched = true;
663 _M_suffix.first = __a;
664 _M_suffix.second = __b;
665 _M_result = &_M_suffix;
671 _GLIBCXX_END_NAMESPACE_VERSION