00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00034
00035 namespace __regex
00036 {
00037 struct _Scanner_base
00038 {
00039 typedef unsigned int _StateT;
00040
00041 static constexpr _StateT _S_state_at_start = 1 << 0;
00042 static constexpr _StateT _S_state_in_brace = 1 << 2;
00043 static constexpr _StateT _S_state_in_bracket = 1 << 3;
00044 };
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056 template<typename _InputIterator>
00057 class _Scanner: public _Scanner_base
00058 {
00059 public:
00060 typedef _InputIterator _IteratorT;
00061 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00062 typedef std::basic_string<_CharT> _StringT;
00063 typedef regex_constants::syntax_option_type _FlagT;
00064 typedef const std::ctype<_CharT> _CtypeT;
00065
00066
00067 enum _TokenT
00068 {
00069 _S_token_anychar,
00070 _S_token_backref,
00071 _S_token_bracket_begin,
00072 _S_token_bracket_end,
00073 _S_token_inverse_class,
00074 _S_token_char_class_name,
00075 _S_token_closure0,
00076 _S_token_closure1,
00077 _S_token_collelem_multi,
00078 _S_token_collelem_single,
00079 _S_token_collsymbol,
00080 _S_token_comma,
00081 _S_token_dash,
00082 _S_token_dup_count,
00083 _S_token_eof,
00084 _S_token_equiv_class_name,
00085 _S_token_interval_begin,
00086 _S_token_interval_end,
00087 _S_token_line_begin,
00088 _S_token_line_end,
00089 _S_token_opt,
00090 _S_token_or,
00091 _S_token_ord_char,
00092 _S_token_quoted_char,
00093 _S_token_subexpr_begin,
00094 _S_token_subexpr_end,
00095 _S_token_word_begin,
00096 _S_token_word_end,
00097 _S_token_unknown
00098 };
00099
00100 public:
00101 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00102 std::locale __loc)
00103 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00104 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00105 { _M_advance(); }
00106
00107 void
00108 _M_advance();
00109
00110 _TokenT
00111 _M_token() const
00112 { return _M_curToken; }
00113
00114 const _StringT&
00115 _M_value() const
00116 { return _M_curValue; }
00117
00118 #ifdef _GLIBCXX_DEBUG
00119 std::ostream&
00120 _M_print(std::ostream&);
00121 #endif
00122
00123 private:
00124 void
00125 _M_eat_escape();
00126
00127 void
00128 _M_scan_in_brace();
00129
00130 void
00131 _M_scan_in_bracket();
00132
00133 void
00134 _M_eat_charclass();
00135
00136 void
00137 _M_eat_equivclass();
00138
00139 void
00140 _M_eat_collsymbol();
00141
00142 private:
00143 _IteratorT _M_current;
00144 _IteratorT _M_end;
00145 _FlagT _M_flags;
00146 _CtypeT& _M_ctype;
00147 _TokenT _M_curToken;
00148 _StringT _M_curValue;
00149 _StateT _M_state;
00150 };
00151
00152 template<typename _InputIterator>
00153 void
00154 _Scanner<_InputIterator>::
00155 _M_advance()
00156 {
00157 if (_M_current == _M_end)
00158 {
00159 _M_curToken = _S_token_eof;
00160 return;
00161 }
00162
00163 _CharT __c = *_M_current;
00164 if (_M_state & _S_state_in_bracket)
00165 {
00166 _M_scan_in_bracket();
00167 return;
00168 }
00169 if (_M_state & _S_state_in_brace)
00170 {
00171 _M_scan_in_brace();
00172 return;
00173 }
00174 #if 0
00175
00176
00177 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00178 {
00179 _M_curToken = _S_token_line_begin;
00180 ++_M_current;
00181 return;
00182 }
00183 else if (__c == _M_ctype.widen('$'))
00184 {
00185 _M_curToken = _S_token_line_end;
00186 ++_M_current;
00187 return;
00188 }
00189 #endif
00190 else if (__c == _M_ctype.widen('.'))
00191 {
00192 _M_curToken = _S_token_anychar;
00193 ++_M_current;
00194 return;
00195 }
00196 else if (__c == _M_ctype.widen('*'))
00197 {
00198 _M_curToken = _S_token_closure0;
00199 ++_M_current;
00200 return;
00201 }
00202 else if (__c == _M_ctype.widen('+'))
00203 {
00204 _M_curToken = _S_token_closure1;
00205 ++_M_current;
00206 return;
00207 }
00208 else if (__c == _M_ctype.widen('|'))
00209 {
00210 _M_curToken = _S_token_or;
00211 ++_M_current;
00212 return;
00213 }
00214 else if (__c == _M_ctype.widen('['))
00215 {
00216 _M_curToken = _S_token_bracket_begin;
00217 _M_state |= (_S_state_in_bracket | _S_state_at_start);
00218 ++_M_current;
00219 return;
00220 }
00221 else if (__c == _M_ctype.widen('\\'))
00222 {
00223 _M_eat_escape();
00224 return;
00225 }
00226 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00227 {
00228 if (__c == _M_ctype.widen('('))
00229 {
00230 _M_curToken = _S_token_subexpr_begin;
00231 ++_M_current;
00232 return;
00233 }
00234 else if (__c == _M_ctype.widen(')'))
00235 {
00236 _M_curToken = _S_token_subexpr_end;
00237 ++_M_current;
00238 return;
00239 }
00240 else if (__c == _M_ctype.widen('{'))
00241 {
00242 _M_curToken = _S_token_interval_begin;
00243 _M_state |= _S_state_in_brace;
00244 ++_M_current;
00245 return;
00246 }
00247 }
00248
00249 _M_curToken = _S_token_ord_char;
00250 _M_curValue.assign(1, __c);
00251 ++_M_current;
00252 }
00253
00254
00255 template<typename _InputIterator>
00256 void
00257 _Scanner<_InputIterator>::
00258 _M_scan_in_brace()
00259 {
00260 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00261 {
00262 _M_curToken = _S_token_dup_count;
00263 _M_curValue.assign(1, *_M_current);
00264 ++_M_current;
00265 while (_M_current != _M_end
00266 && _M_ctype.is(_CtypeT::digit, *_M_current))
00267 {
00268 _M_curValue += *_M_current;
00269 ++_M_current;
00270 }
00271 return;
00272 }
00273 else if (*_M_current == _M_ctype.widen(','))
00274 {
00275 _M_curToken = _S_token_comma;
00276 ++_M_current;
00277 return;
00278 }
00279 if (_M_flags & (regex_constants::basic | regex_constants::grep))
00280 {
00281 if (*_M_current == _M_ctype.widen('\\'))
00282 _M_eat_escape();
00283 }
00284 else
00285 {
00286 if (*_M_current == _M_ctype.widen('}'))
00287 {
00288 _M_curToken = _S_token_interval_end;
00289 _M_state &= ~_S_state_in_brace;
00290 ++_M_current;
00291 return;
00292 }
00293 }
00294 }
00295
00296 template<typename _InputIterator>
00297 void
00298 _Scanner<_InputIterator>::
00299 _M_scan_in_bracket()
00300 {
00301 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00302 {
00303 _M_curToken = _S_token_inverse_class;
00304 _M_state &= ~_S_state_at_start;
00305 ++_M_current;
00306 return;
00307 }
00308 else if (*_M_current == _M_ctype.widen('['))
00309 {
00310 ++_M_current;
00311 if (_M_current == _M_end)
00312 {
00313 _M_curToken = _S_token_eof;
00314 return;
00315 }
00316
00317 if (*_M_current == _M_ctype.widen('.'))
00318 {
00319 _M_curToken = _S_token_collsymbol;
00320 _M_eat_collsymbol();
00321 return;
00322 }
00323 else if (*_M_current == _M_ctype.widen(':'))
00324 {
00325 _M_curToken = _S_token_char_class_name;
00326 _M_eat_charclass();
00327 return;
00328 }
00329 else if (*_M_current == _M_ctype.widen('='))
00330 {
00331 _M_curToken = _S_token_equiv_class_name;
00332 _M_eat_equivclass();
00333 return;
00334 }
00335 }
00336 else if (*_M_current == _M_ctype.widen('-'))
00337 {
00338 _M_curToken = _S_token_dash;
00339 ++_M_current;
00340 return;
00341 }
00342 else if (*_M_current == _M_ctype.widen(']'))
00343 {
00344 if (!(_M_flags & regex_constants::ECMAScript)
00345 || !(_M_state & _S_state_at_start))
00346 {
00347
00348
00349 _M_curToken = _S_token_bracket_end;
00350 ++_M_current;
00351 return;
00352 }
00353 }
00354 _M_curToken = _S_token_collelem_single;
00355 _M_curValue.assign(1, *_M_current);
00356 ++_M_current;
00357 }
00358
00359 template<typename _InputIterator>
00360 void
00361 _Scanner<_InputIterator>::
00362 _M_eat_escape()
00363 {
00364 ++_M_current;
00365 if (_M_current == _M_end)
00366 {
00367 _M_curToken = _S_token_eof;
00368 return;
00369 }
00370 _CharT __c = *_M_current;
00371 ++_M_current;
00372
00373 if (__c == _M_ctype.widen('('))
00374 {
00375 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00376 {
00377 _M_curToken = _S_token_ord_char;
00378 _M_curValue.assign(1, __c);
00379 }
00380 else
00381 _M_curToken = _S_token_subexpr_begin;
00382 }
00383 else if (__c == _M_ctype.widen(')'))
00384 {
00385 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00386 {
00387 _M_curToken = _S_token_ord_char;
00388 _M_curValue.assign(1, __c);
00389 }
00390 else
00391 _M_curToken = _S_token_subexpr_end;
00392 }
00393 else if (__c == _M_ctype.widen('{'))
00394 {
00395 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00396 {
00397 _M_curToken = _S_token_ord_char;
00398 _M_curValue.assign(1, __c);
00399 }
00400 else
00401 {
00402 _M_curToken = _S_token_interval_begin;
00403 _M_state |= _S_state_in_brace;
00404 }
00405 }
00406 else if (__c == _M_ctype.widen('}'))
00407 {
00408 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00409 {
00410 _M_curToken = _S_token_ord_char;
00411 _M_curValue.assign(1, __c);
00412 }
00413 else
00414 {
00415 if (!(_M_state && _S_state_in_brace))
00416 __throw_regex_error(regex_constants::error_badbrace);
00417 _M_state &= ~_S_state_in_brace;
00418 _M_curToken = _S_token_interval_end;
00419 }
00420 }
00421 else if (__c == _M_ctype.widen('x'))
00422 {
00423 ++_M_current;
00424 if (_M_current == _M_end)
00425 {
00426 _M_curToken = _S_token_eof;
00427 return;
00428 }
00429 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00430 {
00431 _M_curValue.assign(1, *_M_current);
00432 ++_M_current;
00433 if (_M_current == _M_end)
00434 {
00435 _M_curToken = _S_token_eof;
00436 return;
00437 }
00438 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00439 {
00440 _M_curValue += *_M_current;
00441 ++_M_current;
00442 return;
00443 }
00444 }
00445 }
00446 else if (__c == _M_ctype.widen('^')
00447 || __c == _M_ctype.widen('.')
00448 || __c == _M_ctype.widen('*')
00449 || __c == _M_ctype.widen('$')
00450 || __c == _M_ctype.widen('\\'))
00451 {
00452 _M_curToken = _S_token_ord_char;
00453 _M_curValue.assign(1, __c);
00454 }
00455 else if (_M_ctype.is(_CtypeT::digit, __c))
00456 {
00457 _M_curToken = _S_token_backref;
00458 _M_curValue.assign(1, __c);
00459 }
00460 else
00461 __throw_regex_error(regex_constants::error_escape);
00462 }
00463
00464
00465
00466
00467 template<typename _InputIterator>
00468 void
00469 _Scanner<_InputIterator>::
00470 _M_eat_charclass()
00471 {
00472 ++_M_current;
00473 if (_M_current == _M_end)
00474 __throw_regex_error(regex_constants::error_ctype);
00475 for (_M_curValue.clear();
00476 _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00477 ++_M_current)
00478 _M_curValue += *_M_current;
00479 if (_M_current == _M_end)
00480 __throw_regex_error(regex_constants::error_ctype);
00481 ++_M_current;
00482 if (*_M_current != _M_ctype.widen(']'))
00483 __throw_regex_error(regex_constants::error_ctype);
00484 ++_M_current;
00485 }
00486
00487
00488 template<typename _InputIterator>
00489 void
00490 _Scanner<_InputIterator>::
00491 _M_eat_equivclass()
00492 {
00493 ++_M_current;
00494 if (_M_current == _M_end)
00495 __throw_regex_error(regex_constants::error_collate);
00496 for (_M_curValue.clear();
00497 _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00498 ++_M_current)
00499 _M_curValue += *_M_current;
00500 if (_M_current == _M_end)
00501 __throw_regex_error(regex_constants::error_collate);
00502 ++_M_current;
00503 if (*_M_current != _M_ctype.widen(']'))
00504 __throw_regex_error(regex_constants::error_collate);
00505 ++_M_current;
00506 }
00507
00508
00509 template<typename _InputIterator>
00510 void
00511 _Scanner<_InputIterator>::
00512 _M_eat_collsymbol()
00513 {
00514 ++_M_current;
00515 if (_M_current == _M_end)
00516 __throw_regex_error(regex_constants::error_collate);
00517 for (_M_curValue.clear();
00518 _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00519 ++_M_current)
00520 _M_curValue += *_M_current;
00521 if (_M_current == _M_end)
00522 __throw_regex_error(regex_constants::error_collate);
00523 ++_M_current;
00524 if (*_M_current != _M_ctype.widen(']'))
00525 __throw_regex_error(regex_constants::error_collate);
00526 ++_M_current;
00527 }
00528
00529 #ifdef _GLIBCXX_DEBUG
00530 template<typename _InputIterator>
00531 std::ostream&
00532 _Scanner<_InputIterator>::
00533 _M_print(std::ostream& ostr)
00534 {
00535 switch (_M_curToken)
00536 {
00537 case _S_token_anychar:
00538 ostr << "any-character\n";
00539 break;
00540 case _S_token_backref:
00541 ostr << "backref\n";
00542 break;
00543 case _S_token_bracket_begin:
00544 ostr << "bracket-begin\n";
00545 break;
00546 case _S_token_bracket_end:
00547 ostr << "bracket-end\n";
00548 break;
00549 case _S_token_char_class_name:
00550 ostr << "char-class-name \"" << _M_curValue << "\"\n";
00551 break;
00552 case _S_token_closure0:
00553 ostr << "closure0\n";
00554 break;
00555 case _S_token_closure1:
00556 ostr << "closure1\n";
00557 break;
00558 case _S_token_collelem_multi:
00559 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00560 break;
00561 case _S_token_collelem_single:
00562 ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00563 break;
00564 case _S_token_collsymbol:
00565 ostr << "collsymbol \"" << _M_curValue << "\"\n";
00566 break;
00567 case _S_token_comma:
00568 ostr << "comma\n";
00569 break;
00570 case _S_token_dash:
00571 ostr << "dash\n";
00572 break;
00573 case _S_token_dup_count:
00574 ostr << "dup count: " << _M_curValue << "\n";
00575 break;
00576 case _S_token_eof:
00577 ostr << "EOF\n";
00578 break;
00579 case _S_token_equiv_class_name:
00580 ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00581 break;
00582 case _S_token_interval_begin:
00583 ostr << "interval begin\n";
00584 break;
00585 case _S_token_interval_end:
00586 ostr << "interval end\n";
00587 break;
00588 case _S_token_line_begin:
00589 ostr << "line begin\n";
00590 break;
00591 case _S_token_line_end:
00592 ostr << "line end\n";
00593 break;
00594 case _S_token_opt:
00595 ostr << "opt\n";
00596 break;
00597 case _S_token_or:
00598 ostr << "or\n";
00599 break;
00600 case _S_token_ord_char:
00601 ostr << "ordinary character: \"" << _M_value() << "\"\n";
00602 break;
00603 case _S_token_quoted_char:
00604 ostr << "quoted char\n";
00605 break;
00606 case _S_token_subexpr_begin:
00607 ostr << "subexpr begin\n";
00608 break;
00609 case _S_token_subexpr_end:
00610 ostr << "subexpr end\n";
00611 break;
00612 case _S_token_word_begin:
00613 ostr << "word begin\n";
00614 break;
00615 case _S_token_word_end:
00616 ostr << "word end\n";
00617 break;
00618 case _S_token_unknown:
00619 ostr << "-- unknown token --\n";
00620 break;
00621 }
00622 return ostr;
00623 }
00624 #endif
00625
00626
00627 template<typename _InIter, typename _TraitsT>
00628 class _Compiler
00629 {
00630 public:
00631 typedef _InIter _IterT;
00632 typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00633 typedef std::basic_string<_CharT> _StringT;
00634 typedef regex_constants::syntax_option_type _FlagT;
00635
00636 public:
00637 _Compiler(const _InIter& __b, const _InIter& __e,
00638 _TraitsT& __traits, _FlagT __flags);
00639
00640 const _Nfa&
00641 _M_nfa() const
00642 { return _M_state_store; }
00643
00644 private:
00645 typedef _Scanner<_InIter> _ScannerT;
00646 typedef typename _ScannerT::_TokenT _TokenT;
00647 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00648 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
00649
00650
00651 bool
00652 _M_match_token(_TokenT __token);
00653
00654 void
00655 _M_disjunction();
00656
00657 bool
00658 _M_alternative();
00659
00660 bool
00661 _M_term();
00662
00663 bool
00664 _M_assertion();
00665
00666 bool
00667 _M_quantifier();
00668
00669 bool
00670 _M_atom();
00671
00672 bool
00673 _M_bracket_expression();
00674
00675 bool
00676 _M_bracket_list(_RMatcherT& __matcher);
00677
00678 bool
00679 _M_follow_list(_RMatcherT& __matcher);
00680
00681 bool
00682 _M_follow_list2(_RMatcherT& __matcher);
00683
00684 bool
00685 _M_expression_term(_RMatcherT& __matcher);
00686
00687 bool
00688 _M_range_expression(_RMatcherT& __matcher);
00689
00690 bool
00691 _M_start_range(_RMatcherT& __matcher);
00692
00693 bool
00694 _M_collating_symbol(_RMatcherT& __matcher);
00695
00696 bool
00697 _M_equivalence_class(_RMatcherT& __matcher);
00698
00699 bool
00700 _M_character_class(_RMatcherT& __matcher);
00701
00702 int
00703 _M_cur_int_value(int __radix);
00704
00705 private:
00706 _TraitsT& _M_traits;
00707 _ScannerT _M_scanner;
00708 _StringT _M_cur_value;
00709 _Nfa _M_state_store;
00710 _StackT _M_stack;
00711 };
00712
00713 template<typename _InIter, typename _TraitsT>
00714 _Compiler<_InIter, _TraitsT>::
00715 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00716 _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00717 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00718 _M_state_store(__flags)
00719 {
00720 typedef _StartTagger<_InIter, _TraitsT> _Start;
00721 typedef _EndTagger<_InIter, _TraitsT> _End;
00722
00723 _StateSeq __r(_M_state_store,
00724 _M_state_store._M_insert_subexpr_begin(_Start(0)));
00725 _M_disjunction();
00726 if (!_M_stack.empty())
00727 {
00728 __r._M_append(_M_stack.top());
00729 _M_stack.pop();
00730 }
00731 __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
00732 __r._M_append(_M_state_store._M_insert_accept());
00733 }
00734
00735 template<typename _InIter, typename _TraitsT>
00736 bool
00737 _Compiler<_InIter, _TraitsT>::
00738 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00739 {
00740 if (token == _M_scanner._M_token())
00741 {
00742 _M_cur_value = _M_scanner._M_value();
00743 _M_scanner._M_advance();
00744 return true;
00745 }
00746 return false;
00747 }
00748
00749 template<typename _InIter, typename _TraitsT>
00750 void
00751 _Compiler<_InIter, _TraitsT>::
00752 _M_disjunction()
00753 {
00754 this->_M_alternative();
00755 if (_M_match_token(_ScannerT::_S_token_or))
00756 {
00757 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00758 this->_M_disjunction();
00759 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00760 _M_stack.push(_StateSeq(__alt1, __alt2));
00761 }
00762 }
00763
00764 template<typename _InIter, typename _TraitsT>
00765 bool
00766 _Compiler<_InIter, _TraitsT>::
00767 _M_alternative()
00768 {
00769 if (this->_M_term())
00770 {
00771 _StateSeq __re = _M_stack.top(); _M_stack.pop();
00772 this->_M_alternative();
00773 if (!_M_stack.empty())
00774 {
00775 __re._M_append(_M_stack.top());
00776 _M_stack.pop();
00777 }
00778 _M_stack.push(__re);
00779 return true;
00780 }
00781 return false;
00782 }
00783
00784 template<typename _InIter, typename _TraitsT>
00785 bool
00786 _Compiler<_InIter, _TraitsT>::
00787 _M_term()
00788 {
00789 if (this->_M_assertion())
00790 return true;
00791 if (this->_M_atom())
00792 {
00793 this->_M_quantifier();
00794 return true;
00795 }
00796 return false;
00797 }
00798
00799 template<typename _InIter, typename _TraitsT>
00800 bool
00801 _Compiler<_InIter, _TraitsT>::
00802 _M_assertion()
00803 {
00804 if (_M_match_token(_ScannerT::_S_token_line_begin))
00805 {
00806
00807 return true;
00808 }
00809 if (_M_match_token(_ScannerT::_S_token_line_end))
00810 {
00811
00812 return true;
00813 }
00814 if (_M_match_token(_ScannerT::_S_token_word_begin))
00815 {
00816
00817 return true;
00818 }
00819 if (_M_match_token(_ScannerT::_S_token_word_end))
00820 {
00821
00822 return true;
00823 }
00824 return false;
00825 }
00826
00827 template<typename _InIter, typename _TraitsT>
00828 bool
00829 _Compiler<_InIter, _TraitsT>::
00830 _M_quantifier()
00831 {
00832 if (_M_match_token(_ScannerT::_S_token_closure0))
00833 {
00834 if (_M_stack.empty())
00835 __throw_regex_error(regex_constants::error_badrepeat);
00836 _StateSeq __r(_M_stack.top(), -1);
00837 __r._M_append(__r._M_front());
00838 _M_stack.pop();
00839 _M_stack.push(__r);
00840 return true;
00841 }
00842 if (_M_match_token(_ScannerT::_S_token_closure1))
00843 {
00844 if (_M_stack.empty())
00845 __throw_regex_error(regex_constants::error_badrepeat);
00846 _StateSeq __r(_M_state_store,
00847 _M_state_store.
00848 _M_insert_alt(_S_invalid_state_id,
00849 _M_stack.top()._M_front()));
00850 _M_stack.top()._M_append(__r);
00851 return true;
00852 }
00853 if (_M_match_token(_ScannerT::_S_token_opt))
00854 {
00855 if (_M_stack.empty())
00856 __throw_regex_error(regex_constants::error_badrepeat);
00857 _StateSeq __r(_M_stack.top(), -1);
00858 _M_stack.pop();
00859 _M_stack.push(__r);
00860 return true;
00861 }
00862 if (_M_match_token(_ScannerT::_S_token_interval_begin))
00863 {
00864 if (_M_stack.empty())
00865 __throw_regex_error(regex_constants::error_badrepeat);
00866 if (!_M_match_token(_ScannerT::_S_token_dup_count))
00867 __throw_regex_error(regex_constants::error_badbrace);
00868 _StateSeq __r(_M_stack.top());
00869 int __min_rep = _M_cur_int_value(10);
00870 for (int __i = 1; __i < __min_rep; ++__i)
00871 _M_stack.top()._M_append(__r._M_clone());
00872 if (_M_match_token(_ScannerT::_S_token_comma))
00873 if (_M_match_token(_ScannerT::_S_token_dup_count))
00874 {
00875 int __n = _M_cur_int_value(10) - __min_rep;
00876 if (__n < 0)
00877 __throw_regex_error(regex_constants::error_badbrace);
00878 for (int __i = 0; __i < __n; ++__i)
00879 {
00880 _StateSeq __r(_M_state_store,
00881 _M_state_store.
00882 _M_insert_alt(_S_invalid_state_id,
00883 _M_stack.top()._M_front()));
00884 _M_stack.top()._M_append(__r);
00885 }
00886 }
00887 else
00888 {
00889 _StateSeq __r(_M_stack.top(), -1);
00890 __r._M_push_back(__r._M_front());
00891 _M_stack.pop();
00892 _M_stack.push(__r);
00893 }
00894 if (!_M_match_token(_ScannerT::_S_token_interval_end))
00895 __throw_regex_error(regex_constants::error_brace);
00896 return true;
00897 }
00898 return false;
00899 }
00900
00901 template<typename _InIter, typename _TraitsT>
00902 bool
00903 _Compiler<_InIter, _TraitsT>::
00904 _M_atom()
00905 {
00906 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00907 typedef _StartTagger<_InIter, _TraitsT> _Start;
00908 typedef _EndTagger<_InIter, _TraitsT> _End;
00909
00910 if (_M_match_token(_ScannerT::_S_token_anychar))
00911 {
00912 _M_stack.push(_StateSeq(_M_state_store,
00913 _M_state_store._M_insert_matcher
00914 (_AnyMatcher)));
00915 return true;
00916 }
00917 if (_M_match_token(_ScannerT::_S_token_ord_char))
00918 {
00919 _M_stack.push(_StateSeq(_M_state_store,
00920 _M_state_store._M_insert_matcher
00921 (_CMatcher(_M_cur_value[0], _M_traits))));
00922 return true;
00923 }
00924 if (_M_match_token(_ScannerT::_S_token_quoted_char))
00925 {
00926
00927 _M_stack.push(_StateSeq(_M_state_store,
00928 _M_state_store._M_insert_matcher
00929 (_CMatcher(_M_cur_value[0], _M_traits))));
00930 return true;
00931 }
00932 if (_M_match_token(_ScannerT::_S_token_backref))
00933 {
00934
00935 return true;
00936 }
00937 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00938 {
00939 int __mark = _M_state_store._M_sub_count();
00940 _StateSeq __r(_M_state_store,
00941 _M_state_store.
00942 _M_insert_subexpr_begin(_Start(__mark)));
00943 this->_M_disjunction();
00944 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00945 __throw_regex_error(regex_constants::error_paren);
00946 if (!_M_stack.empty())
00947 {
00948 __r._M_append(_M_stack.top());
00949 _M_stack.pop();
00950 }
00951 __r._M_append(_M_state_store._M_insert_subexpr_end
00952 (__mark, _End(__mark)));
00953 _M_stack.push(__r);
00954 return true;
00955 }
00956 return _M_bracket_expression();
00957 }
00958
00959 template<typename _InIter, typename _TraitsT>
00960 bool
00961 _Compiler<_InIter, _TraitsT>::
00962 _M_bracket_expression()
00963 {
00964 if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00965 {
00966 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00967 _M_traits);
00968 if (!_M_bracket_list(__matcher)
00969 || !_M_match_token(_ScannerT::_S_token_bracket_end))
00970 __throw_regex_error(regex_constants::error_brack);
00971 _M_stack.push(_StateSeq(_M_state_store,
00972 _M_state_store._M_insert_matcher(__matcher)));
00973 return true;
00974 }
00975 return false;
00976 }
00977
00978
00979
00980 template<typename _InIter, typename _TraitsT>
00981 bool
00982 _Compiler<_InIter, _TraitsT>::
00983 _M_bracket_list(_RMatcherT& __matcher)
00984 {
00985 if (_M_follow_list(__matcher))
00986 {
00987 if (_M_match_token(_ScannerT::_S_token_dash))
00988 __matcher._M_add_char(_M_cur_value[0]);
00989 return true;
00990 }
00991 return false;
00992 }
00993
00994 template<typename _InIter, typename _TraitsT>
00995 bool
00996 _Compiler<_InIter, _TraitsT>::
00997 _M_follow_list(_RMatcherT& __matcher)
00998 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
00999
01000 template<typename _InIter, typename _TraitsT>
01001 bool
01002 _Compiler<_InIter, _TraitsT>::
01003 _M_follow_list2(_RMatcherT& __matcher)
01004 {
01005 if (_M_expression_term(__matcher))
01006 return _M_follow_list2(__matcher);
01007 return true;
01008 }
01009
01010 template<typename _InIter, typename _TraitsT>
01011 bool
01012 _Compiler<_InIter, _TraitsT>::
01013 _M_expression_term(_RMatcherT& __matcher)
01014 {
01015 return (_M_collating_symbol(__matcher)
01016 || _M_character_class(__matcher)
01017 || _M_equivalence_class(__matcher)
01018 || (_M_start_range(__matcher)
01019 && _M_range_expression(__matcher)));
01020 }
01021
01022 template<typename _InIter, typename _TraitsT>
01023 bool
01024 _Compiler<_InIter, _TraitsT>::
01025 _M_range_expression(_RMatcherT& __matcher)
01026 {
01027 if (!_M_collating_symbol(__matcher))
01028 if (!_M_match_token(_ScannerT::_S_token_dash))
01029 __throw_regex_error(regex_constants::error_range);
01030 __matcher._M_make_range();
01031 return true;
01032 }
01033
01034 template<typename _InIter, typename _TraitsT>
01035 bool
01036 _Compiler<_InIter, _TraitsT>::
01037 _M_start_range(_RMatcherT& __matcher)
01038 { return _M_match_token(_ScannerT::_S_token_dash); }
01039
01040 template<typename _InIter, typename _TraitsT>
01041 bool
01042 _Compiler<_InIter, _TraitsT>::
01043 _M_collating_symbol(_RMatcherT& __matcher)
01044 {
01045 if (_M_match_token(_ScannerT::_S_token_collelem_single))
01046 {
01047 __matcher._M_add_char(_M_cur_value[0]);
01048 return true;
01049 }
01050 if (_M_match_token(_ScannerT::_S_token_collsymbol))
01051 {
01052 __matcher._M_add_collating_element(_M_cur_value);
01053 return true;
01054 }
01055 return false;
01056 }
01057
01058 template<typename _InIter, typename _TraitsT>
01059 bool
01060 _Compiler<_InIter, _TraitsT>::
01061 _M_equivalence_class(_RMatcherT& __matcher)
01062 {
01063 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01064 {
01065 __matcher._M_add_equivalence_class(_M_cur_value);
01066 return true;
01067 }
01068 return false;
01069 }
01070
01071 template<typename _InIter, typename _TraitsT>
01072 bool
01073 _Compiler<_InIter, _TraitsT>::
01074 _M_character_class(_RMatcherT& __matcher)
01075 {
01076 if (_M_match_token(_ScannerT::_S_token_char_class_name))
01077 {
01078 __matcher._M_add_character_class(_M_cur_value);
01079 return true;
01080 }
01081 return false;
01082 }
01083
01084 template<typename _InIter, typename _TraitsT>
01085 int
01086 _Compiler<_InIter, _TraitsT>::
01087 _M_cur_int_value(int __radix)
01088 {
01089 int __v = 0;
01090 for (typename _StringT::size_type __i = 0;
01091 __i < _M_cur_value.length(); ++__i)
01092 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01093 return __v;
01094 }
01095
01096 template<typename _InIter, typename _TraitsT>
01097 _AutomatonPtr
01098 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01099 regex_constants::syntax_option_type __f)
01100 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01101 __f)._M_nfa())); }
01102
01103 }
01104
01105 _GLIBCXX_END_NAMESPACE_VERSION
01106 }
01107
01108