--- /dev/null
+/*************************************************************************\r
+*\r
+* Licensed under the Apache License, Version 2.0 (the "License");\r
+* you may not use this file except in compliance with the License.\r
+* You may obtain a copy of the License at\r
+*\r
+* http://www.apache.org/licenses/LICENSE-2.0\r
+*\r
+* Unless required by applicable law or agreed to in writing, software\r
+* distributed under the License is distributed on an "AS IS" BASIS,\r
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+* See the License for the specific language governing permissions and\r
+* limitations under the License.\r
+***************************************************************************/\r
+\r
+#include <algorithm>\r
+#include <cassert>\r
+#include <climits>\r
+#include <ctime>\r
+#include <sstream>\r
+#include "regxstring.h"\r
+#include "regxstring_impl.h"\r
+\r
+#if _DZ_DEBUG\r
+# include <iostream>\r
+\r
+// Debug helper: for every recorded reference, writes a tab followed by the\r
+// slice of the text generated so far that the reference describes.\r
+static void printRefs(__DZ_OSTRINGSTREAM & oss,const REGXSTRING_NS::__Refs & refs)\r
+{\r
+    typedef REGXSTRING_NS::__Refs::const_iterator RefIter;\r
+    RefIter it = refs.begin();\r
+    while(it != refs.end()){\r
+        std::cout<<"\t"<<oss.str().substr(it->first,it->second);\r
+        ++it;\r
+    }\r
+}\r
+\r
+// Debug trace: prints `msg`, the text generated so far (gdata.oss_) and every\r
+// recorded reference. Expects a variable named `gdata` in the calling scope.\r
+# define _OSS_OUT(msg) { \\r
+ std::cout<<msg<<" : "<<gdata.oss_.str(); \\r
+ printRefs(gdata.oss_,gdata.refs_); \\r
+ std::cout<<std::endl;}\r
+#else\r
+// Tracing expands to nothing when _DZ_DEBUG is zero or undefined.\r
+# define _OSS_OUT(str)\r
+#endif\r
+\r
+NAMESAPCE_BEGIN\r
+\r
+// Replacements for new and delete\r
+// Allocates storage for one T through __DZ_ALLOC and default-constructs it there.\r
+// The storage is handed back to the allocator if the constructor throws, so a\r
+// failed construction does not leak.\r
+template<class T>\r
+T * New(){\r
+    __DZ_ALLOC<T> alloc;\r
+    T * mem = alloc.allocate(1);\r
+    try{\r
+        return new (mem) T;\r
+    }catch(...){\r
+        alloc.deallocate(mem,1);\r
+        throw;\r
+    }\r
+}\r
+\r
+// Allocates storage for one T through __DZ_ALLOC and constructs it as T(a).\r
+// The storage is handed back to the allocator if the constructor throws, so a\r
+// failed construction does not leak.\r
+template<class T,class A>\r
+T * New(const A & a){\r
+    __DZ_ALLOC<T> alloc;\r
+    T * mem = alloc.allocate(1);\r
+    try{\r
+        return new (mem) T(a);\r
+    }catch(...){\r
+        alloc.deallocate(mem,1);\r
+        throw;\r
+    }\r
+}\r
+\r
+// Allocates storage for one T through __DZ_ALLOC and constructs it as T(a,b).\r
+// The storage is handed back to the allocator if the constructor throws, so a\r
+// failed construction does not leak.\r
+template<class T,class A,class B>\r
+T * New(const A & a,const B & b){\r
+    __DZ_ALLOC<T> alloc;\r
+    T * mem = alloc.allocate(1);\r
+    try{\r
+        return new (mem) T(a,b);\r
+    }catch(...){\r
+        alloc.deallocate(mem,1);\r
+        throw;\r
+    }\r
+}\r
+\r
+// Allocates storage for one T through __DZ_ALLOC and constructs it as T(a,b,c).\r
+// The storage is handed back to the allocator if the constructor throws, so a\r
+// failed construction does not leak.\r
+template<class T,class A,class B,class C>\r
+T * New(const A & a,const B & b,const C & c){\r
+    __DZ_ALLOC<T> alloc;\r
+    T * mem = alloc.allocate(1);\r
+    try{\r
+        return new (mem) T(a,b,c);\r
+    }catch(...){\r
+        alloc.deallocate(mem,1);\r
+        throw;\r
+    }\r
+}\r
+// Counterpart of New(): runs the destructor of *p and returns its storage to\r
+// __DZ_ALLOC. A null pointer is ignored.\r
+template<class T>\r
+void Delete(T * p){\r
+    // sizeof(T) is ill-formed for an incomplete type, so this rejects such T at compile time.\r
+    typedef char __dummy[sizeof(T)];\r
+    if(!p)\r
+        return;\r
+    p->~T();\r
+    __DZ_ALLOC<T>().deallocate(p,1);\r
+}\r
+\r
+// Unary predicate that is true for a null node pointer.\r
+struct __IsNull\r
+{\r
+    bool operator ()(__NodeBase * n) const{\r
+        return n == 0;\r
+    }\r
+};\r
+\r
+// Indentation unit used by the Debug() dumps.\r
+static const char * const SEP = " ";\r
+\r
+// Builds the indentation prefix for nesting depth `lvl`: SEP repeated `lvl`\r
+// times, or an empty string when `lvl` is not positive.\r
+static __DZ_STRING sep(int lvl)\r
+{\r
+    __DZ_STRING indent;\r
+    for(int n = 0;n < lvl;++n)\r
+        indent.append(SEP);\r
+    return indent;\r
+}\r
+\r
+// Attaches `node` to `parent`. A null `node` is ignored; when there is no\r
+// parent yet, a new __Seq holding `node` becomes the parent.\r
+static void appendNode(__NodeBase *& parent,__NodeBase * node)\r
+{\r
+    if(node){\r
+        if(parent)\r
+            parent->AppendNode(node);\r
+        else\r
+            parent = New<__Seq>(node);\r
+    }\r
+}\r
+\r
+// Character classification helpers used while parsing the expression.\r
+namespace Tools{\r
+\r
+    // True for the one-character repeat operators '?', '+' and '*'.\r
+    inline bool IsRepeat(int ch){\r
+        switch(ch){\r
+            case '?':\r
+            case '+':\r
+            case '*':\r
+                return true;\r
+            default:\r
+                return false;\r
+        }\r
+    }\r
+\r
+    // True for '^'.\r
+    inline bool IsBegin(int ch){\r
+        return '^' == ch;\r
+    }\r
+\r
+    // True for '$'.\r
+    inline bool IsEnd(int ch){\r
+        return '$' == ch;\r
+    }\r
+\r
+    // True for the backslash character.\r
+    inline bool IsSlash(int ch){\r
+        return '\\' == ch;\r
+    }\r
+\r
+    // True for '['.\r
+    inline bool IsSetBegin(int ch){\r
+        return '[' == ch;\r
+    }\r
+\r
+    // True for ']'.\r
+    inline bool IsSetEnd(int ch){\r
+        return ']' == ch;\r
+    }\r
+\r
+    // True for '('.\r
+    inline bool IsGroupBegin(int ch){\r
+        return '(' == ch;\r
+    }\r
+\r
+    // True for ')'.\r
+    inline bool IsGroupEnd(int ch){\r
+        return ')' == ch;\r
+    }\r
+\r
+    // True for '|'.\r
+    inline bool IsSelect(int ch){\r
+        return '|' == ch;\r
+    }\r
+\r
+    // True for '{'.\r
+    inline bool IsRepeatBegin(int ch){\r
+        return '{' == ch;\r
+    }\r
+\r
+    // True for '}'.\r
+    inline bool IsRepeatEnd(int ch){\r
+        return '}' == ch;\r
+    }\r
+\r
+    // True for the closing characters ')' and '}'.\r
+    inline bool NeedEnd(int ch){\r
+        if(IsGroupEnd(ch))\r
+            return true;\r
+        return IsRepeatEnd(ch);\r
+    }\r
+\r
+    // True for the decimal digits '0'..'9'.\r
+    inline bool IsDigit(int ch){\r
+        return ch >= '0' && ch <= '9';\r
+    }\r
+\r
+    // Converts a digit character to its numeric value (no range check).\r
+    inline int TransDigit(int ch){\r
+        return ch - '0';\r
+    }\r
+\r
+    // True for '-'.\r
+    inline bool IsDash(int ch){\r
+        return '-' == ch;\r
+    }\r
+\r
+    // True for '.'.\r
+    inline bool IsAny(int ch){\r
+        return '.' == ch;\r
+    }\r
+\r
+    // Returns `ch` itself when it is one of ':', '=', '!' or '>', otherwise 0.\r
+    inline int IsSubexpMark(int ch){\r
+        switch(ch){\r
+            case ':':\r
+            case '=':\r
+            case '!':\r
+            case '>':\r
+                return ch;\r
+            default:\r
+                return 0;\r
+        }\r
+    }\r
+\r
+    // Examines the two characters at `s`: when the first is '?', returns the\r
+    // mark value of the second (see the int overload), otherwise 0.\r
+    // The second character is read only when the first one is '?'.\r
+    inline int IsSubexpMark(const char * s){\r
+        if(*s != '?')\r
+            return 0;\r
+        return IsSubexpMark(s[1]);\r
+    }\r
+\r
+    // Maps the letter that follows a backslash to the control character it\r
+    // names (f, n, r, t, v); every other value is returned unchanged.\r
+    inline char TransSlash(int ch){\r
+        if(ch == 'f') return '\f';\r
+        if(ch == 'n') return '\n';\r
+        if(ch == 'r') return '\r';\r
+        if(ch == 't') return '\t';\r
+        if(ch == 'v') return '\v';\r
+        return static_cast<char>(ch);\r
+    }\r
+}\r
+\r
+//struct __ParseData\r
+// Searches the pending terminators in ends_ from the most recently pushed one\r
+// backwards for `ch`. Returns the 1-based distance from the back on a match,\r
+// or 0 when `ch` is not found. The search does not continue past a ')' or '}'\r
+// entry.\r
+int __ParseData::inEnds(int ch) const\r
+{\r
+    int depth = 0;\r
+    __Ends::const_reverse_iterator it = ends_.rbegin();\r
+    while(it != ends_.rend()){\r
+        ++depth;\r
+        if(*it == ch)\r
+            return depth;\r
+        if(Tools::NeedEnd(*it))\r
+            return 0;\r
+        ++it;\r
+    }\r
+    return 0;\r
+}\r
+\r
+//struct __NodeBase\r
+// Sentinel returned by Optimize() implementations to tell the caller that the\r
+// node should be dropped. It is the address 1, never a real object, so it must\r
+// not be dereferenced or deleted.\r
+__NodeBase * const __NodeBase::REP_NULL = (__NodeBase *)1;\r
+\r
+#if _MEM_LEAK\r
+// Counter available only in _MEM_LEAK builds; decremented by ~__NodeBase().\r
+int __NodeBase::ref = 0;\r
+#endif\r
+\r
+// Base destructor; in _MEM_LEAK builds it decrements the `ref` counter.\r
+__NodeBase::~__NodeBase()\r
+{\r
+#if _MEM_LEAK\r
+    ref -= 1;\r
+#endif\r
+}\r
+\r
+// Default reaction to a repeat operator following this node: always returns 1,\r
+// the value for which processSeq() wraps the node in a new __Repeat.\r
+int __NodeBase::Repeat(int /*ch*/)\r
+{\r
+    const int wrapInRepeat = 1;\r
+    return wrapInRepeat;\r
+}\r
+\r
+// Default implementation for nodes that cannot hold children: reaching it is a\r
+// programming error, reported through assert.\r
+void __NodeBase::AppendNode(__NodeBase * /*node*/)\r
+{\r
+    assert(false);\r
+}\r
+\r
+//struct __Edge\r
+// Builds an anchor node; begin_ is true for '^' and false for any other character.\r
+__Edge::__Edge(int ch)\r
+    : begin_(Tools::IsBegin(ch))\r
+{\r
+}\r
+\r
+// Anchors produce no text, so the node always asks to be dropped (REP_NULL).\r
+__NodeBase * __Edge::Optimize(__ParseData & /*pdata*/)\r
+{\r
+    __NodeBase * const dropMe = REP_NULL;\r
+    return dropMe;\r
+}\r
+\r
+// Generates nothing; only emits the debug trace when tracing is enabled.\r
+void __Edge::RandString(__GenerateData & gdata) const\r
+{\r
+    _OSS_OUT("__Edge");\r
+}\r
+\r
+// Writes "BEGIN" or "END" (depending on begin_) at indentation level `lvl`.\r
+void __Edge::Debug(std::ostream & out,int lvl) const\r
+{\r
+    const char * const label = begin_ ? "BEGIN" : "END";\r
+    out<<sep(lvl)<<label<<"\n";\r
+}\r
+\r
+//struct __Text\r
+// Builds a literal-text node holding the single character `ch`.\r
+__Text::__Text(int ch)\r
+    : str_(1,ch)\r
+{\r
+}\r
+\r
+// An empty literal asks to be dropped (REP_NULL); otherwise returns 0, meaning\r
+// the node stays as it is.\r
+__NodeBase * __Text::Optimize(__ParseData & /*pdata*/)\r
+{\r
+    if(str_.empty())\r
+        return REP_NULL;\r
+    return 0;\r
+}\r
+\r
+// Appends the literal text to the output being generated.\r
+void __Text::RandString(__GenerateData & gdata) const\r
+{\r
+    gdata.oss_ << str_;\r
+    _OSS_OUT("__Text");\r
+}\r
+\r
+// Writes "Text(<literal>)" at indentation level `lvl`.\r
+void __Text::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl);\r
+    out<<"Text("<<str_<<")\n";\r
+}\r
+\r
+//class __Charset\r
+// Builds an empty character set in "include" mode (inc_ = 1).\r
+__Charset::__Charset()\r
+    : inc_(1)\r
+{\r
+}\r
+\r
+// Builds a character set from the characters of `str`; `include` selects\r
+// whether they are the allowed characters (true) or the excluded ones (false).\r
+__Charset::__Charset(const __DZ_STRING & str,bool include)\r
+    : str_(str),\r
+      inc_(include)\r
+{\r
+}\r
+\r
+// Turns an excluding set into the equivalent including one, then either asks\r
+// to be dropped (REP_NULL) when no character is left or returns 0 to stay.\r
+// From here on inc_ holds the number of candidate characters, which is what\r
+// RandString() uses as modulus.\r
+__NodeBase * __Charset::Optimize(__ParseData & /*pdata*/)\r
+{\r
+    if(!inc_){\r
+        reverse();\r
+    }\r
+    if(!str_.empty()){\r
+        inc_ = str_.size();\r
+        return 0;\r
+    }\r
+    return REP_NULL;\r
+}\r
+\r
+// Appends one character of the set, chosen with rand(), to the output.\r
+// Requires Optimize() to have stored the set size in inc_.\r
+void __Charset::RandString(__GenerateData & gdata) const\r
+{\r
+    assert(inc_ == str_.size());\r
+    const size_t pick = rand() % inc_;\r
+    gdata.oss_ << str_[pick];\r
+    _OSS_OUT("__Charset");\r
+}\r
+\r
+// Writes "Charset(INCLUDE, <characters>)" at indentation level `lvl`.\r
+void __Charset::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl)<<"Charset(INCLUDE";\r
+    out<<", "<<str_<<")\n";\r
+}\r
+\r
+// Switches the set to "exclude" mode: the stored characters become the ones\r
+// that must not be generated.\r
+void __Charset::Exclude()\r
+{\r
+    inc_ = 0;   // 0 marks "exclude"\r
+}\r
+\r
+// Adds the single character `ch` to the set (duplicates are not filtered here).\r
+void __Charset::AddChar(int ch)\r
+{\r
+    str_.push_back( ch );\r
+}\r
+\r
+// Adds every character code in the closed interval [from, to]; nothing is\r
+// added when from > to.\r
+void __Charset::AddRange(int from,int to)\r
+{\r
+    for(int code = from;code <= to;++code){\r
+        str_.push_back(code);\r
+    }\r
+}\r
+\r
+// Merges the characters of another set into this one; a null `node` is ignored.\r
+void __Charset::AddRange(__Charset * node)\r
+{\r
+    if(node){\r
+        unite(*node);\r
+    }\r
+}\r
+\r
+// Normalises the set: an including set is sorted and de-duplicated, an\r
+// excluding set is converted into its including complement.\r
+void __Charset::Unique()\r
+{\r
+    if(inc_)\r
+        unique();\r
+    else\r
+        reverse();\r
+}\r
+\r
+// Appends the characters of `node` to this set. An excluding `node` is first\r
+// converted (in place) into its including form.\r
+void __Charset::unite(__Charset & node)\r
+{\r
+    if(!node.inc_){\r
+        node.reverse();\r
+    }\r
+    str_.append(node.str_);\r
+}\r
+\r
+// Replaces the stored characters by their complement within the character\r
+// codes 32..126 and flips inc_.\r
+void __Charset::reverse()\r
+{\r
+    const int lowest = 32;\r
+    const int highest = 126;\r
+    unique();                       // sorted, duplicate-free input for the sweep below\r
+    __DZ_STRING excluded;\r
+    excluded.swap(str_);            // str_ is now empty and receives the complement\r
+    int next = lowest;              // first code not yet decided\r
+    for(size_t pos = 0;pos < excluded.size() && next <= highest;++pos){\r
+        const int ch = excluded[pos];\r
+        if(next < ch)\r
+            AddRange(next,ch - 1);  // gap before the excluded character\r
+        next = (ch + 1 > lowest ? ch + 1 : lowest);\r
+    }\r
+    if(next <= highest)\r
+        AddRange(next,highest);     // tail after the last excluded character\r
+    inc_ = !inc_;\r
+}\r
+\r
+// Sorts the stored characters and removes duplicates.\r
+void __Charset::unique()\r
+{\r
+    if(str_.empty())\r
+        return;\r
+    std::sort(str_.begin(),str_.end());\r
+    __DZ_STRING::iterator firstDup = std::unique(str_.begin(),str_.end());\r
+    str_.erase(firstDup,str_.end());\r
+}\r
+\r
+//class __Repeat\r
+// Builds a repetition of `node` from a one-character operator:\r
+// '?' -> [0,1], '+' -> [1,INFINITE], '*' -> [0,INFINITE]; any other character\r
+// leaves both bounds at 0.\r
+__Repeat::__Repeat(__NodeBase * node,int ch)\r
+    : node_(node)\r
+    , min_(0)\r
+    , max_(0)\r
+{\r
+    if(ch == '?'){\r
+        max_ = 1;\r
+    }else if(ch == '+'){\r
+        min_ = 1;\r
+        max_ = INFINITE;\r
+    }else if(ch == '*'){\r
+        max_ = INFINITE;\r
+    }\r
+}\r
+\r
+// Builds a repetition of `node` with explicit bounds [min, max].\r
+__Repeat::__Repeat(__NodeBase * node,int min,int max)\r
+    : node_(node),\r
+      min_(min),\r
+      max_(max)\r
+{\r
+}\r
+\r
+// Destroys the repeated child node (Delete ignores a null pointer).\r
+__Repeat::~__Repeat()\r
+{\r
+    Delete(node_);\r
+}\r
+\r
+// Finalises the repetition after parsing.\r
+// Returns REP_NULL when the node should be dropped, the child itself when the\r
+// repetition is exactly {1,1} (the caller then deletes this wrapper), or 0 when\r
+// this node stays. On the last path max_ is turned into the number of possible\r
+// counts (max - min + 1), the modulus RandString() expects.\r
+__NodeBase * __Repeat::Optimize(__ParseData & pdata)\r
+{\r
+    // Strip the modifier flags that Repeat() may have stored in the bounds.\r
+    min_ &= _CLEAR_FLAGS;\r
+    max_ &= _CLEAR_FLAGS;\r
+    if(isInfinite()){\r
+        // Open-ended repetition: cap it using the configured amount.\r
+        max_ = min_ + pdata.config_.repeatInfinite;\r
+        if(max_ > _REPEAT_MAX)\r
+            max_ = _REPEAT_MAX;\r
+    }\r
+    if(!node_)\r
+        return REP_NULL;\r
+    if(min_ > max_)\r
+        return REP_NULL;\r
+    if(min_ == 0 && max_ == 0)\r
+        return REP_NULL;\r
+    __NodeBase * const optimized = node_->Optimize(pdata);\r
+    if(optimized == REP_NULL)\r
+        return REP_NULL;\r
+    if(optimized){\r
+        Delete(node_);\r
+        node_ = optimized;\r
+    }\r
+    if(min_ == 1 && max_ == 1){\r
+        // Release the child so that deleting this wrapper does not destroy it.\r
+        __NodeBase * const child = node_;\r
+        node_ = 0;\r
+        return child;\r
+    }\r
+    max_ = max_ - (min_ - 1);\r
+    return 0;\r
+}\r
+\r
+// Generates the child min_ + (rand() % max_) times; after Optimize() max_ is\r
+// the number of possible counts.\r
+void __Repeat::RandString(__GenerateData & gdata) const\r
+{\r
+    int times = min_ + rand() % max_;\r
+    while(times > 0){\r
+        node_->RandString(gdata);\r
+        --times;\r
+    }\r
+    _OSS_OUT("__Repeat");\r
+}\r
+\r
+// Writes "Repeat[min, max]" followed by the child (or "NULL") one level deeper.\r
+// The upper bound is printed as min_ + max_ - 1.\r
+void __Repeat::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl)<<"Repeat["<<min_<<", "<<(min_ + max_ - 1)<<"]\n";\r
+    const int childLvl = lvl + 1;\r
+    if(!node_){\r
+        out<<sep(childLvl)<<"NULL\n";\r
+        return;\r
+    }\r
+    node_->Debug(out,childLvl);\r
+}\r
+\r
+// Handles a repeat operator that directly follows this repetition.\r
+// When canRepeat() allows it, '?' sets the _NON_GREEDY flag and '+' the\r
+// _PROSSESSIVE flag in min_, and 2 is returned; in every other case 0.\r
+int __Repeat::Repeat(int ch)\r
+{\r
+    if(!canRepeat())\r
+        return 0;\r
+    if(ch == '?'){\r
+        min_ |= _NON_GREEDY;\r
+        return 2;\r
+    }\r
+    if(ch == '+'){\r
+        min_ |= _PROSSESSIVE;\r
+        return 2;\r
+    }\r
+    return 0;\r
+}\r
+\r
+//class __Seq\r
+// Builds a sequence whose only element is `node`.\r
+__Seq::__Seq(__NodeBase * node)\r
+    : seq_(1,node)\r
+{\r
+}\r
+\r
+// Destroys every element of the sequence (null entries are ignored by Delete).\r
+__Seq::~__Seq()\r
+{\r
+    for(size_t k = 0;k < seq_.size();++k)\r
+        Delete(seq_[k]);\r
+}\r
+\r
+// Optimizes every element, removes the ones that asked to be dropped and then\r
+// collapses the sequence when possible.\r
+// Returns REP_NULL for an empty sequence, the single remaining element when\r
+// only one is left (the caller then deletes this wrapper), or 0 to stay.\r
+__NodeBase * __Seq::Optimize(__ParseData & pdata)\r
+{\r
+    if(seq_.empty())\r
+        return REP_NULL;\r
+    for(size_t k = 0;k < seq_.size();++k){\r
+        __NodeBase * const child = seq_[k];\r
+        if(!child)\r
+            continue;\r
+        __NodeBase * const r = child->Optimize(pdata);\r
+        if(!r)\r
+            continue;               // child stays unchanged\r
+        Delete(child);\r
+        seq_[k] = (r == REP_NULL ? 0 : r);\r
+    }\r
+    __Con::iterator firstNull = std::remove_if(seq_.begin(),seq_.end(),__IsNull());\r
+    seq_.erase(firstNull,seq_.end());\r
+    if(seq_.empty())\r
+        return REP_NULL;\r
+    if(seq_.size() != 1)\r
+        return 0;\r
+    // Hand the only element over; clearing keeps the destructor from deleting it.\r
+    __NodeBase * const only = seq_[0];\r
+    seq_.clear();\r
+    return only;\r
+}\r
+\r
+// Generates every element in order. Elements are dereferenced without a null\r
+// check.\r
+void __Seq::RandString(__GenerateData & gdata) const\r
+{\r
+    __Con::const_iterator it = seq_.begin();\r
+    const __Con::const_iterator last = seq_.end();\r
+    while(it != last){\r
+        (*it)->RandString(gdata);\r
+        ++it;\r
+    }\r
+    _OSS_OUT("__Seq");\r
+}\r
+\r
+// Writes "Seq(<count>)" and then every element (or "NULL") one level deeper.\r
+void __Seq::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl)<<"Seq("<<seq_.size()<<")\n";\r
+    const int childLvl = lvl + 1;\r
+    for(size_t k = 0;k < seq_.size();++k){\r
+        if(!seq_[k]){\r
+            out<<sep(childLvl)<<"NULL\n";\r
+            continue;\r
+        }\r
+        seq_[k]->Debug(out,childLvl);\r
+    }\r
+}\r
+\r
+// Appends `node` to the sequence. When both `node` and the current last\r
+// element are __Text nodes, the text is merged into the last element and\r
+// `node` is deleted instead of being stored.\r
+void __Seq::AppendNode(__NodeBase * node)\r
+{\r
+    if(!seq_.empty()){\r
+        __Text * const cur = dynamic_cast<__Text *>(node);\r
+        __Text * const prev = (cur ? dynamic_cast<__Text *>(seq_.back()) : 0);\r
+        if(prev){\r
+            *prev += *cur;\r
+            Delete(node);\r
+            return;\r
+        }\r
+    }\r
+    seq_.push_back(node);\r
+}\r
+\r
+//class __Group\r
+// Builds a group around `node`. `mark` is either one of the sub-expression\r
+// marks accepted by Tools::IsSubexpMark or a group number; in the latter case\r
+// the INDEX flag is added to mark_.\r
+__Group::__Group(__NodeBase * node,int mark)\r
+    : node_(node),\r
+      mark_(mark)\r
+{\r
+    if(Tools::IsSubexpMark(mark_) == 0){\r
+        mark_ |= INDEX;\r
+    }\r
+}\r
+\r
+// Destroys the grouped child node (Delete ignores a null pointer).\r
+__Group::~__Group()\r
+{\r
+    Delete( node_ );\r
+}\r
+\r
+// Finalises the group after parsing.\r
+// Returns REP_NULL when the group has no content, is marked '!' or its content\r
+// asked to be dropped; returns the content itself for groups marked ':', '='\r
+// or '>' (the caller then deletes this wrapper); otherwise returns 0 after\r
+// converting mark_ from "INDEX flag + 1-based number" to a 0-based slot.\r
+__NodeBase * __Group::Optimize(__ParseData & pdata)\r
+{\r
+    if(!node_)\r
+        return REP_NULL;\r
+    if(mark_ == '!')\r
+        return REP_NULL;\r
+    __NodeBase * const optimized = node_->Optimize(pdata);\r
+    if(optimized == REP_NULL)\r
+        return REP_NULL;\r
+    if(optimized){\r
+        Delete(node_);\r
+        node_ = optimized;\r
+    }\r
+    if(mark_ == ':' || mark_ == '=' || mark_ == '>'){\r
+        // Release the content so that deleting this wrapper does not destroy it.\r
+        __NodeBase * const inner = node_;\r
+        node_ = 0;\r
+        return inner;\r
+    }\r
+    mark_ = (mark_ & (INDEX - 1)) - 1;\r
+    return 0;\r
+}\r
+\r
+// Generates the group's content and records where it lies in the output so\r
+// that a later back-reference can repeat it.\r
+// refs_[mark_] receives (start offset, npos) before the content is generated\r
+// and (start offset, length) afterwards.\r
+void __Group::RandString(__GenerateData & gdata) const\r
+{\r
+    assert(node_);\r
+    assert(0 <= mark_ && mark_ < MAX_GROUPS);\r
+    if(mark_ >= gdata.refs_.size())\r
+        gdata.refs_.resize(mark_ + 1);\r
+    // Always write this group's own slot. Using back() is only correct right\r
+    // after the resize above; once a higher-numbered group has an entry (e.g.\r
+    // when the group is generated again inside a repetition) it would clobber\r
+    // that entry and leave this group's start offset stale.\r
+    gdata.refs_[mark_] = __RefValue(gdata.oss_.str().size(),__DZ_STRING::npos);\r
+    node_->RandString(gdata);\r
+    assert(mark_ < gdata.refs_.size());\r
+    gdata.refs_[mark_].second = gdata.oss_.str().size() - gdata.refs_[mark_].first;\r
+    _OSS_OUT("__Group");\r
+}\r
+\r
+// Writes "Group(<mark>)" - the mark as "?:", "?=", "?!" or "?>", otherwise the\r
+// number stored below the INDEX flag - and then the content (or "NULL") one\r
+// level deeper.\r
+void __Group::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl)<<"Group(";\r
+    if(mark_ == ':')\r
+        out<<"?:";\r
+    else if(mark_ == '=')\r
+        out<<"?=";\r
+    else if(mark_ == '!')\r
+        out<<"?!";\r
+    else if(mark_ == '>')\r
+        out<<"?>";\r
+    else\r
+        out<<(mark_ & (INDEX - 1));\r
+    out<<")\n";\r
+    const int childLvl = lvl + 1;\r
+    if(!node_){\r
+        out<<sep(childLvl)<<"NULL\n";\r
+        return;\r
+    }\r
+    node_->Debug(out,childLvl);\r
+}\r
+\r
+//class __Select\r
+// Builds an alternation whose first alternative is `node`. sz_ stays 0 until\r
+// Optimize() stores the final number of alternatives.\r
+__Select::__Select(__NodeBase * node)\r
+    : sel_(1,node),\r
+      sz_(0)\r
+{\r
+}\r
+\r
+// Destroys every alternative (null entries are ignored by Delete).\r
+__Select::~__Select()\r
+{\r
+    for(size_t k = 0;k < sel_.size();++k)\r
+        Delete(sel_[k]);\r
+}\r
+\r
+// Optimizes every alternative, removes the ones that asked to be dropped (and\r
+// null entries) and then collapses the alternation when possible.\r
+// Returns REP_NULL when nothing is left, the single remaining alternative when\r
+// only one is left (the caller then deletes this wrapper), or 0 after caching\r
+// the number of alternatives in sz_.\r
+__NodeBase * __Select::Optimize(__ParseData & pdata)\r
+{\r
+    if(sel_.empty())\r
+        return REP_NULL;\r
+    for(size_t k = 0;k < sel_.size();++k){\r
+        __NodeBase * const alt = sel_[k];\r
+        if(!alt)\r
+            continue;\r
+        __NodeBase * const r = alt->Optimize(pdata);\r
+        if(!r)\r
+            continue;               // alternative stays unchanged\r
+        Delete(alt);\r
+        sel_[k] = (r == REP_NULL ? 0 : r);\r
+    }\r
+    __Con::iterator firstNull = std::remove_if(sel_.begin(),sel_.end(),__IsNull());\r
+    sel_.erase(firstNull,sel_.end());\r
+    if(sel_.empty())\r
+        return REP_NULL;\r
+    if(sel_.size() == 1){\r
+        // Hand the only alternative over; clearing keeps the destructor from deleting it.\r
+        __NodeBase * const only = sel_[0];\r
+        sel_.clear();\r
+        return only;\r
+    }\r
+    sz_ = sel_.size();\r
+    return 0;\r
+}\r
+\r
+// Generates one alternative chosen with rand(); does nothing when sz_ is 0.\r
+void __Select::RandString(__GenerateData & gdata) const\r
+{\r
+    if(sz_){\r
+        __NodeBase * const chosen = sel_[rand() % sz_];\r
+        chosen->RandString(gdata);\r
+    }\r
+    _OSS_OUT("__Select");\r
+}\r
+\r
+// Writes "Select(<count>)" and then every alternative (or "NULL") one level deeper.\r
+void __Select::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl)<<"Select("<<sel_.size()<<")\n";\r
+    const int childLvl = lvl + 1;\r
+    for(size_t k = 0;k < sel_.size();++k){\r
+        if(!sel_[k]){\r
+            out<<sep(childLvl)<<"NULL\n";\r
+            continue;\r
+        }\r
+        sel_[k]->Debug(out,childLvl);\r
+    }\r
+}\r
+\r
+// Adds `node` as a further alternative; a null `node` is stored as is.\r
+void __Select::AppendNode(__NodeBase * node)\r
+{\r
+    sel_.push_back( node );\r
+}\r
+\r
+//class __Ref\r
+// Builds a back-reference to group number `index`.\r
+__Ref::__Ref(int index)\r
+    : index_(index)\r
+{\r
+}\r
+\r
+// Lowers index_ by one so that it can be used as a position in refs_; the\r
+// node always stays (returns 0).\r
+__NodeBase * __Ref::Optimize(__ParseData & /*pdata*/)\r
+{\r
+    index_ -= 1;\r
+    return 0;\r
+}\r
+\r
+// Appends a copy of the text recorded for the referenced group.\r
+// Nothing is appended when the group has no entry in refs_ (it has not been\r
+// generated, for instance because another alternative was chosen) or when its\r
+// recorded start lies at or beyond the end of the output. The former case used\r
+// to be guarded by an assert only, i.e. an out-of-range read in NDEBUG builds.\r
+void __Ref::RandString(__GenerateData & gdata) const\r
+{\r
+    // The cast also turns a negative index into an out-of-range value.\r
+    if(static_cast<size_t>(index_) < gdata.refs_.size()){\r
+        const __RefValue & ref = gdata.refs_[index_];\r
+        __DZ_STRING str = gdata.oss_.str();\r
+        if(ref.first < str.size())\r
+            gdata.oss_<<str.substr(ref.first,ref.second);\r
+    }\r
+    _OSS_OUT("__Ref("<<index_<<")");\r
+}\r
+\r
+// Writes "Ref(<index>)" at indentation level `lvl`.\r
+void __Ref::Debug(std::ostream & out,int lvl) const\r
+{\r
+    out<<sep(lvl);\r
+    out<<"Ref("<<index_<<")\n";\r
+}\r
+\r
+//class __CRegxString\r
+// Builds an object without a parsed expression (top_ is null).\r
+__CRegxString::__CRegxString()\r
+    : top_(0)\r
+{\r
+}\r
+\r
+// Discards any previous state, parses `regx` into a node tree and optimizes it.\r
+// `config` may be null, in which case a default-constructed Config is used.\r
+// top_ stays null for an empty expression or when nothing remains after\r
+// optimization.\r
+// rand() is not seeded here: the former srand(time(0)) call is disabled\r
+// (change attributed to Adrian Lita).\r
+void __CRegxString::ParseRegx(const __DZ_STRING & regx,const Config * config)\r
+{\r
+    uninit();\r
+    regx_ = regx;\r
+    if(regx_.empty())\r
+        return;\r
+    Config def;\r
+    const Config * const effective = (config ? config : &def);\r
+    __ParseData pdata(*effective);\r
+    top_ = processSeq(pdata).first;\r
+    if(!top_)\r
+        return;\r
+    __NodeBase * const optimized = top_->Optimize(pdata);\r
+    if(!optimized)\r
+        return;                     // tree stays as parsed\r
+    Delete(top_);\r
+    top_ = (optimized == __NodeBase::REP_NULL ? 0 : optimized);\r
+}\r
+\r
+// Generates a new random string from the parsed tree, stores it in str_ and\r
+// returns a reference to it. The result is empty when no tree is available.\r
+const __DZ_STRING & __CRegxString::RandString()\r
+{\r
+    str_.clear();\r
+    if(!top_)\r
+        return str_;\r
+    __DZ_OSTRINGSTREAM oss;\r
+    __GenerateData gdata(oss);\r
+    top_->RandString(gdata);\r
+    str_ = oss.str();\r
+    return str_;\r
+}\r
+\r
+// Writes the source expression followed by a dump of the node tree, or "NULL"\r
+// when there is no tree.\r
+void __CRegxString::Debug(std::ostream & out) const{\r
+    out<<"regx_ : "<<regx_<<"\nstructure :\n";\r
+    if(!top_){\r
+        out<<"NULL\n";\r
+        return;\r
+    }\r
+    top_->Debug(out,0);\r
+}\r
+\r
+// Releases the node tree and clears the last generated string.\r
+void __CRegxString::uninit()\r
+{\r
+    Delete(top_);                   // Delete ignores a null pointer\r
+    top_ = 0;\r
+    str_.clear();\r
+}\r
+\r
+// Parses a sequence of items starting at pdata.i_.\r
+// Returns a pair: `first` is the node built for the sequence (null when\r
+// nothing was parsed), `second` is the inEnds() value of the terminator that\r
+// stopped the sequence, or 0 when the end of the expression was reached. A '|'\r
+// that is not a pending terminator hands the sequence over to processSelect().\r
+//\r
+// `cur` is the item built most recently; it is linked into ret.first only when\r
+// the next item starts, so that a following repeat operator can still wrap it.\r
+__CRegxString::__Ret __CRegxString::processSeq(__ParseData & pdata)\r
+{\r
+    __Ret ret;\r
+    __NodeBase * cur = 0;\r
+    bool begin = true;\r
+    for(const size_t e = regx_.length();pdata.i_ < e;++pdata.i_){\r
+        int ch = regx_[pdata.i_];\r
+        if(begin){\r
+            if(Tools::IsBegin(ch)){\r
+                // Link an anchor created by an earlier '^' before replacing it;\r
+                // overwriting `cur` directly would leak that node.\r
+                appendNode(ret.first,cur);\r
+                cur = New<__Edge>(ch);\r
+                continue;\r
+            }\r
+            begin = false;\r
+        }\r
+        if(Tools::IsRepeat(ch) && cur){\r
+            // 1: wrap `cur` in a new repetition; 2: `cur` absorbed the operator;\r
+            // 0: not applicable, the character is handled as an ordinary item.\r
+            int r = cur->Repeat(ch);\r
+            if(r){\r
+                if(r == 1)\r
+                    cur = New<__Repeat>(cur,ch);\r
+                continue;\r
+            }\r
+        }\r
+        if(Tools::IsRepeatBegin(ch)){\r
+            __NodeBase * rep = processRepeat(cur,pdata);\r
+            // processRepeat() returns a literal '{' text node when no valid\r
+            // {m,n} follows or `cur` cannot be repeated. It does not take over\r
+            // `cur` in that case, so link `cur` here instead of losing it.\r
+            if(!dynamic_cast<__Repeat *>(rep))\r
+                appendNode(ret.first,cur);\r
+            cur = rep;\r
+            continue;\r
+        }\r
+        appendNode(ret.first,cur);\r
+        ret.second = pdata.inEnds(ch);\r
+        if(ret.second)\r
+            return ret;\r
+        if(Tools::IsSelect(ch))\r
+            return processSelect(ret.first,pdata);\r
+        if(Tools::IsEnd(ch))\r
+            cur = New<__Edge>(ch);\r
+        else if(Tools::IsAny(ch)){\r
+            // '.' is built as "everything except newline".\r
+            __Charset * set = New<__Charset>("\n",false);\r
+            set->Unique();\r
+            cur = set;\r
+        }else if(Tools::IsSetBegin(ch))\r
+            cur = processSet(pdata);\r
+        else if(Tools::IsGroupBegin(ch))\r
+            cur = processGroup(pdata);\r
+        else if(Tools::IsSlash(ch))\r
+            cur = processSlash(true,pdata).first;\r
+        else\r
+            cur = New<__Text>(ch);\r
+    }\r
+    appendNode(ret.first,cur);\r
+    return ret;\r
+}\r
+\r
+// Parses the escape that starts at the backslash at pdata.i_ and leaves\r
+// pdata.i_ on the escaped character.\r
+// Returns a pair: `second` is the escaped character after Tools::TransSlash\r
+// (a backslash when the expression ends right after the backslash); `first`\r
+// is a __Charset for \d \D \s \S \w \W. When `bNode` is true and no set was\r
+// built, `first` is a __Ref for a digit 1..9 that does not exceed pdata.ref_,\r
+// otherwise a __Text of `second` (with \0 giving the character value 0).\r
+// When `bNode` is false and no set was built, `first` stays null.\r
+__CRegxString::__Ret __CRegxString::processSlash(bool bNode,__ParseData & pdata)\r
+{\r
+    ++pdata.i_;\r
+    __Ret ret((__NodeBase *)0,pdata.i_ < regx_.length() ? Tools::TransSlash(regx_[pdata.i_]) : '\\');\r
+    __Charset * set = 0;\r
+    switch(ret.second){\r
+        case 'd':\r
+        case 'D':\r
+            // lower case includes the digits, upper case excludes them\r
+            set = New<__Charset>("0123456789",ret.second == 'd');\r
+            break;\r
+        case 's':\r
+        case 'S':\r
+            // only tab and space are used as white space here\r
+            set = New<__Charset>("\t ",ret.second == 's');\r
+            break;\r
+        case 'w':\r
+        case 'W':\r
+            // A-Za-z0-9_ ; the upper-case form excludes them\r
+            set = New<__Charset>();\r
+            set->AddRange('A','Z');\r
+            set->AddRange('a','z');\r
+            set->AddRange('0','9');\r
+            set->AddChar('_');\r
+            if(ret.second == 'W')\r
+                set->Exclude();\r
+            break;\r
+        default:\r
+            break;\r
+    }\r
+    if(set){\r
+        set->Unique();\r
+        ret.first = set;\r
+        return ret;\r
+    }\r
+    if(!bNode)\r
+        return ret;\r
+    if(Tools::IsDigit(ret.second)){\r
+        const int i = Tools::TransDigit(ret.second);\r
+        if(i == 0)\r
+            ret.second = 0;\r
+        else if(i <= pdata.ref_)\r
+            ret.first = New<__Ref>(i);\r
+    }\r
+    if(!ret.first)\r
+        ret.first = New<__Text>(ret.second);\r
+    return ret;\r
+}\r
+\r
+// Parses a bracket expression starting at the '[' at pdata.i_.\r
+// On success returns the resulting __Charset and leaves pdata.i_ on the\r
+// closing ']'. When no ']' is found, pdata.i_ is restored and a __Text node\r
+// for a literal '[' is returned instead.\r
+__NodeBase * __CRegxString::processSet(__ParseData & pdata)\r
+{\r
+    size_t bak = pdata.i_++;\r
+    __Charset * ret = New<__Charset>();\r
+    bool begin = true;\r
+    int prev = 0;   // character waiting to see whether it starts a range; 0 = none\r
+    for(const size_t e = regx_.length();pdata.i_ < e;++pdata.i_){\r
+        int ch = regx_[pdata.i_];\r
+        if(begin){\r
+            // '^' negates the set only as its very first character; anywhere\r
+            // else it is an ordinary member, so the flag is cleared right away.\r
+            begin = false;\r
+            if(Tools::IsBegin(ch)){\r
+                ret->Exclude();\r
+                continue;\r
+            }\r
+        }\r
+        if(Tools::IsDash(ch) && prev){\r
+            int to = 0;\r
+            if(processRange(to,pdata)){\r
+                ret->AddRange(prev,to);\r
+                prev = 0;\r
+                continue;\r
+            }\r
+            // No range end available: '-' is handled as an ordinary character below.\r
+        }\r
+        if(prev)\r
+            ret->AddChar(prev);\r
+        if(Tools::IsSetEnd(ch)){\r
+            ret->Unique();\r
+            return ret;\r
+        }\r
+        if(Tools::IsSlash(ch)){\r
+            __Ret s = processSlash(false,pdata);\r
+            if(s.first){ //charset\r
+                ret->AddRange(dynamic_cast<__Charset *>(s.first));\r
+                Delete(s.first);\r
+                prev = 0;\r
+                continue;\r
+            }\r
+            ch = s.second;\r
+        }\r
+        prev = ch;\r
+    }\r
+    // Unterminated set: give up and treat '[' as a literal.\r
+    Delete(ret);\r
+    pdata.i_ = bak;\r
+    return New<__Text>('[');\r
+}\r
+\r
+// Parses a group starting at the '(' at pdata.i_.\r
+// On success returns a __Group and leaves pdata.i_ on the closing ')'. A group\r
+// without a "(?x" mark receives the next group number from pdata.ref_.\r
+// When no closing ')' is found, everything parsed so far is discarded, the\r
+// position and the group counter are restored and a __Text node for a literal\r
+// '(' is returned; the caller then parses the content again as plain items.\r
+__NodeBase * __CRegxString::processGroup(__ParseData & pdata)\r
+{\r
+    const size_t bak = pdata.i_++;\r
+    // Numbers handed out inside an unterminated group must be given back,\r
+    // otherwise the groups found when the content is parsed again are numbered\r
+    // too high and back-references point at the wrong group.\r
+    const int refBak = pdata.ref_;\r
+    int mark = ignoreSubexpMarks(pdata);\r
+    pdata.ends_.push_back(')');\r
+    if(!mark)\r
+        mark = ++pdata.ref_;\r
+    __Ret ret = processSeq(pdata);\r
+    pdata.ends_.pop_back();\r
+    if(ret.second)\r
+        return New<__Group>(ret.first,mark);\r
+    Delete(ret.first);\r
+    pdata.i_ = bak;\r
+    pdata.ref_ = refBak;\r
+    return New<__Text>('(');\r
+}\r
+\r
+// Parses the remaining alternatives of an alternation whose first alternative\r
+// `node` has already been parsed; pdata.i_ is on the '|' that follows it.\r
+// Returns a pair: `first` is the __Select holding all alternatives, `second`\r
+// is the terminator level seen from the caller (0 when the expression ended).\r
+__CRegxString::__Ret __CRegxString::processSelect(__NodeBase * node,__ParseData & pdata)\r
+{\r
+    __Ret ret(New<__Select>(node),0);\r
+    pdata.ends_.push_back('|');     // lets processSeq() stop at the next '|'\r
+    const size_t len = regx_.length();\r
+    while(pdata.i_ < len){\r
+        ++pdata.i_;                 // step over the '|'\r
+        __Ret alt = processSeq(pdata);\r
+        ret.first->AppendNode(alt.first);\r
+        if(alt.second > 1){\r
+            // Stopped by a terminator below our own '|' entry: report it one\r
+            // level lower, because that entry is popped before returning.\r
+            ret.second = alt.second - 1;\r
+            break;\r
+        }\r
+    }\r
+    pdata.ends_.pop_back();\r
+    return ret;\r
+}\r
+\r
+// Parses a "{m}", "{m,}" or "{m,n}" repetition starting at the '{' at pdata.i_.\r
+// On success returns a __Repeat wrapping `node` and leaves pdata.i_ on the\r
+// closing '}'; an upper bound below the lower bound is raised to the lower\r
+// bound. Otherwise - `node` is null, `node` refuses to be repeated, or the\r
+// text is not a valid repetition - pdata.i_ is left on the '{' and a __Text\r
+// node for a literal '{' is returned.\r
+// NOTE(review): on the fallback path `node` is neither linked nor deleted\r
+// here; the caller still owns it.\r
+__NodeBase * __CRegxString::processRepeat(__NodeBase * node,__ParseData & pdata)\r
+{\r
+    if(!node || !node->Repeat(0))\r
+        return New<__Text>('{');\r
+    const size_t bak = pdata.i_++;\r
+    int min = 0,max = __Repeat::INFINITE;\r
+    const int stop = processInt(min,pdata);\r
+    if(stop == '}')\r
+        return New<__Repeat>(node,min,min);\r
+    if(stop == ','){\r
+        ++pdata.i_;\r
+        if(processInt(max,pdata) == '}')\r
+            return New<__Repeat>(node,min,(min < max ? max : min));\r
+    }\r
+    pdata.i_ = bak;\r
+    return New<__Text>('{');\r
+}\r
+\r
+// Reads a run of decimal digits starting at pdata.i_ into `result`.\r
+// Returns the first non-digit character (pdata.i_ is left on it), or 0 when\r
+// the expression ends first or the number does not fit into an int.\r
+// `result` keeps its incoming value when no digit is present.\r
+int __CRegxString::processInt(int & result,__ParseData & pdata)\r
+{\r
+    bool begin = true;\r
+    for(const size_t e = regx_.length();pdata.i_ < e;++pdata.i_){\r
+        int ch = regx_[pdata.i_];\r
+        if(!Tools::IsDigit(ch))\r
+            return ch;\r
+        const int digit = Tools::TransDigit(ch);\r
+        if(begin){\r
+            result = digit;\r
+            begin = false;\r
+        }else{\r
+            // Check before computing: signed overflow is undefined behaviour,\r
+            // so testing the sign of the product afterwards is not reliable.\r
+            if(result > (INT_MAX - digit) / 10)\r
+                return 0;\r
+            result = result * 10 + digit;\r
+        }\r
+    }\r
+    return 0;\r
+}\r
+\r
+// Called with pdata.i_ on a '-' inside a set. When a character other than ']'\r
+// follows, stores it in `result`, advances pdata.i_ onto it and returns true.\r
+// Otherwise returns false and leaves pdata.i_ and `result` unchanged.\r
+bool __CRegxString::processRange(int & result,__ParseData & pdata)\r
+{\r
+    const size_t next = pdata.i_ + 1;\r
+    if(next >= regx_.size() || regx_[next] == ']')\r
+        return false;\r
+    pdata.i_ = next;\r
+    result = regx_[next];\r
+    return true;\r
+}\r
+\r
+// Looks for a sub-expression mark ("?:", "?=", "?!" or "?>") at pdata.i_.\r
+// When one is present, skips its two characters and returns the mark\r
+// character; otherwise returns 0 and leaves pdata.i_ unchanged.\r
+int __CRegxString::ignoreSubexpMarks(__ParseData & pdata)\r
+{\r
+    int ret = 0;\r
+    // Both characters of the mark must lie inside the expression.\r
+    if(pdata.i_ + 1 < regx_.size()){\r
+        // Address-of regx_[i]: the "&reg" here had been mangled into a\r
+        // registered-trademark sign, which does not compile.\r
+        ret = Tools::IsSubexpMark(&regx_[pdata.i_]);\r
+        if(ret)\r
+            pdata.i_ += 2;\r
+    }\r
+    return ret;\r
+}\r
+\r
+NAMESAPCE_END\r