/************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ #ifndef DOZERG_REGXSTRING_IMPL_H_20091012 #define DOZERG_REGXSTRING_IMPL_H_20091012 #include #include #include #include #include #define _DZ_DEBUG 0 #define _MEM_LEAK 0 //allocator choice #ifndef __GNUC__ # define __DZ_ALLOC std::allocator #else # ifndef NDEBUG # define __DZ_ALLOC std::allocator # else # include # define __DZ_ALLOC __gnu_cxx::__pool_alloc # endif #endif //stl containers redefine //Sequence #define __DZ_BASIC_STRING(C) std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > > #define __DZ_BASIC_STRING1(C,T) std::basic_string< C,T,__DZ_ALLOC< C > > #define __DZ_STRING __DZ_BASIC_STRING(char) #define __DZ_WSTRING __DZ_BASIC_STRING(wchar_t) #define __DZ_DEQUE(T) std::deque< T,__DZ_ALLOC< T > > #define __DZ_LIST(T) std::list< T,__DZ_ALLOC< T > > #define __DZ_VECTOR(T) std::vector< T,__DZ_ALLOC< T > > //Associative #define __DZ_MAP(K,V) std::map< K,V,std::less< K >,__DZ_ALLOC > > #define __DZ_MAP1(K,V,C) std::map< K,V,C,__DZ_ALLOC > > #define __DZ_MULTIMAP(K,V) std::multimap< K,V,std::less< K >,__DZ_ALLOC > > #define __DZ_MULTIMAP1(K,V,C) std::multimap< K,V,C,__DZ_ALLOC > > #define __DZ_SET(K) std::set< K,std::less< K >,__DZ_ALLOC< K > > #define __DZ_SET1(K,C) std::set< K,C,__DZ_ALLOC< K > > #define __DZ_MULTISET(K) std::multiset< K,std::less< K >,__DZ_ALLOC< K > > #define __DZ_MULTISET1(K,C) std::multiset< K,C,__DZ_ALLOC< K > > //String Stream #define __DZ_BASIC_ISTRINGSTREAM(C) std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > > #define __DZ_BASIC_ISTRINGSTREAM1(C,T) std::basic_istringstream< C,T,__DZ_ALLOC< C > > #define __DZ_BASIC_OSTRINGSTREAM(C) std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > > #define __DZ_BASIC_OSTRINGSTREAM1(C,T) std::basic_ostringstream< C,T,__DZ_ALLOC< C > > #define __DZ_BASIC_STRINGSTREAM(C) std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > > #define __DZ_BASIC_STRINGSTREAM1(C,T) std::basic_stringstream< C,T,__DZ_ALLOC< C > > #define __DZ_ISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(char) #define __DZ_OSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(char) #define __DZ_STRINGSTREAM __DZ_BASIC_STRINGSTREAM(char) #define __DZ_WISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(wchar_t) #define __DZ_WOSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(wchar_t) #define __DZ_WSTRINGSTREAM __DZ_BASIC_STRINGSTREAM(wchar_t) //Stream Buf #define __DZ_BASIC_STRINGBUF(C) std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > > #define __DZ_BASIC_STRINGBUF1(C,T) std::basic_stringbuf< C,T,__DZ_ALLOC< C > > #define __DZ_STRINGBUF __DZ_BASIC_STRINGBUF(char) #define __DZ_WSTRINGBUF __DZ_BASIC_STRINGBUF(wchar_t) //Extension #define __DZ_ROPE(T) __gnu_cxx::rope< T,__DZ_ALLOC< T > > #define __DZ_SLIST(T) __gnu_cxx::slist< T,__DZ_ALLOC< T > > #define REGXSTRING_NS __DZ_Regx_String #define NAMESAPCE_BEGIN namespace __DZ_Regx_String{ #define NAMESAPCE_END } struct Config; NAMESAPCE_BEGIN typedef std::pair __RefValue; typedef __DZ_VECTOR(__RefValue) __Refs; typedef __DZ_VECTOR(char) __Ends; struct __ParseData{ __Ends ends_; const Config & config_; size_t i_; int ref_; //functions: explicit __ParseData(const Config & config) : config_(config) , i_(0) , ref_(0) {} int inEnds(int ch) const; }; struct __GenerateData { __Refs refs_; __DZ_OSTRINGSTREAM & oss_; explicit __GenerateData(__DZ_OSTRINGSTREAM & oss) : oss_(oss) {} }; struct __NodeBase { static __NodeBase * const REP_NULL; //replace with NULL(0) #if _MEM_LEAK static int ref; __NodeBase(){++ref;} #endif virtual ~__NodeBase(); virtual __NodeBase * Optimize(__ParseData & pdata) = 0; virtual void RandString(__GenerateData & gdata) const = 0; virtual void Debug(std::ostream & out,int lvl) const = 0; virtual int Repeat(int ch); virtual void AppendNode(__NodeBase * node); }; class __Edge : public __NodeBase { bool begin_; public: explicit __Edge(int ch); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; }; class __Text : public __NodeBase { __DZ_STRING str_; public: //functions explicit __Text(int ch); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; __Text & operator +=(const __Text & other){str_ += other.str_;return *this;} }; class __Charset : public __NodeBase { __DZ_STRING str_; size_t inc_; public: //functions __Charset(); __Charset(const __DZ_STRING & str,bool include); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; void Exclude(); void AddChar(int ch); void AddRange(int from,int to); void AddRange(__Charset * node); void Unique(); private: void unite(__Charset & node); void reverse(); void unique(); }; struct __Repeat : public __NodeBase { static const int INFINITE = 1 << 16; private: static const int _REPEAT_MAX = __Repeat::INFINITE - 1; static const int _NON_GREEDY = 1 << 17; static const int _PROSSESSIVE = 1 << 18; static const int _CLEAR_FLAGS = _NON_GREEDY - 1; __NodeBase * node_; int min_,max_; public: //functions __Repeat(__NodeBase * node,int ch); __Repeat(__NodeBase * node,int min,int max); ~__Repeat(); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; int Repeat(int ch); private: bool isInfinite() const{return (max_ & INFINITE) != 0;} bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;} bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;} bool canRepeat() const{return !(min_ & (_NON_GREEDY | _PROSSESSIVE));} }; class __Seq : public __NodeBase { typedef __DZ_VECTOR(__NodeBase *) __Con; __Con seq_; public: //functions explicit __Seq(__NodeBase * node); ~__Seq(); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; void AppendNode(__NodeBase * node); }; class __Group : public __NodeBase { static const int INDEX = 1 << 16; //group index flag static const size_t MAX_GROUPS = 9; __NodeBase * node_; size_t mark_; public: //functions __Group(__NodeBase * node,int mark); ~__Group(); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; }; class __Select : public __NodeBase { typedef __DZ_VECTOR(__NodeBase *) __Con; __Con sel_; size_t sz_; public: //functions explicit __Select(__NodeBase * node); ~__Select(); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; void AppendNode(__NodeBase * node); }; class __Ref : public __NodeBase { size_t index_; public: explicit __Ref(int index); __NodeBase * Optimize(__ParseData & pdata); void RandString(__GenerateData & gdata) const; void Debug(std::ostream & out,int lvl) const; }; class __CRegxString { typedef std::pair<__NodeBase *,int> __Ret; public: __CRegxString(); ~__CRegxString(){uninit();} void ParseRegx(const __DZ_STRING & regx,const Config * config); __DZ_STRING Regx() const{return regx_;} const __DZ_STRING & RandString(); const __DZ_STRING & LastString() const{return str_;} void Debug(std::ostream & out) const; private: __CRegxString(const __CRegxString &); __CRegxString & operator =(const __CRegxString &); void uninit(); __Ret processSeq(__ParseData & pdata); __Ret processSlash(bool bNode,__ParseData & pdata); __NodeBase * processSet(__ParseData & pdata); __NodeBase * processGroup(__ParseData & pdata); __Ret processSelect(__NodeBase * node,__ParseData & pdata); __NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata); int processInt(int & result,__ParseData & pdata); bool processRange(int & result,__ParseData & pdata); int ignoreSubexpMarks(__ParseData & pdata); //fields: __DZ_STRING regx_; __DZ_STRING str_; __NodeBase * top_; //regx tree }; NAMESAPCE_END #endif