--- /dev/null
+/*************************************************************************\r
+*\r
+* Licensed under the Apache License, Version 2.0 (the "License");\r
+* you may not use this file except in compliance with the License.\r
+* You may obtain a copy of the License at\r
+*\r
+* http://www.apache.org/licenses/LICENSE-2.0\r
+*\r
+* Unless required by applicable law or agreed to in writing, software\r
+* distributed under the License is distributed on an "AS IS" BASIS,\r
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+* See the License for the specific language governing permissions and\r
+* limitations under the License.\r
+***************************************************************************/\r
+\r
+#ifndef DOZERG_REGXSTRING_IMPL_H_20091012\r
+#define DOZERG_REGXSTRING_IMPL_H_20091012\r
+\r
+#include <string>\r
+#include <vector>\r
+#include <iosfwd>\r
+#include <utility>\r
+#include <memory>\r
+\r
+#define _DZ_DEBUG 0 /* debug switch, defined as 0 here; nothing in this header tests it */\r
+\r
+#define _MEM_LEAK 0 /* when non-zero, __NodeBase gains a static counter `ref` that its default constructor increments */\r
+\r
+//allocator choice\r
+//__DZ_ALLOC names the allocator template used by every __DZ_* container alias.\r
+//The GNU pool allocator is picked only for optimized (NDEBUG) builds that are\r
+//really compiled against libstdc++: __GNUC__ alone is not sufficient, because\r
+//other compilers define it while using a standard library that does not ship\r
+//<ext/pool_allocator.h>. __GLIBCXX__ is already visible at this point because\r
+//the standard headers are included above. Every other build uses std::allocator.\r
+#if defined(__GNUC__) && defined(__GLIBCXX__) && defined(NDEBUG)\r
+# include <ext/pool_allocator.h>\r
+# define __DZ_ALLOC __gnu_cxx::__pool_alloc\r
+#else\r
+# define __DZ_ALLOC std::allocator\r
+#endif\r
+\r
+//stl containers redefine\r
+//Every alias below is the standard container bound to __DZ_ALLOC. The macros\r
+//only spell a type name, so the file that uses one has to include the matching\r
+//container header itself (<deque>, <list>, <map>, <set>, <sstream>, ...).\r
+ //Sequence\r
+#define __DZ_BASIC_STRING(C) std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_STRING1(C,T) std::basic_string< C,T,__DZ_ALLOC< C > >\r
+#define __DZ_STRING __DZ_BASIC_STRING(char)\r
+#define __DZ_WSTRING __DZ_BASIC_STRING(wchar_t)\r
+#define __DZ_DEQUE(T) std::deque< T,__DZ_ALLOC< T > >\r
+#define __DZ_LIST(T) std::list< T,__DZ_ALLOC< T > >\r
+#define __DZ_VECTOR(T) std::vector< T,__DZ_ALLOC< T > >\r
+ //Associative\r
+//The allocator of a map/multimap must allocate the container's value_type,\r
+//which is a pair with a const key. `K const` (rather than `const K`) keeps the\r
+//qualifier on the key itself even when K is a pointer type such as `char *`.\r
+#define __DZ_MAP(K,V) std::map< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >\r
+#define __DZ_MAP1(K,V,C) std::map< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >\r
+#define __DZ_MULTIMAP(K,V) std::multimap< K,V,std::less< K >,__DZ_ALLOC<std::pair< K const,V > > >\r
+#define __DZ_MULTIMAP1(K,V,C) std::multimap< K,V,C,__DZ_ALLOC<std::pair< K const,V > > >\r
+#define __DZ_SET(K) std::set< K,std::less< K >,__DZ_ALLOC< K > >\r
+#define __DZ_SET1(K,C) std::set< K,C,__DZ_ALLOC< K > >\r
+#define __DZ_MULTISET(K) std::multiset< K,std::less< K >,__DZ_ALLOC< K > >\r
+#define __DZ_MULTISET1(K,C) std::multiset< K,C,__DZ_ALLOC< K > >\r
+ //String Stream\r
+#define __DZ_BASIC_ISTRINGSTREAM(C) std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_ISTRINGSTREAM1(C,T) std::basic_istringstream< C,T,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_OSTRINGSTREAM(C) std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_OSTRINGSTREAM1(C,T) std::basic_ostringstream< C,T,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_STRINGSTREAM(C) std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_STRINGSTREAM1(C,T) std::basic_stringstream< C,T,__DZ_ALLOC< C > >\r
+#define __DZ_ISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(char)\r
+#define __DZ_OSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(char)\r
+#define __DZ_STRINGSTREAM __DZ_BASIC_STRINGSTREAM(char)\r
+#define __DZ_WISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(wchar_t)\r
+#define __DZ_WOSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(wchar_t)\r
+#define __DZ_WSTRINGSTREAM __DZ_BASIC_STRINGSTREAM(wchar_t)\r
+ //Stream Buf\r
+#define __DZ_BASIC_STRINGBUF(C) std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > >\r
+#define __DZ_BASIC_STRINGBUF1(C,T) std::basic_stringbuf< C,T,__DZ_ALLOC< C > >\r
+#define __DZ_STRINGBUF __DZ_BASIC_STRINGBUF(char)\r
+#define __DZ_WSTRINGBUF __DZ_BASIC_STRINGBUF(wchar_t)\r
+ //Extension\r
+//These two name types from namespace __gnu_cxx, so they are GNU-only.\r
+#define __DZ_ROPE(T) __gnu_cxx::rope< T,__DZ_ALLOC< T > >\r
+#define __DZ_SLIST(T) __gnu_cxx::slist< T,__DZ_ALLOC< T > >\r
+\r
+//Name of the namespace that holds every implementation type of this header\r
+#define REGXSTRING_NS __DZ_Regx_String\r
+\r
+//Open / close that namespace. The macro names keep their existing spelling\r
+//because code outside this header may already refer to them.\r
+#define NAMESAPCE_BEGIN namespace REGXSTRING_NS{\r
+#define NAMESAPCE_END }\r
+\r
+struct Config; /* forward declaration at global scope; this header only uses Config through references and pointers */\r
+\r
+NAMESAPCE_BEGIN\r
+\r
+typedef std::pair<size_t,size_t> __RefValue; /* element type of __Refs; presumably locates a captured group inside the generated text -- TODO confirm in the implementation */\r
+\r
+typedef __DZ_VECTOR(__RefValue) __Refs; /* vector of __RefValue; type of __GenerateData::refs_ */\r
+\r
+typedef __DZ_VECTOR(char) __Ends; /* vector of char; type of __ParseData::ends_ */\r
+\r
+//Mutable state that is passed by reference to the process*() helpers of\r
+//__CRegxString and to every Optimize() call.\r
+struct __ParseData\r
+{\r
+ //functions:\r
+ //Binds the configuration object and sets both counters to zero; ends_ starts empty.\r
+ explicit __ParseData(const Config & cfg):config_(cfg),i_(0),ref_(0){}\r
+ //Defined out of line; takes a character code and does not modify this object.\r
+ int inEnds(int ch) const;\r
+ //fields (relative order unchanged, it fixes layout and construction order):\r
+ __Ends ends_; //vector of char\r
+ const Config & config_; //reference only: the Config must outlive this object\r
+ size_t i_; //presumably the current position in the pattern -- TODO confirm\r
+ int ref_; //presumably a running group/back-reference count -- TODO confirm\r
+};\r
+\r
+//Mutable state that is passed by reference to every RandString() call.\r
+struct __GenerateData{\r
+ //Binds the output stream; refs_ starts empty.\r
+ explicit __GenerateData(__DZ_OSTRINGSTREAM & out):oss_(out){}\r
+ //fields:\r
+ __Refs refs_; //vector of __RefValue\r
+ __DZ_OSTRINGSTREAM & oss_; //reference only: the stream belongs to whoever built this object\r
+};\r
+\r
+//Abstract base of every node type declared in this header. Optimize(),\r
+//RandString() and Debug() are pure virtual; Repeat() and AppendNode() have\r
+//base-class definitions outside this header.\r
+struct __NodeBase{\r
+#if _MEM_LEAK\r
+ //leak-check builds only: each default construction bumps the counter\r
+ static int ref;\r
+ __NodeBase(){ref += 1;}\r
+#endif\r
+ //sentinel pointer value; per the original note it means "replace with NULL(0)"\r
+ static __NodeBase * const REP_NULL;\r
+ virtual ~__NodeBase();\r
+ //Returns a node pointer; may read and update the parse state.\r
+ virtual __NodeBase * Optimize(__ParseData & parse) = 0;\r
+ //Works on the generation state it is given and leaves the node unchanged.\r
+ virtual void RandString(__GenerateData & gen) const = 0;\r
+ //Receives an output stream and an int level and leaves the node unchanged.\r
+ virtual void Debug(std::ostream & os,int level) const = 0;\r
+ virtual int Repeat(int ch);\r
+ virtual void AppendNode(__NodeBase * node);\r
+};\r
+\r
+//Node type built from one character code; its only state is a boolean flag.\r
+class __Edge : public __NodeBase{\r
+public:\r
+ explicit __Edge(int ch);\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & parse);\r
+ void RandString(__GenerateData & gen) const;\r
+ void Debug(std::ostream & os,int level) const;\r
+private:\r
+ bool begin_; //presumably tells a begin anchor from an end anchor -- TODO confirm\r
+};\r
+\r
+//Node type that stores a string; it is constructed from one character code.\r
+class __Text : public __NodeBase{\r
+public:\r
+ //functions\r
+ explicit __Text(int ch);\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & parse);\r
+ void RandString(__GenerateData & gen) const;\r
+ void Debug(std::ostream & os,int level) const;\r
+ //Appends the text of `other` to this node and returns this node.\r
+ __Text & operator +=(const __Text & other){\r
+  str_.append(other.str_);\r
+  return *this;\r
+ }\r
+private:\r
+ __DZ_STRING str_; //the stored text\r
+};\r
+\r
+//Node type for a set of characters, kept as a string plus one size_t field.\r
+class __Charset : public __NodeBase{\r
+public:\r
+ //functions\r
+ __Charset();\r
+ __Charset(const __DZ_STRING & str,bool include);\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & parse);\r
+ void RandString(__GenerateData & gen) const;\r
+ void Debug(std::ostream & os,int level) const;\r
+ //set editing, all defined out of line\r
+ void Exclude();\r
+ void AddChar(int ch);\r
+ void AddRange(int from,int to); //two character codes\r
+ void AddRange(__Charset * node); //another charset node\r
+ void Unique();\r
+private:\r
+ //internal helpers; unique() is distinct from the public Unique()\r
+ void unite(__Charset & node);\r
+ void reverse();\r
+ void unique();\r
+ //fields (relative order unchanged):\r
+ __DZ_STRING str_; //the characters of the set\r
+ size_t inc_; //presumably the include/exclude state -- TODO confirm\r
+};\r
+\r
+//Repetition node: one child node plus a min_/max_ pair. Option flags are\r
+//stored in high bits of those same ints, see the bit constants below.\r
+struct __Repeat : public __NodeBase\r
+{\r
+ static const int INFINITE = 1 << 16; //bit 16; isInfinite() tests it on max_\r
+private:\r
+ static const int _REPEAT_MAX = __Repeat::INFINITE - 1; //65535\r
+ static const int _NON_GREEDY = 1 << 17; //bit 17; tested on min_\r
+ static const int _PROSSESSIVE = 1 << 18; //bit 18; tested on min_ (existing spelling kept)\r
+ static const int _CLEAR_FLAGS = _NON_GREEDY - 1; //mask covering bits 0..16\r
+ __NodeBase * node_; //child node; presumably released by ~__Repeat() -- TODO confirm\r
+ int min_,max_;\r
+ //Not copyable: the class has a raw pointer member and its own destructor, so\r
+ //an implicit copy would leave two objects holding the same child. Declared\r
+ //private and never defined, the same idiom __CRegxString uses.\r
+ __Repeat(const __Repeat &);\r
+ __Repeat & operator =(const __Repeat &);\r
+public:\r
+ //functions\r
+ __Repeat(__NodeBase * node,int ch);\r
+ __Repeat(__NodeBase * node,int min,int max);\r
+ ~__Repeat();\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & pdata);\r
+ void RandString(__GenerateData & gdata) const;\r
+ void Debug(std::ostream & out,int lvl) const;\r
+ int Repeat(int ch);\r
+private:\r
+ //true when the INFINITE bit is set in max_\r
+ bool isInfinite() const{return (max_ & INFINITE) != 0;}\r
+ //true when the non-greedy bit is set in min_\r
+ bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;}\r
+ //true when the possessive bit is set in min_\r
+ bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;}\r
+ //true only when neither of those two bits is set in min_\r
+ bool canRepeat() const{return (min_ & (_NON_GREEDY | _PROSSESSIVE)) == 0;}\r
+};\r
+\r
+//Node type that holds a vector of child node pointers; it is constructed\r
+//from a first node and extended through AppendNode().\r
+class __Seq : public __NodeBase\r
+{\r
+ typedef __DZ_VECTOR(__NodeBase *) __Con;\r
+ __Con seq_; //child nodes; presumably released by ~__Seq() -- TODO confirm\r
+ //Not copyable: the class stores raw node pointers and has its own destructor,\r
+ //so an implicit copy would leave two objects holding the same children.\r
+ //Declared private and never defined, the same idiom __CRegxString uses.\r
+ __Seq(const __Seq &);\r
+ __Seq & operator =(const __Seq &);\r
+public:\r
+ //functions\r
+ explicit __Seq(__NodeBase * node);\r
+ ~__Seq();\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & pdata);\r
+ void RandString(__GenerateData & gdata) const;\r
+ void Debug(std::ostream & out,int lvl) const;\r
+ void AppendNode(__NodeBase * node);\r
+};\r
+\r
+//Node type that wraps one child node together with a size_t mark; it is\r
+//constructed from the child and an int mark.\r
+class __Group : public __NodeBase\r
+{\r
+ static const int INDEX = 1 << 16; //group index flag\r
+ static const size_t MAX_GROUPS = 9;\r
+ __NodeBase * node_; //child node; presumably released by ~__Group() -- TODO confirm\r
+ size_t mark_;\r
+ //Not copyable: the class has a raw pointer member and its own destructor, so\r
+ //an implicit copy would leave two objects holding the same child. Declared\r
+ //private and never defined, the same idiom __CRegxString uses.\r
+ __Group(const __Group &);\r
+ __Group & operator =(const __Group &);\r
+public:\r
+ //functions\r
+ __Group(__NodeBase * node,int mark);\r
+ ~__Group();\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & pdata);\r
+ void RandString(__GenerateData & gdata) const;\r
+ void Debug(std::ostream & out,int lvl) const;\r
+};\r
+\r
+//Node type that holds a vector of child node pointers plus a size_t field;\r
+//it is constructed from a first node and extended through AppendNode().\r
+class __Select : public __NodeBase\r
+{\r
+ typedef __DZ_VECTOR(__NodeBase *) __Con;\r
+ __Con sel_; //child nodes; presumably released by ~__Select() -- TODO confirm\r
+ size_t sz_;\r
+ //Not copyable: the class stores raw node pointers and has its own destructor,\r
+ //so an implicit copy would leave two objects holding the same children.\r
+ //Declared private and never defined, the same idiom __CRegxString uses.\r
+ __Select(const __Select &);\r
+ __Select & operator =(const __Select &);\r
+public:\r
+ //functions\r
+ explicit __Select(__NodeBase * node);\r
+ ~__Select();\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & pdata);\r
+ void RandString(__GenerateData & gdata) const;\r
+ void Debug(std::ostream & out,int lvl) const;\r
+ void AppendNode(__NodeBase * node);\r
+};\r
+\r
+//Node type whose only state is an index; it is constructed from an int.\r
+class __Ref : public __NodeBase{\r
+public:\r
+ explicit __Ref(int index);\r
+ //__NodeBase interface\r
+ __NodeBase * Optimize(__ParseData & parse);\r
+ void RandString(__GenerateData & gen) const;\r
+ void Debug(std::ostream & os,int level) const;\r
+private:\r
+ size_t index_; //presumably selects an entry of __GenerateData::refs_ -- TODO confirm\r
+};\r
+\r
+//Front-end class of this header: it keeps the pattern text, the most recent\r
+//result string and the root of the node tree.\r
+class __CRegxString{\r
+ typedef std::pair<__NodeBase *,int> __Ret; //return type of several process*() helpers\r
+ //fields:\r
+ __DZ_STRING regx_; //handed out by Regx()\r
+ __DZ_STRING str_; //handed out by LastString()\r
+ __NodeBase * top_; //regx tree\r
+ //copying is disabled: both operations are private declarations\r
+ __CRegxString(const __CRegxString &);\r
+ __CRegxString & operator =(const __CRegxString &);\r
+public:\r
+ __CRegxString();\r
+ //The destructor only forwards to uninit().\r
+ ~__CRegxString(){\r
+  uninit();\r
+ }\r
+ void ParseRegx(const __DZ_STRING & regx,const Config * config);\r
+ //Returns a copy of the stored pattern text.\r
+ __DZ_STRING Regx() const{\r
+  return regx_;\r
+ }\r
+ const __DZ_STRING & RandString();\r
+ //Returns a reference to str_ without changing it.\r
+ const __DZ_STRING & LastString() const{\r
+  return str_;\r
+ }\r
+ void Debug(std::ostream & out) const;\r
+private:\r
+ void uninit();\r
+ //parsing helpers, all defined out of line\r
+ __Ret processSeq(__ParseData & pdata);\r
+ __Ret processSlash(bool bNode,__ParseData & pdata);\r
+ __NodeBase * processSet(__ParseData & pdata);\r
+ __NodeBase * processGroup(__ParseData & pdata);\r
+ __Ret processSelect(__NodeBase * node,__ParseData & pdata);\r
+ __NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata);\r
+ int processInt(int & result,__ParseData & pdata);\r
+ bool processRange(int & result,__ParseData & pdata);\r
+ int ignoreSubexpMarks(__ParseData & pdata);\r
+};\r
+\r
+NAMESAPCE_END\r
+\r
+#endif\r