1 /*************************************************************************
\r
3 * Licensed under the Apache License, Version 2.0 (the "License");
\r
4 * you may not use this file except in compliance with the License.
\r
5 * You may obtain a copy of the License at
\r
7 * http://www.apache.org/licenses/LICENSE-2.0
\r
9 * Unless required by applicable law or agreed to in writing, software
\r
10 * distributed under the License is distributed on an "AS IS" BASIS,
\r
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
12 * See the License for the specific language governing permissions and
\r
13 * limitations under the License.
\r
14 ***************************************************************************/
\r
// Include guard: the macro tested here is defined immediately below, so the
// header body is processed only once per translation unit.
16 #ifndef DOZERG_REGXSTRING_IMPL_H_20091012
\r
17 #define DOZERG_REGXSTRING_IMPL_H_20091012
\r
// __DZ_ALLOC is the allocator class template used by all __DZ_* container
// aliases. Three alternative definitions appear here: std::allocator (twice)
// and __gnu_cxx::__pool_alloc, the latter together with the header that
// declares it.
// NOTE(review): the preprocessor conditions that choose between these
// alternatives are not visible in this excerpt -- confirm which one applies
// for each toolchain.
31 # define __DZ_ALLOC std::allocator
\r
34 # define __DZ_ALLOC std::allocator
\r
36 # include <ext/pool_allocator.h>
\r
37 # define __DZ_ALLOC __gnu_cxx::__pool_alloc
\r
// Aliases for standard strings, containers, string streams and string
// buffers that all take their allocator from the __DZ_ALLOC template macro.
// Variants whose name ends in "1" accept one extra argument (a traits class T
// or a comparator C) in place of the default std::char_traits / std::less.
41 //stl containers redefine
\r
// Strings.
43 #define __DZ_BASIC_STRING(C) std::basic_string< C,std::char_traits< C >,__DZ_ALLOC< C > >
\r
44 #define __DZ_BASIC_STRING1(C,T) std::basic_string< C,T,__DZ_ALLOC< C > >
\r
45 #define __DZ_STRING __DZ_BASIC_STRING(char)
\r
46 #define __DZ_WSTRING __DZ_BASIC_STRING(wchar_t)
\r
// Sequence containers.
47 #define __DZ_DEQUE(T) std::deque< T,__DZ_ALLOC< T > >
\r
48 #define __DZ_LIST(T) std::list< T,__DZ_ALLOC< T > >
\r
49 #define __DZ_VECTOR(T) std::vector< T,__DZ_ALLOC< T > >
\r
// Associative containers.
// NOTE(review): std::map and std::multimap have value_type
// std::pair<const K,V>, whereas the allocator below is instantiated for
// std::pair<K,V>. Standard libraries that check the allocator's value_type
// may reject this mismatch -- confirm against the toolchains this header
// must support.
51 #define __DZ_MAP(K,V) std::map< K,V,std::less< K >,__DZ_ALLOC<std::pair< K,V > > >
\r
52 #define __DZ_MAP1(K,V,C) std::map< K,V,C,__DZ_ALLOC<std::pair< K,V > > >
\r
53 #define __DZ_MULTIMAP(K,V) std::multimap< K,V,std::less< K >,__DZ_ALLOC<std::pair< K,V > > >
\r
54 #define __DZ_MULTIMAP1(K,V,C) std::multimap< K,V,C,__DZ_ALLOC<std::pair< K,V > > >
\r
55 #define __DZ_SET(K) std::set< K,std::less< K >,__DZ_ALLOC< K > >
\r
56 #define __DZ_SET1(K,C) std::set< K,C,__DZ_ALLOC< K > >
\r
57 #define __DZ_MULTISET(K) std::multiset< K,std::less< K >,__DZ_ALLOC< K > >
\r
58 #define __DZ_MULTISET1(K,C) std::multiset< K,C,__DZ_ALLOC< K > >
\r
// String streams (input, output, bidirectional) and their char / wchar_t
// shorthands.
60 #define __DZ_BASIC_ISTRINGSTREAM(C) std::basic_istringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
\r
61 #define __DZ_BASIC_ISTRINGSTREAM1(C,T) std::basic_istringstream< C,T,__DZ_ALLOC< C > >
\r
62 #define __DZ_BASIC_OSTRINGSTREAM(C) std::basic_ostringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
\r
63 #define __DZ_BASIC_OSTRINGSTREAM1(C,T) std::basic_ostringstream< C,T,__DZ_ALLOC< C > >
\r
64 #define __DZ_BASIC_STRINGSTREAM(C) std::basic_stringstream< C,std::char_traits< C >,__DZ_ALLOC< C > >
\r
65 #define __DZ_BASIC_STRINGSTREAM1(C,T) std::basic_stringstream< C,T,__DZ_ALLOC< C > >
\r
66 #define __DZ_ISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(char)
\r
67 #define __DZ_OSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(char)
\r
68 #define __DZ_STRINGSTREAM __DZ_BASIC_STRINGSTREAM(char)
\r
69 #define __DZ_WISTRINGSTREAM __DZ_BASIC_ISTRINGSTREAM(wchar_t)
\r
70 #define __DZ_WOSTRINGSTREAM __DZ_BASIC_OSTRINGSTREAM(wchar_t)
\r
71 #define __DZ_WSTRINGSTREAM __DZ_BASIC_STRINGSTREAM(wchar_t)
\r
// String buffers.
73 #define __DZ_BASIC_STRINGBUF(C) std::basic_stringbuf< C,std::char_traits< C >,__DZ_ALLOC< C > >
\r
74 #define __DZ_BASIC_STRINGBUF1(C,T) std::basic_stringbuf< C,T,__DZ_ALLOC< C > >
\r
75 #define __DZ_STRINGBUF __DZ_BASIC_STRINGBUF(char)
\r
76 #define __DZ_WSTRINGBUF __DZ_BASIC_STRINGBUF(wchar_t)
\r
// Containers from the __gnu_cxx extension namespace (not part of the C++
// standard).
// NOTE(review): presumably guarded by a GCC-only condition that is not
// visible in this excerpt -- confirm.
78 #define __DZ_ROPE(T) __gnu_cxx::rope< T,__DZ_ALLOC< T > >
\r
79 #define __DZ_SLIST(T) __gnu_cxx::slist< T,__DZ_ALLOC< T > >
\r
// REGXSTRING_NS expands to the name of the implementation namespace;
// NAMESAPCE_BEGIN / NAMESAPCE_END open and close that same namespace.
// NOTE(review): "NAMESAPCE" is a misspelling of "NAMESPACE". It is part of
// the macro names, so correcting it requires updating every use site.
81 #define REGXSTRING_NS __DZ_Regx_String
\r
83 #define NAMESAPCE_BEGIN namespace __DZ_Regx_String{
\r
84 #define NAMESAPCE_END }
\r
// A pair of two size_t values.
// NOTE(review): presumably locates the text produced for a group so that a
// back-reference can reuse it -- the meaning of each field is not visible
// here; confirm against the implementation.
90 typedef std::pair<size_t,size_t> __RefValue;
\r
// Sequence of __RefValue entries.
92 typedef __DZ_VECTOR(__RefValue) __Refs;
\r
// Sequence of characters; see inEnds() for the lookup that presumably
// consults it (TODO confirm).
94 typedef __DZ_VECTOR(char) __Ends;
\r
// Members of the parse-state type __ParseData (only part of the type is
// visible in this excerpt).
// Held by reference, not by value: the referenced Config must outlive this
// object. Presumably bound to the constructor argument -- the initializer
// list is not visible here.
98 const Config & config_;
\r
// explicit: a Config is never implicitly converted into a __ParseData.
102 explicit __ParseData(const Config & config)
\r
// Takes a character code and returns an int; does not modify this object.
// NOTE(review): the meaning of the returned value is not visible here.
107 int inEnds(int ch) const;
\r
// State handed to every node's RandString() call (only part of the type is
// visible in this excerpt).
110 struct __GenerateData
\r
// Output string stream held by reference; the stream must outlive this
// object.
113 __DZ_OSTRINGSTREAM & oss_;
\r
// explicit: prevents implicit conversion from a stream to __GenerateData.
114 explicit __GenerateData(__DZ_OSTRINGSTREAM & oss)
\r
// Members of __NodeBase, the polymorphic base of all regex tree node types
// declared in this header (the class head and data members are not visible
// in this excerpt).
// Constant sentinel pointer; per the original inline comment, a result equal
// to it means "replace with NULL(0)".
121 static __NodeBase * const REP_NULL; //replace with NULL(0)
\r
// Increments `ref` on construction.
// NOTE(review): `ref` is declared outside this excerpt; presumably a live
// node counter -- confirm.
124 __NodeBase(){++ref;}
\r
// Virtual destructor, so derived nodes can be destroyed through a
// __NodeBase pointer.
126 virtual ~__NodeBase();
\r
// Pure virtual: every concrete node must implement Optimize, RandString and
// Debug.
127 virtual __NodeBase * Optimize(__ParseData & pdata) = 0;
\r
128 virtual void RandString(__GenerateData & gdata) const = 0;
\r
129 virtual void Debug(std::ostream & out,int lvl) const = 0;
\r
// Virtual with a base-class implementation; node types override these only
// when needed (__Repeat overrides Repeat; __Seq and __Select override
// AppendNode).
130 virtual int Repeat(int ch);
\r
131 virtual void AppendNode(__NodeBase * node);
\r
// Node type constructed from a single character code.
// NOTE(review): what the character denotes is not visible in this excerpt.
134 class __Edge : public __NodeBase
\r
138 explicit __Edge(int ch);
\r
// Implementations of the pure virtual __NodeBase interface.
139 __NodeBase * Optimize(__ParseData & pdata);
\r
140 void RandString(__GenerateData & gdata) const;
\r
141 void Debug(std::ostream & out,int lvl) const;
\r
// Node type that owns a string member str_ (its declaration is not visible
// in this excerpt) and is constructed from one character code.
144 class __Text : public __NodeBase
\r
149 explicit __Text(int ch);
\r
// Implementations of the pure virtual __NodeBase interface.
150 __NodeBase * Optimize(__ParseData & pdata);
\r
151 void RandString(__GenerateData & gdata) const;
\r
152 void Debug(std::ostream & out,int lvl) const;
\r
// Appends other's str_ to this node's str_ and returns *this so calls can be
// chained.
153 __Text & operator +=(const __Text & other){str_ += other.str_;return *this;}
\r
// Node type representing a set of characters (only part of the class is
// visible in this excerpt).
156 class __Charset : public __NodeBase
\r
// Builds the set from a string and an `include` flag.
// NOTE(review): presumably include == false means "any character NOT in
// str" -- confirm against the implementation.
163 __Charset(const __DZ_STRING & str,bool include);
\r
// Implementations of the pure virtual __NodeBase interface.
164 __NodeBase * Optimize(__ParseData & pdata);
\r
165 void RandString(__GenerateData & gdata) const;
\r
166 void Debug(std::ostream & out,int lvl) const;
\r
// Mutators: add a single character, a range given by two character codes, or
// the contents of another __Charset node.
// NOTE(review): whether the int range is inclusive at both ends, and who
// owns `node` after AddRange(__Charset *), is not visible here.
168 void AddChar(int ch);
\r
169 void AddRange(int from,int to);
\r
170 void AddRange(__Charset * node);
\r
// Combines this set with `node`, which is taken by non-const reference.
// NOTE(review): whether `node` is modified is not visible here.
173 void unite(__Charset & node);
\r
// Node type that repeats a child node. The repetition bounds min_ / max_ are
// ints (declared outside this excerpt) whose high bits carry the flags
// defined below.
178 struct __Repeat : public __NodeBase
\r
// Bit 16; tested against max_ by isInfinite().
180 static const int INFINITE = 1 << 16;
\r
// Largest value below the INFINITE bit: (1 << 16) - 1 = 65535.
182 static const int _REPEAT_MAX = __Repeat::INFINITE - 1;
\r
// Bit 17; tested against min_ by isNonGreedy().
183 static const int _NON_GREEDY = 1 << 17;
\r
// Bit 18; tested against min_ by isPossessive().
// NOTE(review): spelled "_PROSSESSIVE" although the accessor is named
// isPossessive.
184 static const int _PROSSESSIVE = 1 << 18;
\r
// Mask with bits 0..16 set: (1 << 17) - 1. ANDing with it removes the
// _NON_GREEDY and _PROSSESSIVE bits.
185 static const int _CLEAR_FLAGS = _NON_GREEDY - 1;
\r
// The child node being repeated.
// NOTE(review): ownership is not visible in this excerpt.
186 __NodeBase * node_;
\r
// Two ways to build a repeat: from a single character code, or from
// explicit min / max counts.
// NOTE(review): presumably `ch` is a quantifier character such as
// '?', '*' or '+' -- confirm.
190 __Repeat(__NodeBase * node,int ch);
\r
191 __Repeat(__NodeBase * node,int min,int max);
\r
// Implementations of the pure virtual __NodeBase interface.
193 __NodeBase * Optimize(__ParseData & pdata);
\r
194 void RandString(__GenerateData & gdata) const;
\r
195 void Debug(std::ostream & out,int lvl) const;
\r
// Overrides __NodeBase::Repeat.
196 int Repeat(int ch);
\r
// Flag accessors. The INFINITE bit lives in max_, while the non-greedy and
// possessive bits live in min_.
198 bool isInfinite() const{return (max_ & INFINITE) != 0;}
\r
199 bool isNonGreedy() const{return (min_ & _NON_GREEDY) != 0;}
\r
200 bool isPossessive() const{return (min_ & _PROSSESSIVE) != 0;}
\r
// True only when neither the non-greedy nor the possessive bit is set in
// min_.
201 bool canRepeat() const{return !(min_ & (_NON_GREEDY | _PROSSESSIVE));}
\r
// Node type holding a collection of child nodes (only part of the class is
// visible in this excerpt).
// NOTE(review): presumably the children are emitted in order, one after the
// other -- confirm.
204 class __Seq : public __NodeBase
\r
// Container type for the children: a vector of __NodeBase pointers.
206 typedef __DZ_VECTOR(__NodeBase *) __Con;
\r
// Constructed from one initial node.
210 explicit __Seq(__NodeBase * node);
\r
// Implementations of the pure virtual __NodeBase interface.
212 __NodeBase * Optimize(__ParseData & pdata);
\r
213 void RandString(__GenerateData & gdata) const;
\r
214 void Debug(std::ostream & out,int lvl) const;
\r
// Overrides __NodeBase::AppendNode.
215 void AppendNode(__NodeBase * node);
\r
// Node type wrapping a single child node (only part of the class is visible
// in this excerpt).
218 class __Group : public __NodeBase
\r
// Bit 16, used as the "group index" flag per the original inline comment.
220 static const int INDEX = 1 << 16; //group index flag
\r
// Upper limit of 9.
// NOTE(review): presumably the highest group number a back-reference
// (\1..\9) can address -- confirm.
221 static const size_t MAX_GROUPS = 9;
\r
// The wrapped child node.
// NOTE(review): ownership is not visible in this excerpt.
222 __NodeBase * node_;
\r
// NOTE(review): the encoding of `mark` is not visible in this excerpt.
226 __Group(__NodeBase * node,int mark);
\r
// Implementations of the pure virtual __NodeBase interface.
228 __NodeBase * Optimize(__ParseData & pdata);
\r
229 void RandString(__GenerateData & gdata) const;
\r
230 void Debug(std::ostream & out,int lvl) const;
\r
// Node type holding a collection of alternative child nodes (only part of
// the class is visible in this excerpt).
// NOTE(review): presumably RandString picks one alternative -- confirm.
233 class __Select : public __NodeBase
\r
// Container type for the alternatives: a vector of __NodeBase pointers.
235 typedef __DZ_VECTOR(__NodeBase *) __Con;
\r
// Constructed from one initial node.
240 explicit __Select(__NodeBase * node);
\r
// Implementations of the pure virtual __NodeBase interface.
242 __NodeBase * Optimize(__ParseData & pdata);
\r
243 void RandString(__GenerateData & gdata) const;
\r
244 void Debug(std::ostream & out,int lvl) const;
\r
// Overrides __NodeBase::AppendNode.
245 void AppendNode(__NodeBase * node);
\r
// Node type constructed from an integer index.
// NOTE(review): presumably a back-reference to the group with that index --
// confirm.
248 class __Ref : public __NodeBase
\r
252 explicit __Ref(int index);
\r
// Implementations of the pure virtual __NodeBase interface.
253 __NodeBase * Optimize(__ParseData & pdata);
\r
254 void RandString(__GenerateData & gdata) const;
\r
255 void Debug(std::ostream & out,int lvl) const;
\r
// Parses a regular expression into a tree of __NodeBase nodes (see top_) and
// exposes string generation through RandString(). Access specifiers and most
// data members are not visible in this excerpt.
258 class __CRegxString
\r
// Result type of several parsing helpers: a node pointer plus an int.
// NOTE(review): the meaning of the int is not visible here.
260 typedef std::pair<__NodeBase *,int> __Ret;
\r
// The destructor delegates to uninit(), which is declared outside this
// excerpt.
263 ~__CRegxString(){uninit();}
\r
// Parses `regx`. `config` is passed by pointer.
// NOTE(review): whether a null `config` is accepted is not visible here.
264 void ParseRegx(const __DZ_STRING & regx,const Config * config);
\r
// Returns a copy of regx_ (by value, unlike LastString below).
265 __DZ_STRING Regx() const{return regx_;}
\r
// Non-const; returns a reference to a string owned by this object.
// NOTE(review): presumably refreshes str_ and returns it -- confirm.
266 const __DZ_STRING & RandString();
\r
// Returns a reference to str_ without modifying the object; the reference is
// valid only while this object is alive.
267 const __DZ_STRING & LastString() const{return str_;}
\r
268 void Debug(std::ostream & out) const;
\r
// Copy constructor and copy assignment are declared but have no body here.
// NOTE(review): presumably private and left undefined to forbid copying --
// the access specifier is not visible in this excerpt.
270 __CRegxString(const __CRegxString &);
\r
271 __CRegxString & operator =(const __CRegxString &);
\r
// Parsing helpers; each takes the shared __ParseData state by non-const
// reference.
273 __Ret processSeq(__ParseData & pdata);
\r
274 __Ret processSlash(bool bNode,__ParseData & pdata);
\r
275 __NodeBase * processSet(__ParseData & pdata);
\r
276 __NodeBase * processGroup(__ParseData & pdata);
\r
277 __Ret processSelect(__NodeBase * node,__ParseData & pdata);
\r
278 __NodeBase * processRepeat(__NodeBase * node,__ParseData & pdata);
\r
// These two write the parsed value through `result`.
// NOTE(review): the meaning of their own return values is not visible here.
279 int processInt(int & result,__ParseData & pdata);
\r
280 bool processRange(int & result,__ParseData & pdata);
\r
281 int ignoreSubexpMarks(__ParseData & pdata);
\r
// Root of the parsed regex tree.
285 __NodeBase * top_; //regx tree
\r