2 * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
\r
3 * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
\r
4 * All rights reserved.
\r
5 * Redistribution and modifications are permitted subject to BSD license.
\r
7 #include <asn_system.h>
\r
8 #include <xer_support.h>
\r
16 ST_TAG_QUOTED_STRING,
\r
17 ST_TAG_UNQUOTED_STRING,
\r
18 ST_COMMENT_WAIT_DASH1, /* "<!--"[1] */
\r
19 ST_COMMENT_WAIT_DASH2, /* "<!--"[2] */
\r
21 ST_COMMENT_CLO_DASH2, /* "-->"[0] */
\r
22 ST_COMMENT_CLO_RT /* "-->"[1] */
\r
27 0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
\r
28 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
\r
29 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
\r
30 2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0, /* 01234567 89 */
\r
31 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* ABCDEFG HIJKLMNO */
\r
32 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0, /* PQRSTUVW XYZ */
\r
33 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* abcdefg hijklmno */
\r
34 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0 /* pqrstuvw xyz */
\r
/*
 * Character classification helpers backed by the _charclass lookup table.
 * The argument is cast to unsigned char before indexing, so a negative
 * (signed) char value never produces a negative index.
 *   WHITESPACE(c): the table entry equals 1.
 *   ALNUM(c):      the table entry is 2 or greater (the table rows annotated
 *                  with digits hold 2, the rows annotated with letters hold 3).
 *   ALPHA(c):      the table entry equals 3.
 */
36 #define WHITESPACE(c) (_charclass[(unsigned char)(c)] == 1)
\r
37 #define ALNUM(c) (_charclass[(unsigned char)(c)] >= 2)
\r
38 #define ALPHA(c) (_charclass[(unsigned char)(c)] == 3)
\r
40 /* Named byte values for punctuation characters (ASCII codes, encoded as the same single bytes in UTF-8) */
\r
41 #define EXCLAM 0x21 /* '!' */
\r
42 #define CQUOTE 0x22 /* '"' */
\r
43 #define CDASH 0x2d /* '-' */
\r
44 #define CSLASH 0x2f /* '/' */
\r
45 #define LANGLE 0x3c /* '<' */
\r
46 #define CEQUAL 0x3d /* '=' */
\r
47 #define RANGLE 0x3e /* '>' */
\r
48 #define CQUEST 0x3f /* '?' */
\r
50 /*
 * Invoke the token callback on the pending chunk.
 * The macro uses the names p, chunk_start, cb and key from the scope in
 * which it is expanded.
 *   type         - token type, passed as the first callback argument.
 *   _ns          - parser state value, captured once into the local ns.
 *   _current_too - added both to the chunk length (p - chunk_start) and to
 *                  the new chunk_start, so a value of 1 includes the byte at p
 *                  in the reported chunk and starts the next chunk after it.
 *   _final       - NOTE(review): no use of this argument is visible in this
 *                  copy of the macro.
 * NOTE(review): parts of the macro body (the declaration of _ret, the
 * handling of the callback result, the closing of the do block) are absent
 * from this copy of the file; confirm against the upstream source.
 */
\r
51 #define TOKEN_CB_CALL(type, _ns, _current_too, _final) do { \
\r
53 pstate_e ns = _ns; \
\r
54 ssize_t _sz = (p - chunk_start) + _current_too; \
\r
60 _ret = cb(type, chunk_start, _sz, key); \
\r
62 if(_current_too && _ret == -1) \
\r
66 chunk_start = p + _current_too; \
\r
/* Shorthand for TOKEN_CB_CALL with the _final argument fixed at 0. */
70 #define TOKEN_CB(_type, _ns, _current_too) \
\r
71 TOKEN_CB_CALL(_type, _ns, _current_too, 0)
\r
/*
 * TOKEN_CB_FINAL pastes the suffix _FINAL_CHUNK_TYPE onto its _type
 * argument. Through the two defines below, PXML_TAG therefore becomes
 * PXML_TAG_END and PXML_COMMENT becomes PXML_COMMENT_END. The result is
 * forwarded to TOKEN_CB_CALL with the _final argument fixed at 1.
 */
73 #define PXML_TAG_FINAL_CHUNK_TYPE PXML_TAG_END
\r
74 #define PXML_COMMENT_FINAL_CHUNK_TYPE PXML_COMMENT_END
\r
76 #define TOKEN_CB_FINAL(_type, _ns, _current_too) \
\r
77 TOKEN_CB_CALL( _type ## _FINAL_CHUNK_TYPE , _ns, _current_too, 1)
\r
/*
 * Scan size bytes starting at xmlbuf one byte at a time and report the
 * recognized chunks through cb (by way of the TOKEN_CB and TOKEN_CB_FINAL
 * macros).
 *   stateContext - in/out: the parser state is read from it on entry and
 *                  the current state is stored back ahead of the return
 *                  statement.
 *   xmlbuf, size - the input buffer and its length in bytes.
 *   cb, key      - the token callback and the value handed to it as its
 *                  last argument.
 * Returns the offset of chunk_start from the beginning of xmlbuf, that is,
 * how many leading bytes lie before the still pending chunk.
 * NOTE(review): several original lines (the switch statement, a number of
 * case labels, comment delimiters and closing braces) are absent from this
 * copy of the file; restore them from the upstream source before building.
 */
82 ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
\r
83 pstate_e state = (pstate_e)*stateContext;
\r
84 const char *chunk_start = (const char *)xmlbuf;
\r
85 const char *p = chunk_start;
\r
86 const char *end = p + size;
\r
88 for(; p < end; p++) {
\r
89 int C = *(const unsigned char *)p;
\r
93 * Initial state: we're in the middle of some text,
\r
94 * or just have started.
\r
97 /* We're now in the tag, probably */
\r
98 TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
\r
101 if (ALPHA(C) || (C == CSLASH))
\r
102 state = ST_TAG_BODY;
\r
103 else if (C == EXCLAM)
\r
104 state = ST_COMMENT_WAIT_DASH1;
\r
107 * Not characters and not whitespace.
\r
108 * Must be something like "3 < 4".
\r
110 TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
\r
115 /* End of the tag */
\r
116 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
\r
120 * The previous tag wasn't completed, but still
\r
121 * recognized as valid. (Mozilla-compatible)
\r
123 TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);
\r
126 state = ST_TAG_QUOTE_WAIT;
\r
130 case ST_TAG_QUOTE_WAIT:
\r
132 * State after the equal sign ("=") in the tag.
\r
136 state = ST_TAG_QUOTED_STRING;
\r
139 /* End of the tag */
\r
140 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
\r
144 /* Unquoted string value */
\r
145 state = ST_TAG_UNQUOTED_STRING;
\r
148 case ST_TAG_QUOTED_STRING:
\r
150 * Tag attribute's string value in quotes.
\r
153 /* Return back to the tag state */
\r
154 state = ST_TAG_BODY;
\r
157 case ST_TAG_UNQUOTED_STRING:
\r
159 /* End of the tag */
\r
160 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
\r
161 } else if(WHITESPACE(C)) {
\r
162 /* Return back to the tag state */
\r
163 state = ST_TAG_BODY;
\r
166 case ST_COMMENT_WAIT_DASH1:
\r
168 state = ST_COMMENT_WAIT_DASH2;
\r
170 /* Some ordinary tag. */
\r
171 state = ST_TAG_BODY;
\r
174 case ST_COMMENT_WAIT_DASH2:
\r
177 state = ST_COMMENT;
\r
179 /* Some ordinary tag */
\r
180 state = ST_TAG_BODY;
\r
185 state = ST_COMMENT_CLO_DASH2;
\r
188 case ST_COMMENT_CLO_DASH2:
\r
190 state = ST_COMMENT_CLO_RT;
\r
192 /* This is not the end of a comment */
\r
193 state = ST_COMMENT;
\r
196 case ST_COMMENT_CLO_RT:
\r
198 TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
\r
199 } else if(C == CDASH) {
\r
200 /* Maintain current state, still waiting for '>' */
\r
202 state = ST_COMMENT;
\r
205 } /* switch(*ptr) */
\r
209 * Flush the partially processed chunk, state permitting.
\r
211 if(p - chunk_start) {
\r
214 TOKEN_CB(PXML_COMMENT, state, 0);
\r
217 TOKEN_CB(PXML_TEXT, state, 0);
\r
219 default: break; /* a no-op */
\r
/* Store the state back so that a later call can start from it. */
224 *stateContext = (int)state;
\r
225 return chunk_start - (const char *)xmlbuf;
\r