1 /*****************************************************************************
3 # Copyright 2019 AT&T Intellectual Property *
5 # Licensed under the Apache License, Version 2.0 (the "License"); *
6 # you may not use this file except in compliance with the License. *
7 # You may obtain a copy of the License at *
9 # http://www.apache.org/licenses/LICENSE-2.0 *
11 # Unless required by applicable law or agreed to in writing, software *
12 # distributed under the License is distributed on an "AS IS" BASIS, *
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14 # See the License for the specific language governing permissions and *
15 # limitations under the License. *
17 ******************************************************************************/
20 * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
21 * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
22 * All rights reserved.
23 * Redistribution and modifications are permitted subject to BSD license.
25 #include <asn_system.h>
26 #include <xer_support.h>
35 ST_TAG_UNQUOTED_STRING,
36 ST_COMMENT_WAIT_DASH1, /* "<!--"[1] */
37 ST_COMMENT_WAIT_DASH2, /* "<!--"[2] */
39 ST_COMMENT_CLO_DASH2, /* "-->"[0] */
40 ST_COMMENT_CLO_RT /* "-->"[1] */
45 0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
46 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
47 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
48 2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0, /* 01234567 89 */
49 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* ABCDEFG HIJKLMNO */
50 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0, /* PQRSTUVW XYZ */
51 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* abcdefg hijklmno */
52 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0 /* pqrstuvw xyz */
/*
 * Character-class predicates backed by the _charclass lookup table.
 * The argument is cast to unsigned char before it is used as an index,
 * so a negative plain-char value never yields a negative index.
 *
 *   WHITESPACE(c)  the table entry is exactly 1.
 *   ALNUM(c)       the table entry is 2 or higher; the table rows
 *                  annotated with digits hold 2 and the rows annotated
 *                  with letters hold 3, so both kinds match.
 *   ALPHA(c)       the table entry is exactly 3 (letter rows only).
 */
54 #define WHITESPACE(c) (_charclass[(unsigned char)(c)] == 1)
55 #define ALNUM(c) (_charclass[(unsigned char)(c)] >= 2)
56 #define ALPHA(c) (_charclass[(unsigned char)(c)] == 3)
58 /* Aliases for characters, ASCII/UTF-8 */
/*
 * Each alias is the numeric byte value of the character shown in its
 * trailing comment. The parser compares the current input byte against
 * these names (for example CSLASH, EXCLAM and CDASH in pxml_parse)
 * instead of using character literals.
 */
59 #define EXCLAM 0x21 /* '!' */
60 #define CQUOTE 0x22 /* '"' */
61 #define CDASH 0x2d /* '-' */
62 #define CSLASH 0x2f /* '/' */
63 #define LANGLE 0x3c /* '<' */
64 #define CEQUAL 0x3d /* '=' */
65 #define RANGLE 0x3e /* '>' */
66 #define CQUEST 0x3f /* '?' */
68 /* Invoke token callback */
/*
 * TOKEN_CB_CALL(type, _ns, _current_too, _final)
 *
 * Statement macro meant to be expanded inside the parser function: it
 * refers to the names p, chunk_start, cb and key of the expansion site
 * without taking them as arguments.
 *
 * Visible behaviour:
 *  - The chunk size handed to the callback is the distance from
 *    chunk_start to p, plus _current_too (pass 1 to include the byte
 *    at p in the chunk, 0 to exclude it).
 *  - The callback is invoked as cb(type, chunk_start, size, key) and
 *    its result is kept in _ret.
 *  - A result of -1 is special-cased when _current_too is non-zero.
 *  - chunk_start is moved to p + _current_too, i.e. just past the
 *    chunk that was reported.
 *
 * NOTE(review): only part of the macro body is visible here; how _ns
 * and _final are used, and under which condition chunk_start advances,
 * must be confirmed against the complete definition.
 */
69 #define TOKEN_CB_CALL(type, _ns, _current_too, _final) do { \
72 ssize_t _sz = (p - chunk_start) + _current_too; \
78 _ret = cb(type, chunk_start, _sz, key); \
80 if(_current_too && _ret == -1) \
84 chunk_start = p + _current_too; \
/*
 * TOKEN_CB(_type, _ns, _current_too)
 *
 * Report a chunk of kind _type through TOKEN_CB_CALL with the _final
 * argument fixed to 0. The _type value is passed through unchanged.
 */
88 #define TOKEN_CB(_type, _ns, _current_too) \
89 TOKEN_CB_CALL(_type, _ns, _current_too, 0)
/*
 * Mapping from a chunk kind to the kind reported for its last chunk.
 * TOKEN_CB_FINAL builds these names by token pasting, so every kind
 * passed to TOKEN_CB_FINAL needs a matching <kind>_FINAL_CHUNK_TYPE
 * definition here.
 */
91 #define PXML_TAG_FINAL_CHUNK_TYPE PXML_TAG_END
92 #define PXML_COMMENT_FINAL_CHUNK_TYPE PXML_COMMENT_END
/*
 * TOKEN_CB_FINAL(_type, _ns, _current_too)
 *
 * Report the closing chunk of a tag or comment. The _type argument is
 * pasted with _FINAL_CHUNK_TYPE, so TOKEN_CB_FINAL(PXML_TAG, ...) is
 * reported as PXML_TAG_END and TOKEN_CB_FINAL(PXML_COMMENT, ...) as
 * PXML_COMMENT_END. The _final argument of TOKEN_CB_CALL is fixed to 1.
 */
94 #define TOKEN_CB_FINAL(_type, _ns, _current_too) \
95 TOKEN_CB_CALL( _type ## _FINAL_CHUNK_TYPE , _ns, _current_too, 1)
/*
 * pxml_parse: scan a buffer byte by byte and report text, tag and
 * comment chunks to a callback, keeping the parser state between calls.
 *
 * Parameters:
 *   stateContext  in/out; the parser state is loaded from this location
 *                 on entry and stored back before returning, so the same
 *                 location can be passed again for the next buffer.
 *   xmlbuf, size  the input bytes to scan and their count.
 *   cb            callback invoked through the TOKEN_CB and
 *                 TOKEN_CB_FINAL macros as cb(type, chunk, length, key).
 *   key           opaque value handed to cb unchanged.
 *
 * Returns the offset of chunk_start from the beginning of xmlbuf, that
 * is, the number of leading bytes that are no longer pending.
 * NOTE(review): presumably the caller re-submits the remaining bytes
 * together with further input; confirm against the callers.
 */
100 ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
/* Resume from the state saved by the previous call. */
101 pstate_e state = (pstate_e)*stateContext;
/* chunk_start marks the first byte not yet reported; p is the cursor. */
102 const char *chunk_start = (const char *)xmlbuf;
103 const char *p = chunk_start;
104 const char *end = p + size;
106 for(; p < end; p++) {
/* Read the byte as unsigned so that C is never negative. */
107 int C = *(const unsigned char *)p;
111 * Initial state: we're in the middle of some text,
112 * or just have started.
115 /* We're now in the tag, probably */
116 TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
119 if (ALPHA(C) || (C == CSLASH))
121 else if (C == EXCLAM)
122 state = ST_COMMENT_WAIT_DASH1;
125 * Not characters and not whitespace.
126 * Must be something like "3 < 4".
128 TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */
134 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
138 * The previous tag wasn't completed, but still
139 * recognized as valid. (Mozilla-compatible)
141 TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);
144 state = ST_TAG_QUOTE_WAIT;
148 case ST_TAG_QUOTE_WAIT:
150 * State after the equal sign ("=") in the tag.
154 state = ST_TAG_QUOTED_STRING;
158 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
162 /* Unquoted string value */
163 state = ST_TAG_UNQUOTED_STRING;
166 case ST_TAG_QUOTED_STRING:
168 * Tag attribute's string value in quotes.
171 /* Return back to the tag state */
175 case ST_TAG_UNQUOTED_STRING:
178 TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
179 } else if(WHITESPACE(C)) {
180 /* Return back to the tag state */
184 case ST_COMMENT_WAIT_DASH1:
186 state = ST_COMMENT_WAIT_DASH2;
188 /* Some ordinary tag. */
192 case ST_COMMENT_WAIT_DASH2:
197 /* Some ordinary tag */
203 state = ST_COMMENT_CLO_DASH2;
206 case ST_COMMENT_CLO_DASH2:
208 state = ST_COMMENT_CLO_RT;
210 /* This is not an end of a comment */
214 case ST_COMMENT_CLO_RT:
216 TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
217 } else if(C == CDASH) {
218 /* Maintain current state, still waiting for '>' */
227 * Flush the partially processed chunk, state permitting.
/* Only flush when some bytes are pending between chunk_start and p. */
229 if(p - chunk_start) {
/* The pending bytes go out as a non-final chunk; the state is kept. */
232 TOKEN_CB(PXML_COMMENT, state, 0);
235 TOKEN_CB(PXML_TEXT, state, 0);
237 default: break; /* a no-op */
/* Save the state for the next call and report how far we got. */
242 *stateContext = (int)state;
243 return chunk_start - (const char *)xmlbuf;