--- /dev/null
+/*\r
+ * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.\r
+ * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.\r
+ * All rights reserved.\r
+ * Redistribution and modifications are permitted subject to BSD license.\r
+ */\r
+#include <asn_system.h>\r
+#include <xer_support.h>\r
+\r
+/*\r
+ * Parser states.\r
+ * pxml_parse() loads the current state from the caller's int\r
+ * (*stateContext) on entry and stores it back before returning, so the\r
+ * state survives between calls that feed consecutive buffers.\r
+ * ST_TEXT is the first enumerator and therefore has the value 0.\r
+ */\r
+typedef enum {\r
+ ST_TEXT, /* Initial state: plain text outside of any tag */\r
+ ST_TAG_START, /* Just seen '<': tag, "<!" or plain text may follow */\r
+ ST_TAG_BODY, /* Inside a tag, outside of any attribute value */\r
+ ST_TAG_QUOTE_WAIT, /* Seen '=' inside a tag, attribute value expected */\r
+ ST_TAG_QUOTED_STRING, /* Inside a double-quoted attribute value */\r
+ ST_TAG_UNQUOTED_STRING, /* Inside an unquoted attribute value */\r
+ ST_COMMENT_WAIT_DASH1, /* Seen "<!", i.e. "<!--"[1]; first '-' expected */\r
+ ST_COMMENT_WAIT_DASH2, /* Seen "<!-", i.e. "<!--"[2]; second '-' expected */\r
+ ST_COMMENT, /* Inside the comment body */\r
+ ST_COMMENT_CLO_DASH2, /* Seen '-' in a comment, i.e. "-->"[0]; second '-' expected */\r
+ ST_COMMENT_CLO_RT /* Seen "--" in a comment, i.e. "-->"[1]; '>' expected */\r
+} pstate_e;\r
+\r
+static const int\r
+_charclass[256] = { /*\r
+ * Character class of every byte value, consulted by the macros below:\r
+ * 0 = anything else, 1 = whitespace, 2 = decimal digit, 3 = ASCII letter.\r
+ * Only the first 128 entries are spelled out; the remaining elements\r
+ * are zero-initialized, so every byte >= 0x80 falls into class 0.\r
+ */\r
+ 0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0, /* 0x00..0x0f: TAB, LF, FF, CR are whitespace */\r
+ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, /* 0x10..0x1f */\r
+ 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, /* 0x20..0x2f: SPACE is whitespace */\r
+ 2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0, /* 01234567 89 */\r
+ 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* ABCDEFG HIJKLMNO */\r
+ 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0, /* PQRSTUVW XYZ */\r
+ 0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, /* abcdefg hijklmno */\r
+ 3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0 /* pqrstuvw xyz */\r
+};\r
+#define WHITESPACE(c) (_charclass[(unsigned char)(c)] == 1) /* TAB, LF, FF, CR or SPACE */\r
+#define ALNUM(c) (_charclass[(unsigned char)(c)] >= 2) /* ASCII digit or ASCII letter */\r
+#define ALPHA(c) (_charclass[(unsigned char)(c)] == 3) /* ASCII letter */\r
+\r
+/*\r
+ * Aliases for the markup characters the parser reacts to, written as\r
+ * ASCII code points (the same byte values are used in UTF-8).\r
+ * CQUEST is not referenced by pxml_parse().\r
+ */\r
+#define EXCLAM 0x21 /* '!' */\r
+#define CQUOTE 0x22 /* '"' */\r
+#define CDASH 0x2d /* '-' */\r
+#define CSLASH 0x2f /* '/' */\r
+#define LANGLE 0x3c /* '<' */\r
+#define CEQUAL 0x3d /* '=' */\r
+#define RANGLE 0x3e /* '>' */\r
+#define CQUEST 0x3f /* '?' */\r
+\r
+/*\r
+ * Invoke token callback.\r
+ * These macros are usable only inside pxml_parse(): they read and write\r
+ * its locals (p, chunk_start, state, cb, key) and jump to its "finish"\r
+ * label.\r
+ *\r
+ * TOKEN_CB_CALL(type, _ns, _current_too, _final):\r
+ * type - token type passed to the callback;\r
+ * _ns - parser state to enter afterwards;\r
+ * _current_too - 1 to include the byte at p into the chunk, 0 to end\r
+ * the chunk just before it;\r
+ * _final - not used by the expansion.\r
+ *\r
+ * The reported chunk is [chunk_start, p), optionally extended by the\r
+ * current byte. An empty chunk only switches the state; the callback is\r
+ * not invoked. If the callback returns less than the chunk size, the\r
+ * scan stops through "goto finish" with chunk_start left untouched, and\r
+ * the new state is recorded only when the current byte was included and\r
+ * the callback returned -1. Otherwise chunk_start is moved past the\r
+ * chunk and the state becomes _ns.\r
+ */\r
+#define TOKEN_CB_CALL(type, _ns, _current_too, _final) do { \\r
+ int _ret; /* callback's return value */ \\r
+ pstate_e ns = _ns; \\r
+ ssize_t _sz = (p - chunk_start) + _current_too; /* chunk length */ \\r
+ if (!_sz) { \\r
+ /* Shortcut: nothing to report, just switch the state. */ \\r
+ state = _ns; \\r
+ break; /* leaves only this do/while(0) */ \\r
+ } \\r
+ _ret = cb(type, chunk_start, _sz, key); \\r
+ if(_ret < _sz) { /* callback did not take the whole chunk */ \\r
+ if(_current_too && _ret == -1) \\r
+ state = ns; \\r
+ goto finish; \\r
+ } \\r
+ chunk_start = p + _current_too; /* next chunk starts here */ \\r
+ state = ns; \\r
+ } while(0)\r
+\r
+#define TOKEN_CB(_type, _ns, _current_too) \\r
+ TOKEN_CB_CALL(_type, _ns, _current_too, 0) /* chunk reported with _type as is */\r
+\r
+#define PXML_TAG_FINAL_CHUNK_TYPE PXML_TAG_END /* used by TOKEN_CB_FINAL(PXML_TAG, ...) */\r
+#define PXML_COMMENT_FINAL_CHUNK_TYPE PXML_COMMENT_END /* used by TOKEN_CB_FINAL(PXML_COMMENT, ...) */\r
+\r
+#define TOKEN_CB_FINAL(_type, _ns, _current_too) \\r
+ TOKEN_CB_CALL( _type ## _FINAL_CHUNK_TYPE , _ns, _current_too, 1) /* pasted name resolves to one of the two *_FINAL_CHUNK_TYPE macros */\r
+\r
+/*\r
+ * Parser itself.\r
+ *\r
+ * Scans [xmlbuf, xmlbuf + size) byte by byte, splits it into text, tag\r
+ * and comment chunks and hands every chunk to cb(type, start, length,\r
+ * key) through the TOKEN_CB*() macros.\r
+ *\r
+ * stateContext - in/out: parser state carried between calls; it is read\r
+ * on entry and rewritten before returning.\r
+ * NOTE(review): presumably the caller starts with 0 (ST_TEXT) --\r
+ * confirm against the callers.\r
+ * xmlbuf, size - the input to scan.\r
+ * cb, key - token callback and the opaque pointer passed to it. A\r
+ * return value smaller than the chunk length stops the scan\r
+ * (see TOKEN_CB_CALL).\r
+ *\r
+ * Returns the number of leading bytes of xmlbuf covered by chunks that\r
+ * were delivered to the callback and fully accepted by it. At the end\r
+ * of the buffer a pending chunk is flushed only in the ST_TEXT and\r
+ * ST_COMMENT states; in any other state the pending bytes are neither\r
+ * reported nor counted.\r
+ */\r
+ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {\r
+ pstate_e state = (pstate_e)*stateContext;\r
+ const char *chunk_start = (const char *)xmlbuf; /* first byte not yet reported */\r
+ const char *p = chunk_start; /* byte being examined */\r
+ const char *end = p + size;\r
+\r
+ for(; p < end; p++) {\r
+ int C = *(const unsigned char *)p; /* current byte as 0..255 */\r
+ switch(state) {\r
+ case ST_TEXT:\r
+ /*\r
+ * Initial state: we're in the middle of some text,\r
+ * or just have started.\r
+ */\r
+ if (C == LANGLE) \r
+ /* We're now in the tag, probably. Report the text before '<'. */\r
+ TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);\r
+ break;\r
+ case ST_TAG_START:\r
+ if (ALPHA(C) || (C == CSLASH))\r
+ state = ST_TAG_BODY;\r
+ else if (C == EXCLAM)\r
+ state = ST_COMMENT_WAIT_DASH1;\r
+ else \r
+ /*\r
+ * Not characters and not whitespace.\r
+ * Must be something like "3 < 4".\r
+ */\r
+ TOKEN_CB(PXML_TEXT, ST_TEXT, 1);/* Flush as data */\r
+ break;\r
+ case ST_TAG_BODY:\r
+ switch(C) {\r
+ case RANGLE:\r
+ /* End of the tag: report it including '>' */\r
+ TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);\r
+ break;\r
+ case LANGLE:\r
+ /*\r
+ * The previous tag wasn't completed, but still\r
+ * recognized as valid. (Mozilla-compatible)\r
+ */\r
+ TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0); \r
+ break;\r
+ case CEQUAL:\r
+ state = ST_TAG_QUOTE_WAIT;\r
+ break;\r
+ }\r
+ break;\r
+ case ST_TAG_QUOTE_WAIT:\r
+ /*\r
+ * State after the equal sign ("=") in the tag.\r
+ */\r
+ switch(C) {\r
+ case CQUOTE:\r
+ state = ST_TAG_QUOTED_STRING;\r
+ break;\r
+ case RANGLE:\r
+ /* End of the tag */\r
+ TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);\r
+ break;\r
+ default:\r
+ if(!WHITESPACE(C))\r
+ /* Unquoted string value */\r
+ state = ST_TAG_UNQUOTED_STRING;\r
+ }\r
+ break;\r
+ case ST_TAG_QUOTED_STRING:\r
+ /*\r
+ * Tag attribute's string value in quotes.\r
+ * Everything up to the closing '"' is skipped, '>' and '<' included.\r
+ */\r
+ if(C == CQUOTE) {\r
+ /* Return back to the tag state */\r
+ state = ST_TAG_BODY;\r
+ }\r
+ break;\r
+ case ST_TAG_UNQUOTED_STRING:\r
+ if(C == RANGLE) {\r
+ /* End of the tag */\r
+ TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);\r
+ } else if(WHITESPACE(C)) {\r
+ /* Return back to the tag state */\r
+ state = ST_TAG_BODY;\r
+ }\r
+ break;\r
+ case ST_COMMENT_WAIT_DASH1:\r
+ if(C == CDASH) {\r
+ state = ST_COMMENT_WAIT_DASH2;\r
+ } else {\r
+ /*\r
+ * Some ordinary tag.\r
+ * NOTE(review): the current byte is not re-examined as\r
+ * tag body, so a '>' right after "<!" does not end the\r
+ * tag -- confirm this is intended.\r
+ */\r
+ state = ST_TAG_BODY;\r
+ }\r
+ break;\r
+ case ST_COMMENT_WAIT_DASH2:\r
+ if(C == CDASH) {\r
+ /* Seen "<!--" */\r
+ state = ST_COMMENT;\r
+ } else {\r
+ /*\r
+ * Some ordinary tag.\r
+ * NOTE(review): as above, the current byte is consumed\r
+ * without being examined ("<!->" does not end here).\r
+ */\r
+ state = ST_TAG_BODY;\r
+ }\r
+ break;\r
+ case ST_COMMENT:\r
+ if(C == CDASH) {\r
+ state = ST_COMMENT_CLO_DASH2;\r
+ }\r
+ break;\r
+ case ST_COMMENT_CLO_DASH2:\r
+ if(C == CDASH) {\r
+ state = ST_COMMENT_CLO_RT;\r
+ } else {\r
+ /* This is not an end of a comment */\r
+ state = ST_COMMENT;\r
+ }\r
+ break;\r
+ case ST_COMMENT_CLO_RT:\r
+ if(C == RANGLE) {\r
+ /* Seen "-->": report the comment including '>' */\r
+ TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);\r
+ } else if(C == CDASH) {\r
+ /* Maintain current state, still waiting for '>' */\r
+ } else {\r
+ state = ST_COMMENT;\r
+ }\r
+ break;\r
+ } /* switch(state) */\r
+ } /* for() */\r
+\r
+ /*\r
+ * Flush the partially processed chunk, state permitting.\r
+ * The state passed to TOKEN_CB() is the current one, so it stays\r
+ * unchanged when the callback accepts the chunk.\r
+ */\r
+ if(p - chunk_start) {\r
+ switch (state) {\r
+ case ST_COMMENT:\r
+ TOKEN_CB(PXML_COMMENT, state, 0);\r
+ break;\r
+ case ST_TEXT:\r
+ TOKEN_CB(PXML_TEXT, state, 0);\r
+ break;\r
+ default: break; /* a no-op: pending bytes stay unreported */\r
+ }\r
+ }\r
+\r
+finish:\r
+ *stateContext = (int)state; /* hand the state back to the caller */\r
+ return chunk_start - (const char *)xmlbuf;\r
+}\r
+\r