Initial version of asn1c
[com/asn1c.git] / libasn1parser / asn1p_l.l
diff --git a/libasn1parser/asn1p_l.l b/libasn1parser/asn1p_l.l
new file mode 100644 (file)
index 0000000..7757216
--- /dev/null
@@ -0,0 +1,614 @@
+%{
+
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "asn1parser.h"
+#include "asn1p_y.h"
+
+int asn1p_lex(void);
+void asn1p_lexer_hack_push_opaque_state(void);         /* Used in .y */
+void asn1p_lexer_hack_enable_with_syntax(void);                /* Used in .y */
+void asn1p_lexer_hack_push_encoding_control(void);     /* Used in .y */
+
+#define        YY_FATAL_ERROR(msg)     do {                    \
+               fprintf(stderr,                         \
+                       "lexer error at line %d, "      \
+                       "text \"%s\"\n",                \
+                       yylineno, yytext);              \
+               exit(1);                                \
+       } while(0)
+
+int asn1p_lexer_pedantic_1990 = 0;
+int asn1p_lexer_types_year = 0;
+int asn1p_lexer_constructs_year = 0;
+int asn1p_lexer_extended_values = 0;
+
+int asn1p_as_pointer;
+
+static asn1c_integer_t _lex_atoi(const char *ptr);
+static double          _lex_atod(const char *ptr);
+
+/*
+ * Check that the type is defined in the year of the standard choosen.
+ */
+#define        TYPE_LIFETIME(fyr, lyr)                         \
+       (!asn1p_lexer_types_year                        \
+       || (fyr && fyr <= asn1p_lexer_types_year)       \
+       || (lyr && lyr  > asn1p_lexer_types_year))
+
+/*
+ * Check the the construction (or concept, i.e. CLASS) is defined in
+ * a given year.
+ */
+#define        CONSTRUCT_LIFETIME(fyr, lyr)                    \
+       (!asn1p_lexer_constructs_year                   \
+       || (fyr && fyr <= asn1p_lexer_constructs_year)  \
+       || (lyr && lyr  > asn1p_lexer_constructs_year))
+
+/*
+ * Append quoted string.
+ */
+#define        QAPPEND(text, tlen)     do {                            \
+               char *prev_text = asn1p_lval.tv_opaque.buf;     \
+               int prev_len = asn1p_lval.tv_opaque.len;        \
+               char *p;                                        \
+                                                               \
+               p = malloc((tlen) + prev_len + 1);              \
+               if(p == NULL) return -1;                        \
+                                                               \
+               if(prev_text) memcpy(p, prev_text, prev_len);   \
+               memcpy(p + prev_len, text, tlen);               \
+               p[prev_len + (tlen)] = '\0';                    \
+                                                               \
+               free(asn1p_lval.tv_opaque.buf);                 \
+               asn1p_lval.tv_opaque.buf = p;                   \
+               asn1p_lval.tv_opaque.len = (tlen) + prev_len;   \
+       } while(0)
+
+%}
+
+%option        never-interactive
+%option        noinput 
+%option        noyywrap stack
+/* Performance penalty is OK */
+%option yylineno       
+/* Controlled from within application */
+%option debug          
+
+%pointer
+
+%x dash_comment
+%x idash_comment
+%x cpp_comment
+%x quoted
+%x opaque
+%x encoding_control
+%x with_syntax
+%x extended_values
+
+/* Newline */
+NL     [\r\v\f\n]
+/* White-space */
+WSP    [\t\r\v\f\n ]
+
+%%
+
+<INITIAL>"\xef\xbb\xbf"                return UTF8_BOM;
+
+-{3,}/[\r\n]   /* Immediately terminated long comment */
+-{3,}/[^-\r\n] yy_push_state(idash_comment);   /* Incorrect, but acceptable */
+<idash_comment>{
+       -{3,}   yy_pop_state(); /* Acceptable end of comment */
+}
+
+--<[ \t]*ASN1C.RepresentAsPointer[ \t]*>--     asn1p_as_pointer = 1;
+
+<extended_values>{
+    "#BIT STRING"    {
+        yy_pop_state();
+        return TOK_ExtValue_BIT_STRING;
+    }
+}
+
+<INITIAL,with_syntax>--                yy_push_state(dash_comment);
+<dash_comment,idash_comment>{
+
+       {NL}    yy_pop_state();
+
+       --      yy_pop_state(); /* End of comment */
+       -       /* Eat single dash */
+       [^\r\v\f\n-]+   /* Eat */
+}
+
+<INITIAL,cpp_comment,with_syntax>"/*"          yy_push_state(cpp_comment);
+<cpp_comment>{
+       [^*/<]  /* Eat */
+       "*/"    yy_pop_state();
+       .       /* Eat */
+}
+
+
+       /*
+        * This is state is being set from corresponding .y module when
+        * higher-level data is necessary to make proper parsing of the
+        * underlying data. Thus, we enter the <opaque> state and save
+        * everything for later processing.
+        */
+<opaque>{
+
+       "{"     {
+                       yy_push_state(opaque);
+                       asn1p_lval.tv_opaque.buf = strdup(yytext);
+                       asn1p_lval.tv_opaque.len = yyleng;
+                       return TOK_opaque;
+               }
+
+       "}"     {
+                       yy_pop_state();
+                       asn1p_lval.tv_opaque.buf = strdup(yytext);
+                       asn1p_lval.tv_opaque.len = yyleng;
+                       return TOK_opaque;
+               }
+
+       [^{}:=]+        {
+                       asn1p_lval.tv_opaque.buf = strdup(yytext);
+                       asn1p_lval.tv_opaque.len = yyleng;
+                       return TOK_opaque;
+               }
+
+       "::="   {
+                       fprintf(stderr,
+                               "ASN.1 Parser synchronization failure: "
+                               "\"%s\" at line %d must not appear "
+                               "inside value definition\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+
+       [:=]    {
+                       asn1p_lval.tv_opaque.buf = strdup(yytext);
+                       asn1p_lval.tv_opaque.len = yyleng;
+                       return TOK_opaque;
+               }
+
+       }
+
+\"[^\"]*               {
+                       asn1p_lval.tv_opaque.buf = 0;
+                       asn1p_lval.tv_opaque.len = 0;
+                       QAPPEND(yytext+1, yyleng-1);
+                       yy_push_state(quoted);
+               }
+<quoted>{
+
+       \"\"    { QAPPEND(yytext, yyleng-1); }  /* Add a single quote */
+       [^\"]+  { QAPPEND(yytext, yyleng); }
+
+       \"      {
+                       yy_pop_state();
+                       /* Do not append last quote:
+                       // QAPPEND(yytext, yyleng); */
+
+                       if(asn1p_lexer_pedantic_1990
+                       && strchr(yytext, '\n')) {
+                               fprintf(stderr, "%s: "
+                               "Newlines are prohibited by ASN.1:1990\n",
+                               asn1p_lval.tv_opaque.buf);
+                               return -1;
+                       }
+
+                       return TOK_cstring;
+               }
+
+       }
+
+<encoding_control>{
+       ENCODING-CONTROL        {
+                       const char *s = "ENCODING-CONTROL";
+                       const char *p = s + sizeof("ENCODING-CONTROL") - 2;
+                       for(; p >= s; p--) unput(*p);
+                       yy_pop_state();
+               }
+       END     unput('D'); unput('N'); unput('E'); yy_pop_state();
+       [^{} \t\r\v\f\n]+
+       [[:alnum:]]+
+       .       /* Eat everything else */
+       "\n"
+       }
+
+'[0-9A-F \t\r\v\f\n]+'H {
+               /* " \t\r\n" weren't allowed in ASN.1:1990. */
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_hstring;
+       }
+
+'[01 \t\r\v\f\n]+'B    {
+               /* " \t\r\n" weren't allowed in ASN.1:1990. */
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_bstring;
+       }
+
+
+-[1-9][0-9]*   {
+               asn1p_lval.a_int = _lex_atoi(yytext);
+               if(errno == ERANGE)
+                       return -1;
+               return TOK_number_negative;
+       }
+
+[1-9][0-9]*    {
+               asn1p_lval.a_int = _lex_atoi(yytext);
+               if(errno == ERANGE)
+                       return -1;
+               return TOK_number;
+       }
+
+"0"    {
+               asn1p_lval.a_int = _lex_atoi(yytext);
+               if(errno == ERANGE)
+                       return -1;
+               return TOK_number;
+       }
+
+[-+]?[0-9]+[.]?([eE][-+]?)?[0-9]+ {
+               asn1p_lval.a_dbl = _lex_atod(yytext);
+               if(errno == ERANGE)
+                       return -1;
+               return TOK_realnumber;
+       }
+
+ABSENT                 return TOK_ABSENT;
+ALL                    return TOK_ALL;
+ANY                    {
+                               /* Appeared in 1990, removed in 1997 */
+                               if(TYPE_LIFETIME(1990, 1997))
+                                       return TOK_ANY; 
+                               fprintf(stderr, "Keyword \"%s\" at line %d "
+                                       "is obsolete\n", yytext, yylineno);
+                               REJECT;
+                       }
+APPLICATION            return TOK_APPLICATION;
+AUTOMATIC              return TOK_AUTOMATIC;
+BEGIN                  {
+        if(asn1p_lexer_extended_values) {
+            yy_push_state(extended_values);
+        }
+        return TOK_BEGIN;
+    }
+BIT                    return TOK_BIT;
+BMPString              {
+                               if(TYPE_LIFETIME(1994, 0))
+                                       return TOK_BMPString;
+                               REJECT;
+                       }
+BOOLEAN                        return TOK_BOOLEAN;
+BY                     return TOK_BY;
+CHARACTER              return TOK_CHARACTER;
+CHOICE                 return TOK_CHOICE;
+CLASS                  return TOK_CLASS;
+COMPONENT              return TOK_COMPONENT;
+COMPONENTS             return TOK_COMPONENTS;
+CONSTRAINED            return TOK_CONSTRAINED;
+CONTAINING             return TOK_CONTAINING;
+DEFAULT                        return TOK_DEFAULT;
+DEFINED                        {
+                               /* Appeared in 1990, removed in 1997 */
+                               if(TYPE_LIFETIME(1990, 1997))
+                                       return TOK_DEFINED;
+                               fprintf(stderr, "Keyword \"%s\" at line %d "
+                                       "is obsolete\n", yytext, yylineno);
+                               /* Deprecated since */
+                               REJECT;
+                       }
+DEFINITIONS            return TOK_DEFINITIONS;
+EMBEDDED               return TOK_EMBEDDED;
+ENCODED                        return TOK_ENCODED;
+ENCODING-CONTROL       return TOK_ENCODING_CONTROL;
+END                    {
+                   if(YYSTATE == extended_values) {
+                yy_pop_state();
+            }
+            return TOK_END;
+        }
+ENUMERATED             return TOK_ENUMERATED;
+EXCEPT                 return TOK_EXCEPT;
+EXPLICIT               return TOK_EXPLICIT;
+EXPORTS                        return TOK_EXPORTS;
+EXTENSIBILITY          return TOK_EXTENSIBILITY;
+EXTERNAL               return TOK_EXTERNAL;
+FALSE                  return TOK_FALSE;
+FROM                   return TOK_FROM;
+GeneralizedTime                return TOK_GeneralizedTime;
+GeneralString          return TOK_GeneralString;
+GraphicString          return TOK_GraphicString;
+IA5String              return TOK_IA5String;
+IDENTIFIER             return TOK_IDENTIFIER;
+IMPLICIT               return TOK_IMPLICIT;
+IMPLIED                        return TOK_IMPLIED;
+IMPORTS                        return TOK_IMPORTS;
+INCLUDES               return TOK_INCLUDES;
+INSTANCE               return TOK_INSTANCE;
+INSTRUCTIONS           return TOK_INSTRUCTIONS;
+INTEGER                        return TOK_INTEGER;
+INTERSECTION           return TOK_INTERSECTION;
+ISO646String           return TOK_ISO646String;
+MAX                    return TOK_MAX;
+MIN                    return TOK_MIN;
+MINUS-INFINITY         return TOK_MINUS_INFINITY;
+NULL                   return TOK_NULL;
+NumericString          return TOK_NumericString;
+OBJECT                 return TOK_OBJECT;
+ObjectDescriptor       return TOK_ObjectDescriptor;
+OCTET                  return TOK_OCTET;
+OF                     return TOK_OF;
+OPTIONAL               return TOK_OPTIONAL;
+PATTERN                        return TOK_PATTERN;
+PDV                    return TOK_PDV;
+PLUS-INFINITY          return TOK_PLUS_INFINITY;
+PRESENT                        return TOK_PRESENT;
+PrintableString                return TOK_PrintableString;
+PRIVATE                        return TOK_PRIVATE;
+REAL                   return TOK_REAL;
+RELATIVE-OID           return TOK_RELATIVE_OID;
+SEQUENCE               return TOK_SEQUENCE;
+SET                    return TOK_SET;
+SIZE                   return TOK_SIZE;
+STRING                 return TOK_STRING;
+SYNTAX                 return TOK_SYNTAX;
+T61String              return TOK_T61String;
+TAGS                   return TOK_TAGS;
+TeletexString          return TOK_TeletexString;
+TRUE                   return TOK_TRUE;
+UNION                  return TOK_UNION;
+UNIQUE                 return TOK_UNIQUE;
+UNIVERSAL              return TOK_UNIVERSAL;
+UniversalString                {
+                               if(TYPE_LIFETIME(1994, 0))
+                                       return TOK_UniversalString;
+                               REJECT;
+                       }
+UTCTime                        return TOK_UTCTime;
+UTF8String             {
+                               if(TYPE_LIFETIME(1994, 0))
+                                       return TOK_UTF8String;
+                               REJECT;
+                       }
+VideotexString         return TOK_VideotexString;
+VisibleString          return TOK_VisibleString;
+WITH                   return TOK_WITH;
+
+
+<INITIAL,with_syntax>&[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*      {
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_typefieldreference;
+       }
+
+<INITIAL,with_syntax>&[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*      {
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_valuefieldreference;
+       }
+
+
+[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*    {
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_identifier;
+       }
+
+       /*
+        * objectclassreference
+        */
+<INITIAL,extended_values>[A-Z][A-Z0-9]*([-][A-Z0-9]+)* {
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_capitalreference;
+       }
+
+       /*
+        * typereference, modulereference
+        * NOTE: TOK_objectclassreference must be combined
+        * with this token to produce true typereference.
+        */
+[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*    {
+               asn1p_lval.tv_str = strdup(yytext);
+               return TOK_typereference;
+       }
+
+<INITIAL,extended_values>"::="         return TOK_PPEQ;
+
+"..."          return TOK_ThreeDots;
+".."           return TOK_TwoDots;
+
+<with_syntax>{
+
+       [A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*     {
+                               asn1p_lval.tv_str = strdup(yytext);
+                               return TOK_Literal;
+                       }
+
+       ","             {
+                               asn1p_lval.tv_str = strdup(yytext);
+                               return TOK_Literal;
+                       }
+
+       "{"             {
+                               yy_push_state(with_syntax);
+                               asn1p_lval.tv_str = strdup(yytext);
+                               return TOK_Literal;
+                       }
+
+       "["             return '[';
+       "]"             return ']';
+
+       {WSP}+          {
+                       asn1p_lval.tv_opaque.buf = strdup(yytext);
+                       asn1p_lval.tv_opaque.len = yyleng;
+                       return TOK_whitespace;
+                       }
+
+       "}"             {
+                               yy_pop_state();
+                               if(YYSTATE == with_syntax) {
+                                       asn1p_lval.tv_str = strdup(yytext);
+                                       return TOK_Literal;
+                               } else {
+                                       return '}';
+                               }
+                       }
+
+}
+
+
+<INITIAL,extended_values>{WSP}+        /* Ignore whitespace */
+
+
+[{][\t\r\v\f\n ]*[0-7][,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}] {
+               asn1c_integer_t v1 = -1, v2 = -1;
+               char *p;
+               for(p = yytext; *p; p++)
+                       if(*p >= '0' && *p <= '9')
+                       { v1 = _lex_atoi(p); break; }
+               while(*p >= '0' && *p <= '9') p++;      /* Skip digits */
+               for(; *p; p++) if(*p >= '0' && *p <= '9')
+                       { v2 = _lex_atoi(p); break; }
+               if(v1 < 0 || v1 > 7) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
+                               "mandates 0..7 range for Tuple's TableColumn\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               if(v2 < 0 || v2 > 15) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
+                               "mandates 0..15 range for Tuple's TableRow\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               asn1p_lval.a_int = (v1 << 4) + v2;
+               return TOK_tuple;
+       }
+
+[{][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]  {
+               asn1c_integer_t v1 = -1, v2 = -1, v3 = -1, v4 = -1;
+               char *p;
+               for(p = yytext; *p; p++)
+                       if(*p >= '0' && *p <= '9')
+                       { v1 = _lex_atoi(p); break; }
+               while(*p >= '0' && *p <= '9') p++;      /* Skip digits */
+               for(; *p; p++) if(*p >= '0' && *p <= '9')
+                       { v2 = _lex_atoi(p); break; }
+               while(*p >= '0' && *p <= '9') p++;
+               for(; *p; p++) if(*p >= '0' && *p <= '9')
+                       { v3 = _lex_atoi(p); break; }
+               while(*p >= '0' && *p <= '9') p++;
+               for(; *p; p++) if(*p >= '0' && *p <= '9')
+                       { v4 = _lex_atoi(p); break; }
+               if(v1 < 0 || v1 > 127) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
+                               "mandates 0..127 range for Quadruple's Group\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               if(v2 < 0 || v2 > 255) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
+                               "mandates 0..255 range for Quadruple's Plane\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               if(v3 < 0 || v3 > 255) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
+                               "mandates 0..255 range for Quadruple's Row\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               if(v4 < 0 || v4 > 255) {
+                       fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
+                               "mandates 0..255 range for Quadruple's Cell\n",
+                               yytext, yylineno);
+                       return -1;
+               }
+               asn1p_lval.a_int = (v1 << 24) | (v2 << 16) | (v3 << 8) | v4;
+               return TOK_quadruple;
+       }
+
+
+"[["        return TOK_VBracketLeft;
+"]]"        return TOK_VBracketRight;
+
+[(){},;:|!.&@\[\]^]    return yytext[0];
+
+[^A-Za-z0-9:=,{}<.@()[]'\"|&^*;!-] {
+               if(TYPE_LIFETIME(1994, 0))
+                       fprintf(stderr, "ERROR: ");
+               fprintf(stderr,
+               "Symbol '%c' at line %d is prohibited "
+               "by ASN.1:1994 and ASN.1:1997\n",
+                       yytext[0], yylineno);
+               if(TYPE_LIFETIME(1994, 0))
+                       return -1;
+       }
+
+<*>.   {
+               fprintf(stderr,
+                       "Unexpected token at line %d: \"%s\"\n",
+                       yylineno, yytext);
+               while(YYSTATE != INITIAL)
+                       yy_pop_state();
+               if(0) {
+                       yy_top_state(); /* Just to use this function. */
+                       yy_fatal_error("Parse error");
+               }
+               return -1;
+}
+
+<*><<EOF>>      {
+               while(YYSTATE != INITIAL)
+                       yy_pop_state();
+               yyterminate();
+       }
+
+
+%%
+
+/*
+ * Very dirty but wonderful hack allowing to rule states from within .y file.
+ */
+void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
+
+/*
+ * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
+ */
+void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
+
+/* Yet another */
+void asn1p_lexer_hack_push_encoding_control() {
+       yy_push_state(encoding_control);
+}
+
+static asn1c_integer_t
+_lex_atoi(const char *ptr) {
+       asn1c_integer_t value;
+       if(asn1p_atoi(ptr, &value)) {
+               fprintf(stderr,
+                       "Value \"%s\" at line %d is too large "
+                       "for this compiler! Please contact the asn1c author.\n",
+                       ptr, yylineno);
+               errno = ERANGE;
+       }
+       return value;
+}
+
+static double
+_lex_atod(const char *ptr) {
+       double value;
+       errno = 0;
+       value = strtod(ptr, 0);
+       if(errno) {
+               fprintf(stderr,
+                       "Value \"%s\" at line %d is outside of `double` range "
+                       "in this compiler! Please contact the asn1c author.\n",
+                       ptr, yylineno);
+               errno = ERANGE;
+       }
+       return value;
+}
+