7757216312f04dc09e2d3a45135ba47991a9390e
[com/asn1c.git] / libasn1parser / asn1p_l.l
1 %{
2
3 #include <string.h>
4 #include <errno.h>
5 #include <assert.h>
6
7 #include "asn1parser.h"
8 #include "asn1p_y.h"
9
10 int asn1p_lex(void);
11 void asn1p_lexer_hack_push_opaque_state(void);          /* Used in .y */
12 void asn1p_lexer_hack_enable_with_syntax(void);         /* Used in .y */
13 void asn1p_lexer_hack_push_encoding_control(void);      /* Used in .y */
14
/*
 * Fatal-error hook invoked by the generated scanner.
 * Prints the current line number, the current token text and the
 * scanner-supplied diagnostic (msg) on stderr, then terminates the
 * process with exit status 1.
 */
#define YY_FATAL_ERROR(msg)     do {                    \
                fprintf(stderr,                         \
                        "lexer error at line %d, "      \
                        "text \"%s\": %s\n",            \
                        yylineno, yytext, (msg));       \
                exit(1);                                \
        } while(0)
22
23 int asn1p_lexer_pedantic_1990 = 0;
24 int asn1p_lexer_types_year = 0;
25 int asn1p_lexer_constructs_year = 0;
26 int asn1p_lexer_extended_values = 0;
27
28 int asn1p_as_pointer;
29
30 static asn1c_integer_t _lex_atoi(const char *ptr);
31 static double          _lex_atod(const char *ptr);
32
/*
 * Check that the type is defined in the year of the standard chosen.
 *
 * fyr is the first year the type is available in (0: no lower bound),
 * lyr is the year the type was removed in (0: never removed).
 * If no year was selected (asn1p_lexer_types_year == 0), every type is
 * accepted. Otherwise BOTH bounds have to hold: the selected year must
 * not precede fyr and must precede lyr.
 */
#define TYPE_LIFETIME(fyr, lyr)                                 \
        (!asn1p_lexer_types_year                                \
        || ((!(fyr) || (fyr) <= asn1p_lexer_types_year)         \
         && (!(lyr) || (lyr)  > asn1p_lexer_types_year)))
40
/*
 * Check that the construction (or concept, i.e. CLASS) is defined in
 * a given year.
 *
 * fyr is the first year the construct is available in (0: no lower bound),
 * lyr is the year the construct was removed in (0: never removed).
 * If no year was selected (asn1p_lexer_constructs_year == 0), every
 * construct is accepted. Otherwise BOTH bounds have to hold.
 */
#define CONSTRUCT_LIFETIME(fyr, lyr)                                    \
        (!asn1p_lexer_constructs_year                                   \
        || ((!(fyr) || (fyr) <= asn1p_lexer_constructs_year)            \
         && (!(lyr) || (lyr)  > asn1p_lexer_constructs_year)))
49
/*
 * Append tlen bytes of text to the string accumulated in
 * asn1p_lval.tv_opaque (buf/len).
 * A new NUL-terminated buffer is allocated, the previous contents (if any)
 * and the new bytes are copied into it, and the previous buffer is freed.
 * NOTE: on allocation failure this macro executes `return -1` in the
 * enclosing function, leaving asn1p_lval.tv_opaque untouched.
 */
#define QAPPEND(text, tlen)     do {                                    \
                int old_len = asn1p_lval.tv_opaque.len;                 \
                char *old_buf = asn1p_lval.tv_opaque.buf;               \
                char *joined = malloc(old_len + (tlen) + 1);            \
                                                                        \
                if(!joined) return -1;                                  \
                                                                        \
                if(old_buf != NULL)                                     \
                        memcpy(joined, old_buf, old_len);               \
                memcpy(&joined[old_len], text, tlen);                   \
                joined[old_len + (tlen)] = '\0';                        \
                                                                        \
                asn1p_lval.tv_opaque.buf = joined;                      \
                asn1p_lval.tv_opaque.len = old_len + (tlen);            \
                free(old_buf);                                          \
        } while(0)
69
70 %}
71
72 %option never-interactive
73 %option noinput 
74 %option noyywrap stack
75 /* Performance penalty is OK */
76 %option yylineno        
77 /* Controlled from within application */
78 %option debug           
79
80 %pointer
81
82 %x dash_comment
83 %x idash_comment
84 %x cpp_comment
85 %x quoted
86 %x opaque
87 %x encoding_control
88 %x with_syntax
89 %x extended_values
90
91 /* Newline */
92 NL      [\r\v\f\n]
93 /* White-space */
94 WSP     [\t\r\v\f\n ]
95
96 %%
97
98 <INITIAL>"\xef\xbb\xbf"         return UTF8_BOM;
99
100 -{3,}/[\r\n]    /* Immediately terminated long comment */
101 -{3,}/[^-\r\n]  yy_push_state(idash_comment);   /* Incorrect, but acceptable */
102 <idash_comment>{
103         -{3,}   yy_pop_state(); /* Acceptable end of comment */
104 }
105
106 --<[ \t]*ASN1C.RepresentAsPointer[ \t]*>--      asn1p_as_pointer = 1;
107
108 <extended_values>{
109     "#BIT STRING"    {
110         yy_pop_state();
111         return TOK_ExtValue_BIT_STRING;
112     }
113 }
114
115 <INITIAL,with_syntax>--         yy_push_state(dash_comment);
116 <dash_comment,idash_comment>{
117
118         {NL}    yy_pop_state();
119
120         --      yy_pop_state(); /* End of comment */
121         -       /* Eat single dash */
122         [^\r\v\f\n-]+   /* Eat */
123 }
124
125 <INITIAL,cpp_comment,with_syntax>"/*"           yy_push_state(cpp_comment);
126 <cpp_comment>{
127         [^*/<]  /* Eat */
128         "*/"    yy_pop_state();
129         .       /* Eat */
130 }
131
132
133         /*
134          * This state is set from the corresponding .y module when
135          * higher-level data is necessary to make proper parsing of the
136          * underlying data. Thus, we enter the <opaque> state and save
137          * everything for later processing.
138          */
139 <opaque>{
140
141         "{"     {
142                         yy_push_state(opaque);
143                         asn1p_lval.tv_opaque.buf = strdup(yytext);
144                         asn1p_lval.tv_opaque.len = yyleng;
145                         return TOK_opaque;
146                 }
147
148         "}"     {
149                         yy_pop_state();
150                         asn1p_lval.tv_opaque.buf = strdup(yytext);
151                         asn1p_lval.tv_opaque.len = yyleng;
152                         return TOK_opaque;
153                 }
154
155         [^{}:=]+        {
156                         asn1p_lval.tv_opaque.buf = strdup(yytext);
157                         asn1p_lval.tv_opaque.len = yyleng;
158                         return TOK_opaque;
159                 }
160
161         "::="   {
162                         fprintf(stderr,
163                                 "ASN.1 Parser synchronization failure: "
164                                 "\"%s\" at line %d must not appear "
165                                 "inside value definition\n",
166                                 yytext, yylineno);
167                         return -1;
168                 }
169
170         [:=]    {
171                         asn1p_lval.tv_opaque.buf = strdup(yytext);
172                         asn1p_lval.tv_opaque.len = yyleng;
173                         return TOK_opaque;
174                 }
175
176         }
177
178 \"[^\"]*                {
179                         asn1p_lval.tv_opaque.buf = 0;
180                         asn1p_lval.tv_opaque.len = 0;
181                         QAPPEND(yytext+1, yyleng-1);
182                         yy_push_state(quoted);
183                 }
<quoted>{

        \"\"    {
                        /*
                         * A doubled quote inside the string: append only
                         * one of the two quote characters.
                         */
                        QAPPEND(yytext, yyleng-1);
                }

        [^\"]+  {
                        /* Plain string content (newlines included). */
                        QAPPEND(yytext, yyleng);
                }

        \"      {
                        /*
                         * Closing quote: leave the <quoted> state and
                         * return the accumulated string as TOK_cstring.
                         * The closing quote itself is not appended.
                         */
                        yy_pop_state();

                        /*
                         * yytext holds nothing but the closing quote here,
                         * so the newline check has to inspect the string
                         * accumulated in asn1p_lval.tv_opaque instead.
                         */
                        if(asn1p_lexer_pedantic_1990
                        && asn1p_lval.tv_opaque.buf
                        && memchr(asn1p_lval.tv_opaque.buf, '\n',
                                asn1p_lval.tv_opaque.len)) {
                                fprintf(stderr, "%s: "
                                "Newlines are prohibited by ASN.1:1990\n",
                                asn1p_lval.tv_opaque.buf);
                                return -1;
                        }

                        return TOK_cstring;
                }

        }
206
207 <encoding_control>{
208         ENCODING-CONTROL        {
209                         const char *s = "ENCODING-CONTROL";
210                         const char *p = s + sizeof("ENCODING-CONTROL") - 2;
211                         for(; p >= s; p--) unput(*p);
212                         yy_pop_state();
213                 }
214         END     unput('D'); unput('N'); unput('E'); yy_pop_state();
215         [^{} \t\r\v\f\n]+
216         [[:alnum:]]+
217         .       /* Eat everything else */
218         "\n"
219         }
220
221 '[0-9A-F \t\r\v\f\n]+'H {
222                 /* " \t\r\n" weren't allowed in ASN.1:1990. */
223                 asn1p_lval.tv_str = strdup(yytext);
224                 return TOK_hstring;
225         }
226
227 '[01 \t\r\v\f\n]+'B     {
228                 /* " \t\r\n" weren't allowed in ASN.1:1990. */
229                 asn1p_lval.tv_str = strdup(yytext);
230                 return TOK_bstring;
231         }
232
233
234 -[1-9][0-9]*    {
235                 asn1p_lval.a_int = _lex_atoi(yytext);
236                 if(errno == ERANGE)
237                         return -1;
238                 return TOK_number_negative;
239         }
240
241 [1-9][0-9]*     {
242                 asn1p_lval.a_int = _lex_atoi(yytext);
243                 if(errno == ERANGE)
244                         return -1;
245                 return TOK_number;
246         }
247
248 "0"     {
249                 asn1p_lval.a_int = _lex_atoi(yytext);
250                 if(errno == ERANGE)
251                         return -1;
252                 return TOK_number;
253         }
254
255 [-+]?[0-9]+[.]?([eE][-+]?)?[0-9]+ {
256                 asn1p_lval.a_dbl = _lex_atod(yytext);
257                 if(errno == ERANGE)
258                         return -1;
259                 return TOK_realnumber;
260         }
261
262 ABSENT                  return TOK_ABSENT;
263 ALL                     return TOK_ALL;
264 ANY                     {
265                                 /* Appeared in 1990, removed in 1997 */
266                                 if(TYPE_LIFETIME(1990, 1997))
267                                         return TOK_ANY; 
268                                 fprintf(stderr, "Keyword \"%s\" at line %d "
269                                         "is obsolete\n", yytext, yylineno);
270                                 REJECT;
271                         }
272 APPLICATION             return TOK_APPLICATION;
273 AUTOMATIC               return TOK_AUTOMATIC;
274 BEGIN                   {
275         if(asn1p_lexer_extended_values) {
276             yy_push_state(extended_values);
277         }
278         return TOK_BEGIN;
279     }
280 BIT                     return TOK_BIT;
281 BMPString               {
282                                 if(TYPE_LIFETIME(1994, 0))
283                                         return TOK_BMPString;
284                                 REJECT;
285                         }
286 BOOLEAN                 return TOK_BOOLEAN;
287 BY                      return TOK_BY;
288 CHARACTER               return TOK_CHARACTER;
289 CHOICE                  return TOK_CHOICE;
290 CLASS                   return TOK_CLASS;
291 COMPONENT               return TOK_COMPONENT;
292 COMPONENTS              return TOK_COMPONENTS;
293 CONSTRAINED             return TOK_CONSTRAINED;
294 CONTAINING              return TOK_CONTAINING;
295 DEFAULT                 return TOK_DEFAULT;
296 DEFINED                 {
297                                 /* Appeared in 1990, removed in 1997 */
298                                 if(TYPE_LIFETIME(1990, 1997))
299                                         return TOK_DEFINED;
300                                 fprintf(stderr, "Keyword \"%s\" at line %d "
301                                         "is obsolete\n", yytext, yylineno);
302                                 /* Deprecated since */
303                                 REJECT;
304                         }
305 DEFINITIONS             return TOK_DEFINITIONS;
306 EMBEDDED                return TOK_EMBEDDED;
307 ENCODED                 return TOK_ENCODED;
308 ENCODING-CONTROL        return TOK_ENCODING_CONTROL;
309 END                     {
310                     if(YYSTATE == extended_values) {
311                 yy_pop_state();
312             }
313             return TOK_END;
314         }
315 ENUMERATED              return TOK_ENUMERATED;
316 EXCEPT                  return TOK_EXCEPT;
317 EXPLICIT                return TOK_EXPLICIT;
318 EXPORTS                 return TOK_EXPORTS;
319 EXTENSIBILITY           return TOK_EXTENSIBILITY;
320 EXTERNAL                return TOK_EXTERNAL;
321 FALSE                   return TOK_FALSE;
322 FROM                    return TOK_FROM;
323 GeneralizedTime         return TOK_GeneralizedTime;
324 GeneralString           return TOK_GeneralString;
325 GraphicString           return TOK_GraphicString;
326 IA5String               return TOK_IA5String;
327 IDENTIFIER              return TOK_IDENTIFIER;
328 IMPLICIT                return TOK_IMPLICIT;
329 IMPLIED                 return TOK_IMPLIED;
330 IMPORTS                 return TOK_IMPORTS;
331 INCLUDES                return TOK_INCLUDES;
332 INSTANCE                return TOK_INSTANCE;
333 INSTRUCTIONS            return TOK_INSTRUCTIONS;
334 INTEGER                 return TOK_INTEGER;
335 INTERSECTION            return TOK_INTERSECTION;
336 ISO646String            return TOK_ISO646String;
337 MAX                     return TOK_MAX;
338 MIN                     return TOK_MIN;
339 MINUS-INFINITY          return TOK_MINUS_INFINITY;
340 NULL                    return TOK_NULL;
341 NumericString           return TOK_NumericString;
342 OBJECT                  return TOK_OBJECT;
343 ObjectDescriptor        return TOK_ObjectDescriptor;
344 OCTET                   return TOK_OCTET;
345 OF                      return TOK_OF;
346 OPTIONAL                return TOK_OPTIONAL;
347 PATTERN                 return TOK_PATTERN;
348 PDV                     return TOK_PDV;
349 PLUS-INFINITY           return TOK_PLUS_INFINITY;
350 PRESENT                 return TOK_PRESENT;
351 PrintableString         return TOK_PrintableString;
352 PRIVATE                 return TOK_PRIVATE;
353 REAL                    return TOK_REAL;
354 RELATIVE-OID            return TOK_RELATIVE_OID;
355 SEQUENCE                return TOK_SEQUENCE;
356 SET                     return TOK_SET;
357 SIZE                    return TOK_SIZE;
358 STRING                  return TOK_STRING;
359 SYNTAX                  return TOK_SYNTAX;
360 T61String               return TOK_T61String;
361 TAGS                    return TOK_TAGS;
362 TeletexString           return TOK_TeletexString;
363 TRUE                    return TOK_TRUE;
364 UNION                   return TOK_UNION;
365 UNIQUE                  return TOK_UNIQUE;
366 UNIVERSAL               return TOK_UNIVERSAL;
367 UniversalString         {
368                                 if(TYPE_LIFETIME(1994, 0))
369                                         return TOK_UniversalString;
370                                 REJECT;
371                         }
372 UTCTime                 return TOK_UTCTime;
373 UTF8String              {
374                                 if(TYPE_LIFETIME(1994, 0))
375                                         return TOK_UTF8String;
376                                 REJECT;
377                         }
378 VideotexString          return TOK_VideotexString;
379 VisibleString           return TOK_VisibleString;
380 WITH                    return TOK_WITH;
381
382
383 <INITIAL,with_syntax>&[A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*       {
384                 asn1p_lval.tv_str = strdup(yytext);
385                 return TOK_typefieldreference;
386         }
387
388 <INITIAL,with_syntax>&[a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*       {
389                 asn1p_lval.tv_str = strdup(yytext);
390                 return TOK_valuefieldreference;
391         }
392
393
394 [a-z][a-zA-Z0-9]*([-][a-zA-Z0-9]+)*     {
395                 asn1p_lval.tv_str = strdup(yytext);
396                 return TOK_identifier;
397         }
398
399         /*
400          * objectclassreference
401          */
402 <INITIAL,extended_values>[A-Z][A-Z0-9]*([-][A-Z0-9]+)*  {
403                 asn1p_lval.tv_str = strdup(yytext);
404                 return TOK_capitalreference;
405         }
406
407         /*
408          * typereference, modulereference
409          * NOTE: TOK_objectclassreference must be combined
410          * with this token to produce true typereference.
411          */
412 [A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*     {
413                 asn1p_lval.tv_str = strdup(yytext);
414                 return TOK_typereference;
415         }
416
417 <INITIAL,extended_values>"::="          return TOK_PPEQ;
418
419 "..."           return TOK_ThreeDots;
420 ".."            return TOK_TwoDots;
421
422 <with_syntax>{
423
424         [A-Z][A-Za-z0-9]*([-][A-Za-z0-9]+)*     {
425                                 asn1p_lval.tv_str = strdup(yytext);
426                                 return TOK_Literal;
427                         }
428
429         ","             {
430                                 asn1p_lval.tv_str = strdup(yytext);
431                                 return TOK_Literal;
432                         }
433
434         "{"             {
435                                 yy_push_state(with_syntax);
436                                 asn1p_lval.tv_str = strdup(yytext);
437                                 return TOK_Literal;
438                         }
439
440         "["             return '[';
441         "]"             return ']';
442
443         {WSP}+          {
444                         asn1p_lval.tv_opaque.buf = strdup(yytext);
445                         asn1p_lval.tv_opaque.len = yyleng;
446                         return TOK_whitespace;
447                         }
448
449         "}"             {
450                                 yy_pop_state();
451                                 if(YYSTATE == with_syntax) {
452                                         asn1p_lval.tv_str = strdup(yytext);
453                                         return TOK_Literal;
454                                 } else {
455                                         return '}';
456                                 }
457                         }
458
459 }
460
461
462 <INITIAL,extended_values>{WSP}+ /* Ignore whitespace */
463
464
465 [{][\t\r\v\f\n ]*[0-7][,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]  {
466                 asn1c_integer_t v1 = -1, v2 = -1;
467                 char *p;
468                 for(p = yytext; *p; p++)
469                         if(*p >= '0' && *p <= '9')
470                         { v1 = _lex_atoi(p); break; }
471                 while(*p >= '0' && *p <= '9') p++;      /* Skip digits */
472                 for(; *p; p++) if(*p >= '0' && *p <= '9')
473                         { v2 = _lex_atoi(p); break; }
474                 if(v1 < 0 || v1 > 7) {
475                         fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
476                                 "mandates 0..7 range for Tuple's TableColumn\n",
477                                 yytext, yylineno);
478                         return -1;
479                 }
480                 if(v2 < 0 || v2 > 15) {
481                         fprintf(stderr, "%s at line %d: X.680:2003, #37.14 "
482                                 "mandates 0..15 range for Tuple's TableRow\n",
483                                 yytext, yylineno);
484                         return -1;
485                 }
486                 asn1p_lval.a_int = (v1 << 4) + v2;
487                 return TOK_tuple;
488         }
489
490 [{][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[,][\t\r\v\f\n ]*[0-9]+[\t\r\v\f\n ]*[}]   {
491                 asn1c_integer_t v1 = -1, v2 = -1, v3 = -1, v4 = -1;
492                 char *p;
493                 for(p = yytext; *p; p++)
494                         if(*p >= '0' && *p <= '9')
495                         { v1 = _lex_atoi(p); break; }
496                 while(*p >= '0' && *p <= '9') p++;      /* Skip digits */
497                 for(; *p; p++) if(*p >= '0' && *p <= '9')
498                         { v2 = _lex_atoi(p); break; }
499                 while(*p >= '0' && *p <= '9') p++;
500                 for(; *p; p++) if(*p >= '0' && *p <= '9')
501                         { v3 = _lex_atoi(p); break; }
502                 while(*p >= '0' && *p <= '9') p++;
503                 for(; *p; p++) if(*p >= '0' && *p <= '9')
504                         { v4 = _lex_atoi(p); break; }
505                 if(v1 < 0 || v1 > 127) {
506                         fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
507                                 "mandates 0..127 range for Quadruple's Group\n",
508                                 yytext, yylineno);
509                         return -1;
510                 }
511                 if(v2 < 0 || v2 > 255) {
512                         fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
513                                 "mandates 0..255 range for Quadruple's Plane\n",
514                                 yytext, yylineno);
515                         return -1;
516                 }
517                 if(v3 < 0 || v3 > 255) {
518                         fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
519                                 "mandates 0..255 range for Quadruple's Row\n",
520                                 yytext, yylineno);
521                         return -1;
522                 }
523                 if(v4 < 0 || v4 > 255) {
524                         fprintf(stderr, "%s at line %d: X.680:2003, #37.12 "
525                                 "mandates 0..255 range for Quadruple's Cell\n",
526                                 yytext, yylineno);
527                         return -1;
528                 }
529                 asn1p_lval.a_int = (v1 << 24) | (v2 << 16) | (v3 << 8) | v4;
530                 return TOK_quadruple;
531         }
532
533
534 "[["        return TOK_VBracketLeft;
535 "]]"        return TOK_VBracketRight;
536
537 [(){},;:|!.&@\[\]^]     return yytext[0];
538
[^A-Za-z0-9:=,{}<.@()\[\]'\"|&^*;!-] {
                /*
                 * A single character outside of the set listed above.
                 * The brackets are escaped inside the character class:
                 * an unescaped ']' would terminate the class right there
                 * and turn the rest of the list into literal pattern text.
                 *
                 * When TYPE_LIFETIME(1994, 0) holds, the message is
                 * prefixed with "ERROR: " and the scanner fails with -1;
                 * otherwise the message is only printed and scanning
                 * continues.
                 */
                if(TYPE_LIFETIME(1994, 0))
                        fprintf(stderr, "ERROR: ");
                fprintf(stderr,
                "Symbol '%c' at line %d is prohibited "
                "by ASN.1:1994 and ASN.1:1997\n",
                        yytext[0], yylineno);
                if(TYPE_LIFETIME(1994, 0))
                        return -1;
        }
549
550 <*>.    {
551                 fprintf(stderr,
552                         "Unexpected token at line %d: \"%s\"\n",
553                         yylineno, yytext);
554                 while(YYSTATE != INITIAL)
555                         yy_pop_state();
556                 if(0) {
557                         yy_top_state(); /* Just to use this function. */
558                         yy_fatal_error("Parse error");
559                 }
560                 return -1;
561 }
562
563 <*><<EOF>>      {
564                 while(YYSTATE != INITIAL)
565                         yy_pop_state();
566                 yyterminate();
567         }
568
569
570 %%
571
572 /*
573  * Very dirty but wonderful hack allowing to rule states from within .y file.
574  */
575 void asn1p_lexer_hack_push_opaque_state() { yy_push_state(opaque); }
576
577 /*
578  * Another hack which disables recognizing some tokens when inside WITH SYNTAX.
579  */
580 void asn1p_lexer_hack_enable_with_syntax() { yy_push_state(with_syntax); }
581
582 /* Yet another */
583 void asn1p_lexer_hack_push_encoding_control() {
584         yy_push_state(encoding_control);
585 }
586
587 static asn1c_integer_t
588 _lex_atoi(const char *ptr) {
589         asn1c_integer_t value;
590         if(asn1p_atoi(ptr, &value)) {
591                 fprintf(stderr,
592                         "Value \"%s\" at line %d is too large "
593                         "for this compiler! Please contact the asn1c author.\n",
594                         ptr, yylineno);
595                 errno = ERANGE;
596         }
597         return value;
598 }
599
600 static double
601 _lex_atod(const char *ptr) {
602         double value;
603         errno = 0;
604         value = strtod(ptr, 0);
605         if(errno) {
606                 fprintf(stderr,
607                         "Value \"%s\" at line %d is outside of `double` range "
608                         "in this compiler! Please contact the asn1c author.\n",
609                         ptr, yylineno);
610                 errno = ERANGE;
611         }
612         return value;
613 }
614