/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 */
/*
 * _APP(T, L): fill the current placeholder node with a copy of the L+1 bytes
 * at T (the token text plus its terminating NUL) and allocate a fresh
 * placeholder whose ->next points at the node just filled.  Relies on the
 * caller's `cur_node' and `next_node' variables.  L is evaluated twice, so
 * it must be free of side effects.
 */
#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string = memcpy(xmalloc((L) + 1), (T), \
						    (L) + 1);		   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)? \
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;	   \
			} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Every token obtained from yylex1() is either swallowed (FILENAME
   markers), returned as is, or accumulated into a "phrase" (attribute,
   asm, typeof, bracket, brace or expression) that is returned as a single
   token once its closing delimiter has been seen.  Token texts are
   appended to a linked list through APP; on return, yylval points at the
   `next' field of the current placeholder node, i.e. at the most recently
   appended node.

   Returns 0 when yylex1() reports end of input, otherwise the token code
   for the parser.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Both counters are decremented once per returned token (see `fini').  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;

  /* `count' tracks delimiter nesting depth while a phrase is collected.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the initial empty placeholder node.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME pattern guarantees a quoted, non-empty file name,
	 so both strchr() calls find their quote.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      /* Copy the name including the NUL that was just written.  */
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the '#' and the first blank; atoi() skips any further
	 leading whitespace.  */
      cur_line = atoi(yytext+2);

      /* The first file name seen is remembered as the source file.  */
      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    int r = is_reserved_word(yytext, yyleng);
	    if (r >= 0)
	      {
		switch (token = r)
		  {
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;
		  case TYPEOF_KEYW:
		    lexstate = ST_TYPEOF;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		  case ENUM_KEYW:
		    /* Still non-zero for the next two tokens, so a '{'
		       there is returned as a plain token.  */
		    dont_want_brace_phrase = 3;
		    /* Still non-zero for the next token, which therefore
		       skips the typedef lookup below.  */
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		    goto fini;

		  default:
		    /* Any other keyword continues to the typedef lookup.  */
		    break;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		if (find_symbol(yytext, SYM_TYPEDEF, 1))
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	case DOTS:
	default:
	  APP;
	  break;
	}
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the matching ')' as one phrase.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ATTRIBUTE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_ASM:
      /* Collect everything up to the matching ')' as one phrase.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ASM_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of typeof.  */
      if (token == IDENT)
	{
	  if (is_reserved_word(yytext, yyleng) >= 0
	      || find_symbol(yytext, SYM_TYPEDEF, 1))
	    {
	      /* The operand starts with a keyword or a typedef name: push
		 the identifier and the '(' back to the input and return
		 TYPEOF_KEYW so the tokens are rescanned in ST_NORMAL.  */
	      yyless(0);
	      unput('(');
	      lexstate = ST_NORMAL;
	      token = TYPEOF_KEYW;
	      break;
	    }
	  /* Otherwise append the '(' that was held back.  */
	  _APP("(", 1);
	}
	/* NOTE(review): when the first token is not an identifier, the
	   held-back '(' is never appended to the phrase -- confirm whether
	   this is intended before changing it, since the collected text is
	   handed to the parser.  */
	lexstate = ST_TYPEOF;
	/* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
	{
	case '(':
	  /* Hold back the outermost '(' until the next token is known.  */
	  if ( ++count == 1 )
	    lexstate = ST_TYPEOF_1;
	  else
	    APP;
	  goto repeat;
	case ')':
	  APP;
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = TYPEOF_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_BRACKET:
      /* Collect everything up to the matching ']' as one phrase.  */
      APP;
      switch (token)
	{
	case '[':
	  ++count;
	  goto repeat;
	case ']':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACKET_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACE:
      /* Collect everything up to the matching '}' as one phrase.  */
      APP;
      switch (token)
	{
	case '{':
	  ++count;
	  goto repeat;
	case '}':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_EXPRESSION:
      /* Collect tokens until an unnested ',', ';' or '}' is seen.  */
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case '}':
	  /* is this the last line of an enum declaration? */
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  /* FALLTHRU */
	case ')': case ']':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    default:
      /* Unknown lexer state.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  /* Hand the parser the address of the link to the newest node.  */
  yylval = &next_node->next;

  return token;
}