/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL         static int yylex1(void)

%}

IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 *
 * _APP(T, L) fills the pre-allocated node `next_node' with a copy of the
 * L characters at T plus the byte that follows them (T must therefore be
 * NUL-terminated at offset L), then allocates a fresh empty `next_node'
 * whose ->next points at the node just filled.  The list is thus chained
 * from newest to oldest.  The macro expects `cur_node' and `next_node' to
 * be in scope at the point of use.  L is evaluated twice.
 */
#define _APP(T, L)      do {                                               \
                          cur_node = next_node;                            \
                          next_node = xmalloc(sizeof(*next_node));         \
                          next_node->next = cur_node;                      \
                          cur_node->string =                               \
                            memcpy(xmalloc((L) + 1), (T), (L) + 1);        \
                          cur_node->tag =                                  \
                            find_symbol(cur_node->string,                  \
                                        SYM_ENUM_CONST, 1)                 \
                            ? SYM_ENUM_CONST : SYM_NORMAL;                 \
                          cur_node->in_source_file = in_source_file;       \
                        } while (0)

/* Append the text of the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Returns 0 at end of input, otherwise a token code.  Before returning a
   token, yylval is set to the address of the link that leads to the most
   recently appended string_list node.

   Bracketed constructs (attributes, asm, typeof, [...], {...} and
   initializer / bit-field expressions) are swallowed whole and handed to
   the parser as a single *_PHRASE token.  `lexstate' remembers which kind
   of phrase is being collected and `count' tracks the nesting depth.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Countdown flags, decremented once per returned token (see `fini').
     After `struct', `union' or `enum':
       - suppress_type_lookup keeps the following token (the tag) from
         being reclassified as a typedef name;
       - dont_want_brace_phrase makes a '{' among the following two tokens
         come back as a plain '{' instead of opening a BRACE_PHRASE.  */
  static int suppress_type_lookup, dont_want_brace_phrase;

  /* Empty node that the next APP will fill in.  */
  static struct string_list *next_node;

  int token, count = 0;
  struct string_list *cur_node;   /* scratch variable used by _APP */

  if (lexstate == ST_NOTSTARTED)
    {
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      free(cur_filename);       /* free(NULL) is a no-op */

      /* The FILENAME rule guarantees a pair of double quotes in yytext,
         so both strchr() calls succeed.  yytext is cut in place at the
         closing quote.  */
      file = strchr(yytext, '\"') + 1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e - file + 1), file, e - file + 1);
      cur_line = atoi(yytext + 2);

      /* The first file name seen is taken to be the source file.  */
      if (!source_file)
        {
          source_file = xstrdup(cur_filename);
          in_source_file = 1;
        }
      else
        {
          in_source_file = (strcmp(cur_filename, source_file) == 0);
        }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            int r = is_reserved_word(yytext, yyleng);
            if (r >= 0)
              {
                switch (token = r)
                  {
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;
                  case TYPEOF_KEYW:
                    lexstate = ST_TYPEOF;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  default:
                    /* Any other keyword continues with the typedef
                       lookup below, like a plain identifier.  */
                    break;
                  }
              }
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          /* The '=' or ':' itself is returned now; the expression that
             follows is collected on the next call.  */
          APP;
          lexstate = ST_EXPRESSION;
          break;

        case DOTS:
        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the matching ')' of the attribute.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      /* Collect everything up to the matching ')' of the asm.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of a typeof.  */
      if (token == IDENT)
        {
          if (is_reserved_word(yytext, yyleng) >= 0
              || find_symbol(yytext, SYM_TYPEDEF, 1))
            {
              /* typeof(<type>): push the identifier and the '(' back and
                 return TYPEOF_KEYW so the parser handles it itself.  */
              yyless(0);
              unput('(');
              lexstate = ST_NORMAL;
              token = TYPEOF_KEYW;
              break;
            }
          /* Append the '(' that was held back in ST_TYPEOF.  */
          _APP("(", 1);
        }
      /* NOTE(review): when the first token is not an identifier, the
         held-back '(' is never appended to the phrase.  Left as is,
         because changing the recorded text would alter the checksums --
         confirm whether this is intended.  */
      lexstate = ST_TYPEOF;
      /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
        {
        case '(':
          if (++count == 1)
            lexstate = ST_TYPEOF_1;     /* hold the outermost '(' back */
          else
            APP;
          goto repeat;
        case ')':
          APP;
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = TYPEOF_PHRASE;
              break;
            }
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_BRACKET:
      /* Collect everything up to the matching ']'.  */
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      /* Collect everything up to the matching '}'.  */
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      /* Collect an expression; it ends at an unnested ',', ';' or '}'.  */
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so that we can find it
                 again after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so that we can find it
                 again after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    default:
      /* Not reachable: every state lexstate can hold is handled above.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}