/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.
   YY_DECL renames the flex-generated scanner to a file-local yylex1()
   so that the hand-written yylex() below is what the parser calls.  */
#define YY_DECL         static int yylex1(void)

%}

/* Identifiers; '$' is accepted in any position.  */
IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal, followed by at most
   one of the suffixes U, L, UL, LU (either case).
   NOTE: long long suffixes (LL, ULL) are not covered by I_SUF, so an
   input such as 1ULL is scanned as INT "1UL" followed by IDENT "L".  */
O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: a fraction with optional exponent, or digits with
   a mandatory exponent; an optional F or L suffix in both forms.  */
FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional L prefix; a backslash
   escapes the character that follows it.  */
STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A line of
    the form  # <integer> "<file>" ...  is handed to the second stage as
    FILENAME; any other line starting with '#', and every bare newline,
    just advances cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.hash.c"


/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) records the token text T (L characters plus its terminating
 * NUL, so L+1 bytes are copied) in the node list used by yylex().  It
 * relies on two variables being in scope at the point of use:
 *
 *   next_node - an already allocated, still empty node;
 *   cur_node  - scratch pointer, overwritten here.
 *
 * The empty node is filled in with a private copy of the text, and a new
 * empty node is allocated whose ->next points back at the node just
 * filled.  After the macro, next_node->next is therefore the most
 * recently recorded token.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite
 * rare; so far it was only observed in include/linux/telephony.h.
 *
 * Both macro parameters are parenthesized in the expansion so that an
 * expression argument cannot change the meaning of "L + 1".
 */
#define _APP(T,L)       do {                                                 \
                  cur_node = next_node;                                      \
                  next_node = xmalloc(sizeof(*next_node));                   \
                  next_node->next = cur_node;                                \
                  cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1); \
                  cur_node->tag =                                            \
                    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)         \
                    ? SYM_ENUM_CONST : SYM_NORMAL;                           \
                } while (0)

/* Record the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.  
*/ 126 127 int 128 yylex(void) 129 { 130 static enum { 131 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE, 132 ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4, 133 ST_TABLE_5, ST_TABLE_6 134 } lexstate = ST_NOTSTARTED; 135 136 static int suppress_type_lookup, dont_want_brace_phrase; 137 static struct string_list *next_node; 138 139 int token, count = 0; 140 struct string_list *cur_node; 141 142 if (lexstate == ST_NOTSTARTED) 143 { 144 next_node = xmalloc(sizeof(*next_node)); 145 next_node->next = NULL; 146 lexstate = ST_NORMAL; 147 } 148 149 repeat: 150 token = yylex1(); 151 152 if (token == 0) 153 return 0; 154 else if (token == FILENAME) 155 { 156 char *file, *e; 157 158 /* Save the filename and line number for later error messages. */ 159 160 if (cur_filename) 161 free(cur_filename); 162 163 file = strchr(yytext, '\"')+1; 164 e = strchr(file, '\"'); 165 *e = '\0'; 166 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 167 cur_line = atoi(yytext+2); 168 169 goto repeat; 170 } 171 172 switch (lexstate) 173 { 174 case ST_NORMAL: 175 switch (token) 176 { 177 case IDENT: 178 APP; 179 { 180 const struct resword *r = is_reserved_word(yytext, yyleng); 181 if (r) 182 { 183 switch (token = r->token) 184 { 185 case ATTRIBUTE_KEYW: 186 lexstate = ST_ATTRIBUTE; 187 count = 0; 188 goto repeat; 189 case ASM_KEYW: 190 lexstate = ST_ASM; 191 count = 0; 192 goto repeat; 193 194 case STRUCT_KEYW: 195 case UNION_KEYW: 196 case ENUM_KEYW: 197 dont_want_brace_phrase = 3; 198 suppress_type_lookup = 2; 199 goto fini; 200 201 case EXPORT_SYMBOL_KEYW: 202 goto fini; 203 } 204 } 205 if (!suppress_type_lookup) 206 { 207 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 208 token = TYPE; 209 } 210 } 211 break; 212 213 case '[': 214 APP; 215 lexstate = ST_BRACKET; 216 count = 1; 217 goto repeat; 218 219 case '{': 220 APP; 221 if (dont_want_brace_phrase) 222 break; 223 lexstate = ST_BRACE; 224 count = 1; 225 goto repeat; 226 227 case '=': case ':': 228 APP; 
229 lexstate = ST_EXPRESSION; 230 break; 231 232 case DOTS: 233 default: 234 APP; 235 break; 236 } 237 break; 238 239 case ST_ATTRIBUTE: 240 APP; 241 switch (token) 242 { 243 case '(': 244 ++count; 245 goto repeat; 246 case ')': 247 if (--count == 0) 248 { 249 lexstate = ST_NORMAL; 250 token = ATTRIBUTE_PHRASE; 251 break; 252 } 253 goto repeat; 254 default: 255 goto repeat; 256 } 257 break; 258 259 case ST_ASM: 260 APP; 261 switch (token) 262 { 263 case '(': 264 ++count; 265 goto repeat; 266 case ')': 267 if (--count == 0) 268 { 269 lexstate = ST_NORMAL; 270 token = ASM_PHRASE; 271 break; 272 } 273 goto repeat; 274 default: 275 goto repeat; 276 } 277 break; 278 279 case ST_BRACKET: 280 APP; 281 switch (token) 282 { 283 case '[': 284 ++count; 285 goto repeat; 286 case ']': 287 if (--count == 0) 288 { 289 lexstate = ST_NORMAL; 290 token = BRACKET_PHRASE; 291 break; 292 } 293 goto repeat; 294 default: 295 goto repeat; 296 } 297 break; 298 299 case ST_BRACE: 300 APP; 301 switch (token) 302 { 303 case '{': 304 ++count; 305 goto repeat; 306 case '}': 307 if (--count == 0) 308 { 309 lexstate = ST_NORMAL; 310 token = BRACE_PHRASE; 311 break; 312 } 313 goto repeat; 314 default: 315 goto repeat; 316 } 317 break; 318 319 case ST_EXPRESSION: 320 switch (token) 321 { 322 case '(': case '[': case '{': 323 ++count; 324 APP; 325 goto repeat; 326 case '}': 327 /* is this the last line of an enum declaration? */ 328 if (count == 0) 329 { 330 /* Put back the token we just read so's we can find it again 331 after registering the expression. */ 332 unput(token); 333 334 lexstate = ST_NORMAL; 335 token = EXPRESSION_PHRASE; 336 break; 337 } 338 /* FALLTHRU */ 339 case ')': case ']': 340 --count; 341 APP; 342 goto repeat; 343 case ',': case ';': 344 if (count == 0) 345 { 346 /* Put back the token we just read so's we can find it again 347 after registering the expression. 
*/ 348 unput(token); 349 350 lexstate = ST_NORMAL; 351 token = EXPRESSION_PHRASE; 352 break; 353 } 354 APP; 355 goto repeat; 356 default: 357 APP; 358 goto repeat; 359 } 360 break; 361 362 case ST_TABLE_1: 363 goto repeat; 364 365 case ST_TABLE_2: 366 if (token == IDENT && yyleng == 1 && yytext[0] == 'X') 367 { 368 token = EXPORT_SYMBOL_KEYW; 369 lexstate = ST_TABLE_5; 370 APP; 371 break; 372 } 373 lexstate = ST_TABLE_6; 374 /* FALLTHRU */ 375 376 case ST_TABLE_6: 377 switch (token) 378 { 379 case '{': case '[': case '(': 380 ++count; 381 break; 382 case '}': case ']': case ')': 383 --count; 384 break; 385 case ',': 386 if (count == 0) 387 lexstate = ST_TABLE_2; 388 break; 389 }; 390 goto repeat; 391 392 case ST_TABLE_3: 393 goto repeat; 394 395 case ST_TABLE_4: 396 if (token == ';') 397 lexstate = ST_NORMAL; 398 goto repeat; 399 400 case ST_TABLE_5: 401 switch (token) 402 { 403 case ',': 404 token = ';'; 405 lexstate = ST_TABLE_2; 406 APP; 407 break; 408 default: 409 APP; 410 break; 411 } 412 break; 413 414 default: 415 exit(1); 416 } 417 fini: 418 419 if (suppress_type_lookup > 0) 420 --suppress_type_lookup; 421 if (dont_want_brace_phrase > 0) 422 --dont_want_brace_phrase; 423 424 yylval = &next_node->next; 425 426 return token; 427 } 428