/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.
   YY_DECL renames the flex-generated scanner to yylex1 and makes it
   file-local, leaving the name yylex free for the second stage.  */
#define YY_DECL         static int yylex1(void)

%}

/* Identifiers; '$' is accepted anywhere in a name.  */
IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hex, with an optional suffix.
   I_SUF covers u, l, ul and lu (either case); it has no ll/ull
   alternative.  */
O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: a fraction with optional exponent, or digits
   with a mandatory exponent; either may carry an f/l suffix.  */
FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally wide (L prefix).  A
   backslash always consumes the character that follows it, so an
   escaped quote does not end the literal.  */
STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators that are reported as a single OTHER token.  */
MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* Version 2 checksumming does proper tokenization; version 1 wasn't
   quite so pedantic.  The rules for MC_TOKEN, INT and REAL below are
   active only in this start condition.  */
%s V2_TOKENS

/* We don't do multiple input files.  */
%option noyywrap

%%


 /* Keep track of our location in the original source files.  A
    preprocessor line marker is handed to the second stage as FILENAME;
    any other '#' line and every bare newline just bump cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
<V2_TOKENS>{MC_TOKEN}                   return OTHER;
<V2_TOKENS>{INT}                        return INT;
<V2_TOKENS>{REAL}                       return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.

   _APP(T, L) fills the node currently held in `next_node': it stores a
   freshly allocated copy of the L+1 bytes starting at T (for
   yytext/yyleng that is the token text plus the byte following it,
   which flex keeps as a NUL terminator) and tags the node SYM_NORMAL.
   It then allocates a new, empty `next_node' whose `next' field points
   back at the node just filled, so the list is linked newest-first.

   Both macros expect variables named `cur_node' and `next_node' to be
   in scope where they are expanded.  T and L are parenthesized in the
   expansion so that expression arguments group as the caller intended;
   L is still evaluated twice, so it must be free of side effects.  */

#define _APP(T,L)       do {                                               \
                          cur_node = next_node;                            \
                          next_node = xmalloc(sizeof(*next_node));         \
                          next_node->next = cur_node;                      \
                          cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
                          cur_node->tag = SYM_NORMAL;                      \
                        } while (0)

/* Append the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.  
*/

/* yylex -- token source for the parser.

   Pulls raw tokens from yylex1() and, depending on a small state
   machine kept in function-static storage, either hands a token
   straight back or keeps reading until a whole "phrase" has been
   collected:

     ST_NORMAL      one raw token per call.  Identifiers are looked up
                    with is_reserved_word(); an identifier that is not
                    handled as a keyword below and for which
                    find_symbol() reports a SYM_TYPEDEF becomes TYPE,
                    unless suppress_type_lookup is non-zero.
     ST_ATTRIBUTE   everything up to the balancing ')' is returned as
     ST_ASM         ATTRIBUTE_PHRASE / ASM_PHRASE.
     ST_BRACKET     everything up to the balancing ']' -> BRACKET_PHRASE.
     ST_BRACE       everything up to the balancing '}' -> BRACE_PHRASE.
     ST_EXPRESSION  everything up to a ',' or ';' at nesting depth zero
                    -> EXPRESSION_PHRASE; the terminator is pushed back
                    with unput() so the next call sees it.
     ST_TABLE_*     no transition in this function enters these from a
                    non-table state.

   FILENAME tokens never reach the caller; they only update
   cur_filename and cur_line.

   Every token text that is kept is appended to the node list with APP.
   On return yylval is set to &next_node->next, i.e. the address of the
   link to the most recently filled node.

   Returns 0 as soon as yylex1() returns 0; otherwise the (possibly
   re-categorized) token code.  An unknown state calls exit(1).  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
    ST_TABLE_5, ST_TABLE_6
  } lexstate = ST_NOTSTARTED;

  /* Both are count-downs, decremented once for every token returned.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Empty node that the next APP will fill; see the APP macro.  */
  static struct string_list *next_node;

  struct string_list *cur_node;
  int token;
  int count = 0;        /* bracket nesting depth while collecting */

  /* One-time setup on the very first call.  */
  if (lexstate == ST_NOTSTARTED)
    {
      BEGIN(V2_TOKENS);
      next_node = xmalloc(sizeof *next_node);
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();
  if (token == 0)
    return 0;

  if (token == FILENAME)
    {
      /* Save the filename and line number for later error messages.
         The FILENAME rule only matches when the text contains a pair of
         double quotes, so both strchr calls find one.  */
      char *name_start = strchr(yytext, '\"') + 1;
      char *name_end = strchr(name_start, '\"');

      free(cur_filename);

      *name_end = '\0';
      cur_filename = memcpy(xmalloc(name_end - name_start + 1),
                            name_start, name_end - name_start + 1);
      /* Skip the '#' and the first blank; atoi skips any further ones.  */
      cur_line = atoi(yytext + 2);

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      /* Every token seen in this state is recorded.  */
      APP;

      if (token == IDENT)
        {
          const struct resword *kw = is_reserved_word(yytext, yyleng);

          if (kw != NULL)
            {
              token = kw->token;

              if (token == ATTRIBUTE_KEYW || token == ASM_KEYW)
                {
                  /* Start collecting the parenthesized phrase.  */
                  lexstate = (token == ATTRIBUTE_KEYW) ? ST_ATTRIBUTE
                                                        : ST_ASM;
                  count = 0;
                  goto repeat;
                }

              /* After struct/union, a '{' among the next two tokens is
                 returned as itself instead of starting a brace phrase.  */
              if (token == STRUCT_KEYW || token == UNION_KEYW)
                dont_want_brace_phrase = 3;

              /* After struct/union/enum, the token that follows is not
                 looked up as a typedef name.  */
              if (token == STRUCT_KEYW || token == UNION_KEYW
                  || token == ENUM_KEYW)
                {
                  suppress_type_lookup = 2;
                  goto fini;
                }

              if (token == EXPORT_SYMBOL_KEYW)
                goto fini;
            }

          if (!suppress_type_lookup)
            {
              struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);

              if (sym && sym->type == SYM_TYPEDEF)
                token = TYPE;
            }
        }
      else if (token == '[')
        {
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;
        }
      else if (token == '{')
        {
          if (!dont_want_brace_phrase)
            {
              lexstate = ST_BRACE;
              count = 1;
              goto repeat;
            }
        }
      else if (token == '=' || token == ':')
        {
          /* The '=' or ':' itself is returned now; collection of the
             expression starts with the next call.  */
          lexstate = ST_EXPRESSION;
        }
      /* DOTS and everything else: returned unchanged.  */
      break;

    case ST_ATTRIBUTE:
    case ST_ASM:
    case ST_BRACKET:
    case ST_BRACE:
      {
        /* The four collecting states differ only in which bracket pair
           they balance and which phrase token they produce.  */
        int opener, closer, phrase;

        if (lexstate == ST_BRACKET)
          {
            opener = '[';
            closer = ']';
            phrase = BRACKET_PHRASE;
          }
        else if (lexstate == ST_BRACE)
          {
            opener = '{';
            closer = '}';
            phrase = BRACE_PHRASE;
          }
        else
          {
            opener = '(';
            closer = ')';
            phrase = (lexstate == ST_ATTRIBUTE) ? ATTRIBUTE_PHRASE
                                                : ASM_PHRASE;
          }

        APP;

        if (token == opener)
          ++count;
        else if (token == closer && --count == 0)
          {
            lexstate = ST_NORMAL;
            token = phrase;
            break;
          }
        goto repeat;
      }

    case ST_EXPRESSION:
      if ((token == ',' || token == ';') && count == 0)
        {
          /* Push the terminator back so that it is read again once the
             expression phrase has been delivered.  It is not appended
             to the phrase.  */
          unput(token);

          lexstate = ST_NORMAL;
          token = EXPRESSION_PHRASE;
          break;
        }

      if (token == '(' || token == '[' || token == '{')
        ++count;
      else if (token == ')' || token == ']' || token == '}')
        --count;

      APP;
      goto repeat;

    case ST_TABLE_1:
    case ST_TABLE_3:
      /* Discard everything.  */
      goto repeat;

    case ST_TABLE_2:
      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
        {
          token = EXPORT_SYMBOL_KEYW;
          lexstate = ST_TABLE_5;
          APP;
          break;
        }
      lexstate = ST_TABLE_6;
      /* FALLTHRU */

    case ST_TABLE_6:
      /* Skip tokens, tracking nesting, until a ',' at depth zero.  */
      if (token == '{' || token == '[' || token == '(')
        ++count;
      else if (token == '}' || token == ']' || token == ')')
        --count;
      else if (token == ',' && count == 0)
        lexstate = ST_TABLE_2;
      goto repeat;

    case ST_TABLE_4:
      if (token == ';')
        lexstate = ST_NORMAL;
      goto repeat;

    case ST_TABLE_5:
      /* A ',' is reported to the caller as ';'; the text appended is
         still the text that was matched.  */
      if (token == ',')
        {
          token = ';';
          lexstate = ST_TABLE_2;
        }
      APP;
      break;

    default:
      exit(1);
    }

fini:
  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}