xref: /openbmc/linux/scripts/genksyms/lex.l (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 /* Lexical analysis for genksyms.
2    Copyright 1996, 1997 Linux International.
3 
4    New implementation contributed by Richard Henderson <rth@tamu.edu>
5    Based on original work by Bjorn Ekwall <bj0rn@blox.se>
6 
7    Taken from Linux modutils 2.4.22.
8 
9    This program is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published by the
11    Free Software Foundation; either version 2 of the License, or (at your
12    option) any later version.
13 
14    This program is distributed in the hope that it will be useful, but
15    WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17    General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 
24 %{
25 
26 #include <limits.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 
31 #include "genksyms.h"
32 #include "parse.h"
33 
34 /* We've got a two-level lexer here.  We let flex do basic tokenization
35    and then we categorize those basic tokens in the second stage.  */
/* YY_DECL is the declaration flex gives to the scanner function it
   generates.  Declaring it as a static function named yylex1 keeps the
   public name yylex free for the second-stage lexer at the end of this
   file, which calls yylex1() to fetch each basic token.  */
36 #define YY_DECL		static int yylex1(void)
37 
38 %}
39 
/* Named patterns used by the rules section.
   IDENT    - identifier; the dollar sign is accepted like a letter.
   INT      - octal, decimal or hex constant with an optional U, L, UL
              or LU suffix in either letter case.
              NOTE(review): I_SUF has no LL or ULL form, so a constant
              written 1ULL is not a single INT match; confirm whether
              that is intended before changing it.
   REAL     - a fraction with optional exponent and suffix, or plain
              digits with a mandatory exponent and optional suffix.
   STRING, CHAR - double- or single-quoted literal with an optional L
              prefix; a backslash consumes the non-newline character
              after it, so an escaped quote does not end the literal.
   MC_TOKEN - two-character operators: one of ~ % ^ & * + = | < > / -
              followed by =, or one of && || -> << >>.  */
40 IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
41 
42 O_INT			0[0-7]*
43 D_INT			[1-9][0-9]*
44 X_INT			0[Xx][0-9A-Fa-f]+
45 I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
46 INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
47 
48 FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
49 EXP			[Ee][+-]?[0-9]+
50 F_SUF			[FfLl]
51 REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
52 
53 STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
54 CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
55 
56 MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
57 
58 /* Version 2 checksumming does proper tokenization; version 1 wasn't
59    quite so pedantic.  */
/* V2_TOKENS is declared with %s, i.e. as an inclusive start condition:
   rules without a start-condition prefix stay active while it is
   selected.  yylex() selects it once, on its first call.  */
60 %s V2_TOKENS
61 
62 /* We don't do multiple input files.  */
63 %option noyywrap
64 
65 %%
66 
 /* Rule summary.  flex prefers the longest match and, on a tie, the
    rule listed first, so the order below matters:
    - a line of the form  # INT quoted-name ...  is returned whole as
      FILENAME; yylex() reads the file name and line number out of the
      matched text and sets cur_line itself;
    - every other line starting with # is discarded and counted;
    - a bare newline only advances cur_line;
    - STRING, CHAR and IDENT carry no start-condition prefix;
    - MC_TOKEN, INT and REAL are recognized only under V2_TOKENS;
    - three consecutive dots are returned as DOTS;
    - any other single character is returned as its own value.  */
67 
68  /* Keep track of our location in the original source files.  */
69 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
70 ^#.*\n					cur_line++;
71 \n					cur_line++;
72 
73  /* Ignore all other whitespace.  */
74 [ \t\f\v\r]+				;
75 
76 
77 {STRING}				return STRING;
78 {CHAR}					return CHAR;
79 {IDENT}					return IDENT;
80 
81  /* The Pedant requires that the other C multi-character tokens be
82     recognized as tokens.  We don't actually use them since we don't
83     parse expressions, but we do want whitespace to be arranged
84     around them properly.  */
85 <V2_TOKENS>{MC_TOKEN}			return OTHER;
86 <V2_TOKENS>{INT}			return INT;
87 <V2_TOKENS>{REAL}			return REAL;
88 
89 "..."					return DOTS;
90 
91  /* All other tokens are single characters.  */
92 .					return yytext[0];
93 
94 
95 %%
96 
97 /* Bring in the keyword recognizer.  */
98 
99 #include "keywords.c"
100 
101 
/* Macros to append to our phrase collection list.

   _APP(T, L) records a copy of the text T, of length L, as a new list
   entry.  It expands in place and relies on two variables being in scope
   where it is used:
     next_node - an allocated node that has not been filled in yet;
     cur_node  - set to that node once it becomes the new entry.
   The new entry receives a freshly allocated copy of L+1 bytes starting
   at T (so T[L] is expected to be the terminating NUL) and the tag
   SYM_NORMAL.  next_node is then replaced by a new unfilled node whose
   `next' points at the entry just added, which links the list from the
   newest entry back to the oldest.

   Both arguments are parenthesized in the expansion so that expression
   arguments keep their meaning.  L is evaluated twice; pass arguments
   without side effects.  */

#define _APP(T,L)	do {						       \
			  cur_node = next_node;				       \
			  next_node = xmalloc(sizeof(*next_node));	       \
			  next_node->next = cur_node;			       \
			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
			  cur_node->tag = SYM_NORMAL;			       \
			} while (0)

/* Append the text of the token most recently matched by flex.  */
#define APP		_APP(yytext, yyleng)
113 
114 
115 /* The second stage lexer.  Here we incorporate knowledge of the state
116    of the parser to tailor the tokens that are returned.  */
117 
118 int
119 yylex(void)
120 {
121   static enum {
122     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
123     ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
124     ST_TABLE_5, ST_TABLE_6
125   } lexstate = ST_NOTSTARTED;
126 
127   static int suppress_type_lookup, dont_want_brace_phrase;
128   static struct string_list *next_node;
129 
130   int token, count = 0;
131   struct string_list *cur_node;
132 
133   if (lexstate == ST_NOTSTARTED)
134     {
135       BEGIN(V2_TOKENS);
136       next_node = xmalloc(sizeof(*next_node));
137       next_node->next = NULL;
138       lexstate = ST_NORMAL;
139     }
140 
141 repeat:
142   token = yylex1();
143 
144   if (token == 0)
145     return 0;
146   else if (token == FILENAME)
147     {
148       char *file, *e;
149 
150       /* Save the filename and line number for later error messages.  */
151 
152       if (cur_filename)
153 	free(cur_filename);
154 
155       file = strchr(yytext, '\"')+1;
156       e = strchr(file, '\"');
157       *e = '\0';
158       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
159       cur_line = atoi(yytext+2);
160 
161       goto repeat;
162     }
163 
164   switch (lexstate)
165     {
166     case ST_NORMAL:
167       switch (token)
168 	{
169 	case IDENT:
170 	  APP;
171 	  {
172 	    const struct resword *r = is_reserved_word(yytext, yyleng);
173 	    if (r)
174 	      {
175 		switch (token = r->token)
176 		  {
177 		  case ATTRIBUTE_KEYW:
178 		    lexstate = ST_ATTRIBUTE;
179 		    count = 0;
180 		    goto repeat;
181 		  case ASM_KEYW:
182 		    lexstate = ST_ASM;
183 		    count = 0;
184 		    goto repeat;
185 
186 		  case STRUCT_KEYW:
187 		  case UNION_KEYW:
188 		    dont_want_brace_phrase = 3;
189 		  case ENUM_KEYW:
190 		    suppress_type_lookup = 2;
191 		    goto fini;
192 
193 		  case EXPORT_SYMBOL_KEYW:
194 		      goto fini;
195 		  }
196 	      }
197 	    if (!suppress_type_lookup)
198 	      {
199 		struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
200 		if (sym && sym->type == SYM_TYPEDEF)
201 		  token = TYPE;
202 	      }
203 	  }
204 	  break;
205 
206 	case '[':
207 	  APP;
208 	  lexstate = ST_BRACKET;
209 	  count = 1;
210 	  goto repeat;
211 
212 	case '{':
213 	  APP;
214 	  if (dont_want_brace_phrase)
215 	    break;
216 	  lexstate = ST_BRACE;
217 	  count = 1;
218 	  goto repeat;
219 
220 	case '=': case ':':
221 	  APP;
222 	  lexstate = ST_EXPRESSION;
223 	  break;
224 
225 	case DOTS:
226 	default:
227 	  APP;
228 	  break;
229 	}
230       break;
231 
232     case ST_ATTRIBUTE:
233       APP;
234       switch (token)
235 	{
236 	case '(':
237 	  ++count;
238 	  goto repeat;
239 	case ')':
240 	  if (--count == 0)
241 	    {
242 	      lexstate = ST_NORMAL;
243 	      token = ATTRIBUTE_PHRASE;
244 	      break;
245 	    }
246 	  goto repeat;
247 	default:
248 	  goto repeat;
249 	}
250       break;
251 
252     case ST_ASM:
253       APP;
254       switch (token)
255 	{
256 	case '(':
257 	  ++count;
258 	  goto repeat;
259 	case ')':
260 	  if (--count == 0)
261 	    {
262 	      lexstate = ST_NORMAL;
263 	      token = ASM_PHRASE;
264 	      break;
265 	    }
266 	  goto repeat;
267 	default:
268 	  goto repeat;
269 	}
270       break;
271 
272     case ST_BRACKET:
273       APP;
274       switch (token)
275 	{
276 	case '[':
277 	  ++count;
278 	  goto repeat;
279 	case ']':
280 	  if (--count == 0)
281 	    {
282 	      lexstate = ST_NORMAL;
283 	      token = BRACKET_PHRASE;
284 	      break;
285 	    }
286 	  goto repeat;
287 	default:
288 	  goto repeat;
289 	}
290       break;
291 
292     case ST_BRACE:
293       APP;
294       switch (token)
295 	{
296 	case '{':
297 	  ++count;
298 	  goto repeat;
299 	case '}':
300 	  if (--count == 0)
301 	    {
302 	      lexstate = ST_NORMAL;
303 	      token = BRACE_PHRASE;
304 	      break;
305 	    }
306 	  goto repeat;
307 	default:
308 	  goto repeat;
309 	}
310       break;
311 
312     case ST_EXPRESSION:
313       switch (token)
314 	{
315 	case '(': case '[': case '{':
316 	  ++count;
317 	  APP;
318 	  goto repeat;
319 	case ')': case ']': case '}':
320 	  --count;
321 	  APP;
322 	  goto repeat;
323 	case ',': case ';':
324 	  if (count == 0)
325 	    {
326 	      /* Put back the token we just read so's we can find it again
327 		 after registering the expression.  */
328 	      unput(token);
329 
330 	      lexstate = ST_NORMAL;
331 	      token = EXPRESSION_PHRASE;
332 	      break;
333 	    }
334 	  APP;
335 	  goto repeat;
336 	default:
337 	  APP;
338 	  goto repeat;
339 	}
340       break;
341 
342     case ST_TABLE_1:
343       goto repeat;
344 
345     case ST_TABLE_2:
346       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
347 	{
348 	  token = EXPORT_SYMBOL_KEYW;
349 	  lexstate = ST_TABLE_5;
350 	  APP;
351 	  break;
352 	}
353       lexstate = ST_TABLE_6;
354       /* FALLTHRU */
355 
356     case ST_TABLE_6:
357       switch (token)
358 	{
359 	case '{': case '[': case '(':
360 	  ++count;
361 	  break;
362 	case '}': case ']': case ')':
363 	  --count;
364 	  break;
365 	case ',':
366 	  if (count == 0)
367 	    lexstate = ST_TABLE_2;
368 	  break;
369 	};
370       goto repeat;
371 
372     case ST_TABLE_3:
373       goto repeat;
374 
375     case ST_TABLE_4:
376       if (token == ';')
377 	lexstate = ST_NORMAL;
378       goto repeat;
379 
380     case ST_TABLE_5:
381       switch (token)
382 	{
383 	case ',':
384 	  token = ';';
385 	  lexstate = ST_TABLE_2;
386 	  APP;
387 	  break;
388 	default:
389 	  APP;
390 	  break;
391 	}
392       break;
393 
394     default:
395       exit(1);
396     }
397 fini:
398 
399   if (suppress_type_lookup > 0)
400     --suppress_type_lookup;
401   if (dont_want_brace_phrase > 0)
402     --dont_want_brace_phrase;
403 
404   yylval = &next_node->next;
405 
406   return token;
407 }
408