xref: /openbmc/linux/scripts/genksyms/lex.l (revision 31b90347)
1 /* Lexical analysis for genksyms.
2    Copyright 1996, 1997 Linux International.
3 
4    New implementation contributed by Richard Henderson <rth@tamu.edu>
5    Based on original work by Bjorn Ekwall <bj0rn@blox.se>
6 
7    Taken from Linux modutils 2.4.22.
8 
9    This program is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published by the
11    Free Software Foundation; either version 2 of the License, or (at your
12    option) any later version.
13 
14    This program is distributed in the hope that it will be useful, but
15    WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17    General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 
24 %{
25 
26 #include <limits.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <ctype.h>
30 
31 #include "genksyms.h"
32 #include "parse.tab.h"
33 
/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.

   YY_DECL is the declaration flex uses for the scanner function it
   generates; overriding it makes that scanner the file-local yylex1(),
   which the yylex() defined at the bottom of this file calls to obtain
   each basic token.  */
#define YY_DECL		static int yylex1(void)
37 
38 %}
39 
/* Identifiers: a letter, underscore or `$', followed by any number of
   letters, digits, underscores or `$'.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (a leading 0, so a lone "0" is octal),
   decimal (non-zero leading digit) or hexadecimal, optionally followed
   by one of the suffixes U, L, UL or LU in either case.
   NOTE(review): LL/ULL suffixes are not listed, so e.g. "1ULL" would
   lex as INT "1UL" followed by IDENT "L" -- confirm this is intended
   before changing it.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction (digits on at least one side of
   the dot) with an optional exponent, or plain digits with a mandatory
   exponent; both forms take an optional F or L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional L prefix.  The body
   is any run of non-backslash, non-quote characters interleaved with
   backslash-escaped characters; `.' does not match a newline, so an
   escaped newline is not accepted.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Multi-character operators: one of ~ % ^ & * + = | < > / - followed
   by `=', plus && || -> << >>.
   NOTE(review): `!=', `++' and `--' are not in this set and therefore
   come out as two single-character tokens -- confirm before relying on
   them being kept together.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
57 
58 /* We don't do multiple input files.  */
59 %option noyywrap
60 
61 %option noinput
62 
63 %%
64 
65 
 /* Keep track of our location in the original source files.  A line of
    the form `# <int> "<file>" ...' is returned as FILENAME so that
    yylex() can pick the file name and line number out of yytext.  Any
    other line starting with `#' is dropped, and it, like every bare
    newline, advances cur_line by one.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not distinguished here; every
    word is returned as IDENT and yylex() looks it up with
    is_reserved_word().  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters; the character itself is the
    token value.  Newlines never get here: `.' does not match them and
    they are consumed by the rules at the top.  */
.					return yytext[0];
91 
92 
93 %%
94 
95 /* Bring in the keyword recognizer.  */
96 
97 #include "keywords.hash.c"
98 
99 
/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) records one token in the phrase list.  It must be expanded
 * where `cur_node' and `next_node' (both struct string_list *) are in
 * scope:
 *
 *   - the node previously pre-allocated in `next_node' becomes `cur_node'
 *     and receives the token;
 *   - a fresh node is allocated into `next_node' with its `next' pointing
 *     at `cur_node', so `next_node->next' is always the most recently
 *     appended token;
 *   - `cur_node->string' is a newly allocated copy of the first L+1 bytes
 *     of T, i.e. T is expected to hold a NUL at index L;
 *   - `cur_node->in_source_file' is taken from the current value of
 *     `in_source_file'.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 *
 * Both macro parameters are parenthesized in the expansion so that an
 * expression argument cannot be re-associated by operator precedence.
 */
#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string =				   \
			    memcpy(xmalloc((L) + 1), (T), (L) + 1);	   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;       \
			} while (0)

/* Append the token flex has just matched (yytext, yyleng).  */
#define APP		_APP(yytext, yyleng)
123 
124 
125 /* The second stage lexer.  Here we incorporate knowledge of the state
126    of the parser to tailor the tokens that are returned.  */
127 
128 int
129 yylex(void)
130 {
131   static enum {
132     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
133     ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
134     ST_TABLE_5, ST_TABLE_6
135   } lexstate = ST_NOTSTARTED;
136 
137   static int suppress_type_lookup, dont_want_brace_phrase;
138   static struct string_list *next_node;
139 
140   int token, count = 0;
141   struct string_list *cur_node;
142 
143   if (lexstate == ST_NOTSTARTED)
144     {
145       next_node = xmalloc(sizeof(*next_node));
146       next_node->next = NULL;
147       lexstate = ST_NORMAL;
148     }
149 
150 repeat:
151   token = yylex1();
152 
153   if (token == 0)
154     return 0;
155   else if (token == FILENAME)
156     {
157       char *file, *e;
158 
159       /* Save the filename and line number for later error messages.  */
160 
161       if (cur_filename)
162 	free(cur_filename);
163 
164       file = strchr(yytext, '\"')+1;
165       e = strchr(file, '\"');
166       *e = '\0';
167       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
168       cur_line = atoi(yytext+2);
169 
170       if (!source_file) {
171         source_file = xstrdup(cur_filename);
172         in_source_file = 1;
173       } else {
174         in_source_file = (strcmp(cur_filename, source_file) == 0);
175       }
176 
177       goto repeat;
178     }
179 
180   switch (lexstate)
181     {
182     case ST_NORMAL:
183       switch (token)
184 	{
185 	case IDENT:
186 	  APP;
187 	  {
188 	    const struct resword *r = is_reserved_word(yytext, yyleng);
189 	    if (r)
190 	      {
191 		switch (token = r->token)
192 		  {
193 		  case ATTRIBUTE_KEYW:
194 		    lexstate = ST_ATTRIBUTE;
195 		    count = 0;
196 		    goto repeat;
197 		  case ASM_KEYW:
198 		    lexstate = ST_ASM;
199 		    count = 0;
200 		    goto repeat;
201 
202 		  case STRUCT_KEYW:
203 		  case UNION_KEYW:
204 		  case ENUM_KEYW:
205 		    dont_want_brace_phrase = 3;
206 		    suppress_type_lookup = 2;
207 		    goto fini;
208 
209 		  case EXPORT_SYMBOL_KEYW:
210 		      goto fini;
211 		  }
212 	      }
213 	    if (!suppress_type_lookup)
214 	      {
215 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
216 		  token = TYPE;
217 	      }
218 	  }
219 	  break;
220 
221 	case '[':
222 	  APP;
223 	  lexstate = ST_BRACKET;
224 	  count = 1;
225 	  goto repeat;
226 
227 	case '{':
228 	  APP;
229 	  if (dont_want_brace_phrase)
230 	    break;
231 	  lexstate = ST_BRACE;
232 	  count = 1;
233 	  goto repeat;
234 
235 	case '=': case ':':
236 	  APP;
237 	  lexstate = ST_EXPRESSION;
238 	  break;
239 
240 	case DOTS:
241 	default:
242 	  APP;
243 	  break;
244 	}
245       break;
246 
247     case ST_ATTRIBUTE:
248       APP;
249       switch (token)
250 	{
251 	case '(':
252 	  ++count;
253 	  goto repeat;
254 	case ')':
255 	  if (--count == 0)
256 	    {
257 	      lexstate = ST_NORMAL;
258 	      token = ATTRIBUTE_PHRASE;
259 	      break;
260 	    }
261 	  goto repeat;
262 	default:
263 	  goto repeat;
264 	}
265       break;
266 
267     case ST_ASM:
268       APP;
269       switch (token)
270 	{
271 	case '(':
272 	  ++count;
273 	  goto repeat;
274 	case ')':
275 	  if (--count == 0)
276 	    {
277 	      lexstate = ST_NORMAL;
278 	      token = ASM_PHRASE;
279 	      break;
280 	    }
281 	  goto repeat;
282 	default:
283 	  goto repeat;
284 	}
285       break;
286 
287     case ST_BRACKET:
288       APP;
289       switch (token)
290 	{
291 	case '[':
292 	  ++count;
293 	  goto repeat;
294 	case ']':
295 	  if (--count == 0)
296 	    {
297 	      lexstate = ST_NORMAL;
298 	      token = BRACKET_PHRASE;
299 	      break;
300 	    }
301 	  goto repeat;
302 	default:
303 	  goto repeat;
304 	}
305       break;
306 
307     case ST_BRACE:
308       APP;
309       switch (token)
310 	{
311 	case '{':
312 	  ++count;
313 	  goto repeat;
314 	case '}':
315 	  if (--count == 0)
316 	    {
317 	      lexstate = ST_NORMAL;
318 	      token = BRACE_PHRASE;
319 	      break;
320 	    }
321 	  goto repeat;
322 	default:
323 	  goto repeat;
324 	}
325       break;
326 
327     case ST_EXPRESSION:
328       switch (token)
329 	{
330 	case '(': case '[': case '{':
331 	  ++count;
332 	  APP;
333 	  goto repeat;
334 	case '}':
335 	  /* is this the last line of an enum declaration? */
336 	  if (count == 0)
337 	    {
338 	      /* Put back the token we just read so's we can find it again
339 		 after registering the expression.  */
340 	      unput(token);
341 
342 	      lexstate = ST_NORMAL;
343 	      token = EXPRESSION_PHRASE;
344 	      break;
345 	    }
346 	  /* FALLTHRU */
347 	case ')': case ']':
348 	  --count;
349 	  APP;
350 	  goto repeat;
351 	case ',': case ';':
352 	  if (count == 0)
353 	    {
354 	      /* Put back the token we just read so's we can find it again
355 		 after registering the expression.  */
356 	      unput(token);
357 
358 	      lexstate = ST_NORMAL;
359 	      token = EXPRESSION_PHRASE;
360 	      break;
361 	    }
362 	  APP;
363 	  goto repeat;
364 	default:
365 	  APP;
366 	  goto repeat;
367 	}
368       break;
369 
370     case ST_TABLE_1:
371       goto repeat;
372 
373     case ST_TABLE_2:
374       if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
375 	{
376 	  token = EXPORT_SYMBOL_KEYW;
377 	  lexstate = ST_TABLE_5;
378 	  APP;
379 	  break;
380 	}
381       lexstate = ST_TABLE_6;
382       /* FALLTHRU */
383 
384     case ST_TABLE_6:
385       switch (token)
386 	{
387 	case '{': case '[': case '(':
388 	  ++count;
389 	  break;
390 	case '}': case ']': case ')':
391 	  --count;
392 	  break;
393 	case ',':
394 	  if (count == 0)
395 	    lexstate = ST_TABLE_2;
396 	  break;
397 	};
398       goto repeat;
399 
400     case ST_TABLE_3:
401       goto repeat;
402 
403     case ST_TABLE_4:
404       if (token == ';')
405 	lexstate = ST_NORMAL;
406       goto repeat;
407 
408     case ST_TABLE_5:
409       switch (token)
410 	{
411 	case ',':
412 	  token = ';';
413 	  lexstate = ST_TABLE_2;
414 	  APP;
415 	  break;
416 	default:
417 	  APP;
418 	  break;
419 	}
420       break;
421 
422     default:
423       exit(1);
424     }
425 fini:
426 
427   if (suppress_type_lookup > 0)
428     --suppress_type_lookup;
429   if (dont_want_brace_phrase > 0)
430     --dont_want_brace_phrase;
431 
432   yylval = &next_node->next;
433 
434   return token;
435 }
436