1eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 2eb8dc403SDave Cobbley# ply: yacc.py 3eb8dc403SDave Cobbley# 4eb8dc403SDave Cobbley# Copyright (C) 2001-2009, 5eb8dc403SDave Cobbley# David M. Beazley (Dabeaz LLC) 6eb8dc403SDave Cobbley# All rights reserved. 7eb8dc403SDave Cobbley# 8eb8dc403SDave Cobbley# Redistribution and use in source and binary forms, with or without 9eb8dc403SDave Cobbley# modification, are permitted provided that the following conditions are 10eb8dc403SDave Cobbley# met: 11eb8dc403SDave Cobbley# 12eb8dc403SDave Cobbley# * Redistributions of source code must retain the above copyright notice, 13eb8dc403SDave Cobbley# this list of conditions and the following disclaimer. 14eb8dc403SDave Cobbley# * Redistributions in binary form must reproduce the above copyright notice, 15eb8dc403SDave Cobbley# this list of conditions and the following disclaimer in the documentation 16eb8dc403SDave Cobbley# and/or other materials provided with the distribution. 17eb8dc403SDave Cobbley# * Neither the name of the David Beazley or Dabeaz LLC may be used to 18eb8dc403SDave Cobbley# endorse or promote products derived from this software without 19eb8dc403SDave Cobbley# specific prior written permission. 20eb8dc403SDave Cobbley# 21eb8dc403SDave Cobbley# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22eb8dc403SDave Cobbley# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23eb8dc403SDave Cobbley# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24eb8dc403SDave Cobbley# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25eb8dc403SDave Cobbley# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26eb8dc403SDave Cobbley# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27eb8dc403SDave Cobbley# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28eb8dc403SDave Cobbley# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29eb8dc403SDave Cobbley# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30eb8dc403SDave Cobbley# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31eb8dc403SDave Cobbley# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 33eb8dc403SDave Cobbley# 34eb8dc403SDave Cobbley# This implements an LR parser that is constructed from grammar rules defined 35eb8dc403SDave Cobbley# as Python functions. The grammer is specified by supplying the BNF inside 36eb8dc403SDave Cobbley# Python documentation strings. The inspiration for this technique was borrowed 37eb8dc403SDave Cobbley# from John Aycock's Spark parsing system. PLY might be viewed as cross between 38eb8dc403SDave Cobbley# Spark and the GNU bison utility. 39eb8dc403SDave Cobbley# 40eb8dc403SDave Cobbley# The current implementation is only somewhat object-oriented. The 41eb8dc403SDave Cobbley# LR parser itself is defined in terms of an object (which allows multiple 42eb8dc403SDave Cobbley# parsers to co-exist). However, most of the variables used during table 43eb8dc403SDave Cobbley# construction are defined in terms of global variables. Users shouldn't 44eb8dc403SDave Cobbley# notice unless they are trying to define multiple parsers at the same 45eb8dc403SDave Cobbley# time using threads (in which case they should have their head examined). 46eb8dc403SDave Cobbley# 47eb8dc403SDave Cobbley# This implementation supports both SLR and LALR(1) parsing. 
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."  Modify the code at your own risk!
# ----------------------------------------------------------------------------

__version__ = "3.3"        # PLY release number
__tabversion__ = "3.2"     # Version stamp written into generated table files

# -----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
# -----------------------------------------------------------------------------

yaccdebug = 0              # Debugging mode.  When set, yacc writes a
                           # 'parser.out' file in the current directory

debug_file = 'parser.out'  # Default name of the debugging file
tab_module = 'parsetab'    # Default name of the table module
default_lr = 'LALR'        # Default LR table generation method

error_count = 3            # Number of symbols that must be shifted to leave
                           # error-recovery mode

yaccdevel = 0              # Set to True when developing yacc itself.  This
                           # turns off optimized implementations of certain
                           # functions.

resultlimit = 40           # Size limit of results when running in debug mode

pickle_protocol = 0        # Protocol to use when writing pickle files

import re
import types
import sys
import os.path

# Compatibility shim for Python 2.6/3.0: fetch a function's code object.
if sys.version_info[0] >= 3:
    def func_code(f):
        return f.__code__
else:
    def func_code(f):
        return f.func_code

# Largest native integer (sys.maxint no longer exists in Python 3).
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
104eb8dc403SDave Cobbleydef load_ply_lex(): 105eb8dc403SDave Cobbley if sys.version_info[0] < 3: 106eb8dc403SDave Cobbley import lex 107eb8dc403SDave Cobbley else: 108eb8dc403SDave Cobbley import ply.lex as lex 109eb8dc403SDave Cobbley return lex 110eb8dc403SDave Cobbley 111eb8dc403SDave Cobbley# This object is a stand-in for a logging object created by the 112eb8dc403SDave Cobbley# logging module. PLY will use this by default to create things 113eb8dc403SDave Cobbley# such as the parser.out file. If a user wants more detailed 114eb8dc403SDave Cobbley# information, they can create their own logging object and pass 115eb8dc403SDave Cobbley# it into PLY. 116eb8dc403SDave Cobbley 117eb8dc403SDave Cobbleyclass PlyLogger(object): 118eb8dc403SDave Cobbley def __init__(self,f): 119eb8dc403SDave Cobbley self.f = f 120eb8dc403SDave Cobbley def debug(self,msg,*args,**kwargs): 121eb8dc403SDave Cobbley self.f.write((msg % args) + "\n") 122eb8dc403SDave Cobbley info = debug 123eb8dc403SDave Cobbley 124eb8dc403SDave Cobbley def warning(self,msg,*args,**kwargs): 125eb8dc403SDave Cobbley self.f.write("WARNING: "+ (msg % args) + "\n") 126eb8dc403SDave Cobbley 127eb8dc403SDave Cobbley def error(self,msg,*args,**kwargs): 128eb8dc403SDave Cobbley self.f.write("ERROR: " + (msg % args) + "\n") 129eb8dc403SDave Cobbley 130eb8dc403SDave Cobbley critical = debug 131eb8dc403SDave Cobbley 132eb8dc403SDave Cobbley# Null logger is used when no output is generated. Does nothing. 133eb8dc403SDave Cobbleyclass NullLogger(object): 134eb8dc403SDave Cobbley def __getattribute__(self,name): 135eb8dc403SDave Cobbley return self 136eb8dc403SDave Cobbley def __call__(self,*args,**kwargs): 137eb8dc403SDave Cobbley return self 138eb8dc403SDave Cobbley 139eb8dc403SDave Cobbley# Exception raised for yacc-related errors 140eb8dc403SDave Cobbleyclass YaccError(Exception): pass 141eb8dc403SDave Cobbley 142eb8dc403SDave Cobbley# Format the result message that the parser produces when running in debug mode. 
143eb8dc403SDave Cobbleydef format_result(r): 144eb8dc403SDave Cobbley repr_str = repr(r) 145eb8dc403SDave Cobbley if '\n' in repr_str: repr_str = repr(repr_str) 146eb8dc403SDave Cobbley if len(repr_str) > resultlimit: 147eb8dc403SDave Cobbley repr_str = repr_str[:resultlimit]+" ..." 148eb8dc403SDave Cobbley result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) 149eb8dc403SDave Cobbley return result 150eb8dc403SDave Cobbley 151eb8dc403SDave Cobbley 152eb8dc403SDave Cobbley# Format stack entries when the parser is running in debug mode 153eb8dc403SDave Cobbleydef format_stack_entry(r): 154eb8dc403SDave Cobbley repr_str = repr(r) 155eb8dc403SDave Cobbley if '\n' in repr_str: repr_str = repr(repr_str) 156eb8dc403SDave Cobbley if len(repr_str) < 16: 157eb8dc403SDave Cobbley return repr_str 158eb8dc403SDave Cobbley else: 159eb8dc403SDave Cobbley return "<%s @ 0x%x>" % (type(r).__name__,id(r)) 160eb8dc403SDave Cobbley 161eb8dc403SDave Cobbley#----------------------------------------------------------------------------- 162eb8dc403SDave Cobbley# === LR Parsing Engine === 163eb8dc403SDave Cobbley# 164eb8dc403SDave Cobbley# The following classes are used for the LR parser itself. These are not 165eb8dc403SDave Cobbley# used during table construction and are independent of the actual LR 166eb8dc403SDave Cobbley# table generation algorithm 167eb8dc403SDave Cobbley#----------------------------------------------------------------------------- 168eb8dc403SDave Cobbley 169eb8dc403SDave Cobbley# This class is used to hold non-terminal grammar symbols during parsing. 
170eb8dc403SDave Cobbley# It normally has the following attributes set: 171eb8dc403SDave Cobbley# .type = Grammar symbol type 172eb8dc403SDave Cobbley# .value = Symbol value 173eb8dc403SDave Cobbley# .lineno = Starting line number 174eb8dc403SDave Cobbley# .endlineno = Ending line number (optional, set automatically) 175eb8dc403SDave Cobbley# .lexpos = Starting lex position 176eb8dc403SDave Cobbley# .endlexpos = Ending lex position (optional, set automatically) 177eb8dc403SDave Cobbley 178eb8dc403SDave Cobbleyclass YaccSymbol: 179eb8dc403SDave Cobbley def __str__(self): return self.type 180eb8dc403SDave Cobbley def __repr__(self): return str(self) 181eb8dc403SDave Cobbley 182eb8dc403SDave Cobbley# This class is a wrapper around the objects actually passed to each 183eb8dc403SDave Cobbley# grammar rule. Index lookup and assignment actually assign the 184eb8dc403SDave Cobbley# .value attribute of the underlying YaccSymbol object. 185eb8dc403SDave Cobbley# The lineno() method returns the line number of a given 186eb8dc403SDave Cobbley# item (or 0 if not defined). The linespan() method returns 187eb8dc403SDave Cobbley# a tuple of (startline,endline) representing the range of lines 188eb8dc403SDave Cobbley# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 189eb8dc403SDave Cobbley# representing the range of positional information for a symbol. 
class YaccProduction:
    """Wrapper around the grammar symbols passed to each grammar rule.

    Index lookup and assignment operate on the .value attribute of the
    underlying YaccSymbol objects in self.slice.  Negative indexes reach
    back into the parser's symbol stack.
    """
    def __init__(self,s,stack=None):
        self.slice = s          # YaccSymbol objects for the rule being reduced
        self.stack = stack      # Parser symbol stack (used for negative indexes)
        self.lexer = None       # Lexer in use; set by the parsing engine
        self.parser= None       # Owning parser; set by the parsing engine
    def __getitem__(self,n):
        # Slices expand element by element through this same method.
        if isinstance(n,slice):
            return [self[i] for i in range(*(n.indices(len(self.slice))))]
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        # Python 2 only; Python 3 routes slicing through __getitem__ above.
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        # Starting line number of item n, or 0 if not recorded.
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        # (startline, endline) range covered by item n.
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        # Starting lexer position of item n, or 0 if not recorded.
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        # (startpos, endpos) lexer-position range covered by item n.
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        # Called from a grammar rule to force the parser into error recovery.
        raise SyntaxError


# -----------------------------------------------------------------------------
# == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    """LR parsing engine driven by precomputed action/goto tables."""
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions   # Grammar production list
        self.action = lrtab.lr_action             # LR action table
        self.goto = lrtab.lr_goto                 # LR goto table
        self.errorfunc = errorf                   # User p_error() callback (may be None)

    def errok(self):
        # Signal that error recovery succeeded; callable from p_error().
        self.errorok = 1

    def restart(self):
        # Reset both stacks to the initial (state 0, $end) configuration.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        # Dispatch to the engine variant matching the requested features:
        # debug logging, line/position tracking, or the fully optimized path.
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.   For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        """Run the LR parsing loop with full debug logging.

        This is the reference implementation of the engine; the optimized
        variants are derived from it.  Returns the value of the start symbol
        on acceptance, or None after an unrecoverable syntax error.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                 # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions          # Local reference to production list (to avoid lookup on self.)
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack = [ ]                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken = None                 # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # Lexer exhausted: synthesize the end-of-input marker.
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                    # --! DEBUG

                    if plen:
                        # Non-empty production: splice the RHS symbols into targ,
                        # with the new LHS symbol in slot 0.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the value of the start symbol is the parse result.
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t is None:

                # --! DEBUG
                debug.error('Error : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): the guard checks errtoken but the
                            # value read is lookahead.lineno — longstanding
                            # upstream quirk; confirm before changing.
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here.   Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in a synthetic 'error' symbol
                    # and push the real token back for a later retry.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
597eb8dc403SDave Cobbley 598eb8dc403SDave Cobbley 599eb8dc403SDave Cobbley def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 600eb8dc403SDave Cobbley lookahead = None # Current lookahead symbol 601eb8dc403SDave Cobbley lookaheadstack = [ ] # Stack of lookahead symbols 602eb8dc403SDave Cobbley actions = self.action # Local reference to action table (to avoid lookup on self.) 603eb8dc403SDave Cobbley goto = self.goto # Local reference to goto table (to avoid lookup on self.) 604eb8dc403SDave Cobbley prod = self.productions # Local reference to production list (to avoid lookup on self.) 605eb8dc403SDave Cobbley pslice = YaccProduction(None) # Production object passed to grammar rules 606eb8dc403SDave Cobbley errorcount = 0 # Used during error recovery 607eb8dc403SDave Cobbley 608eb8dc403SDave Cobbley # If no lexer was given, we will try to use the lex module 609eb8dc403SDave Cobbley if not lexer: 610eb8dc403SDave Cobbley lex = load_ply_lex() 611eb8dc403SDave Cobbley lexer = lex.lexer 612eb8dc403SDave Cobbley 613eb8dc403SDave Cobbley # Set up the lexer and parser objects on pslice 614eb8dc403SDave Cobbley pslice.lexer = lexer 615eb8dc403SDave Cobbley pslice.parser = self 616eb8dc403SDave Cobbley 617eb8dc403SDave Cobbley # If input was supplied, pass to lexer 618eb8dc403SDave Cobbley if input is not None: 619eb8dc403SDave Cobbley lexer.input(input) 620eb8dc403SDave Cobbley 621eb8dc403SDave Cobbley if tokenfunc is None: 622eb8dc403SDave Cobbley # Tokenize function 623eb8dc403SDave Cobbley get_token = lexer.token 624eb8dc403SDave Cobbley else: 625eb8dc403SDave Cobbley get_token = tokenfunc 626eb8dc403SDave Cobbley 627eb8dc403SDave Cobbley # Set up the state and symbol stacks 628eb8dc403SDave Cobbley 629eb8dc403SDave Cobbley statestack = [ ] # Stack of parsing states 630eb8dc403SDave Cobbley self.statestack = statestack 631eb8dc403SDave Cobbley symstack = [ ] # Stack of grammar symbols 632eb8dc403SDave Cobbley self.symstack = symstack 
633eb8dc403SDave Cobbley 634eb8dc403SDave Cobbley pslice.stack = symstack # Put in the production 635eb8dc403SDave Cobbley errtoken = None # Err token 636eb8dc403SDave Cobbley 637eb8dc403SDave Cobbley # The start state is assumed to be (0,$end) 638eb8dc403SDave Cobbley 639eb8dc403SDave Cobbley statestack.append(0) 640eb8dc403SDave Cobbley sym = YaccSymbol() 641eb8dc403SDave Cobbley sym.type = '$end' 642eb8dc403SDave Cobbley symstack.append(sym) 643eb8dc403SDave Cobbley state = 0 644eb8dc403SDave Cobbley while 1: 645eb8dc403SDave Cobbley # Get the next symbol on the input. If a lookahead symbol 646eb8dc403SDave Cobbley # is already set, we just use that. Otherwise, we'll pull 647eb8dc403SDave Cobbley # the next token off of the lookaheadstack or from the lexer 648eb8dc403SDave Cobbley 649eb8dc403SDave Cobbley if not lookahead: 650eb8dc403SDave Cobbley if not lookaheadstack: 651eb8dc403SDave Cobbley lookahead = get_token() # Get the next token 652eb8dc403SDave Cobbley else: 653eb8dc403SDave Cobbley lookahead = lookaheadstack.pop() 654eb8dc403SDave Cobbley if not lookahead: 655eb8dc403SDave Cobbley lookahead = YaccSymbol() 656eb8dc403SDave Cobbley lookahead.type = '$end' 657eb8dc403SDave Cobbley 658eb8dc403SDave Cobbley # Check the action table 659eb8dc403SDave Cobbley ltype = lookahead.type 660eb8dc403SDave Cobbley t = actions[state].get(ltype) 661eb8dc403SDave Cobbley 662eb8dc403SDave Cobbley if t is not None: 663eb8dc403SDave Cobbley if t > 0: 664eb8dc403SDave Cobbley # shift a symbol on the stack 665eb8dc403SDave Cobbley statestack.append(t) 666eb8dc403SDave Cobbley state = t 667eb8dc403SDave Cobbley 668eb8dc403SDave Cobbley symstack.append(lookahead) 669eb8dc403SDave Cobbley lookahead = None 670eb8dc403SDave Cobbley 671eb8dc403SDave Cobbley # Decrease error count on successful shift 672eb8dc403SDave Cobbley if errorcount: errorcount -=1 673eb8dc403SDave Cobbley continue 674eb8dc403SDave Cobbley 675eb8dc403SDave Cobbley if t < 0: 676eb8dc403SDave Cobbley # reduce 
a symbol on the stack, emit a production 677eb8dc403SDave Cobbley p = prod[-t] 678eb8dc403SDave Cobbley pname = p.name 679eb8dc403SDave Cobbley plen = p.len 680eb8dc403SDave Cobbley 681eb8dc403SDave Cobbley # Get production function 682eb8dc403SDave Cobbley sym = YaccSymbol() 683eb8dc403SDave Cobbley sym.type = pname # Production name 684eb8dc403SDave Cobbley sym.value = None 685eb8dc403SDave Cobbley 686eb8dc403SDave Cobbley if plen: 687eb8dc403SDave Cobbley targ = symstack[-plen-1:] 688eb8dc403SDave Cobbley targ[0] = sym 689eb8dc403SDave Cobbley 690eb8dc403SDave Cobbley # --! TRACKING 691eb8dc403SDave Cobbley if tracking: 692eb8dc403SDave Cobbley t1 = targ[1] 693eb8dc403SDave Cobbley sym.lineno = t1.lineno 694eb8dc403SDave Cobbley sym.lexpos = t1.lexpos 695eb8dc403SDave Cobbley t1 = targ[-1] 696eb8dc403SDave Cobbley sym.endlineno = getattr(t1,"endlineno",t1.lineno) 697eb8dc403SDave Cobbley sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 698eb8dc403SDave Cobbley 699eb8dc403SDave Cobbley # --! TRACKING 700eb8dc403SDave Cobbley 701eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 702eb8dc403SDave Cobbley # The code enclosed in this section is duplicated 703eb8dc403SDave Cobbley # below as a performance optimization. Make sure 704eb8dc403SDave Cobbley # changes get made in both locations. 705eb8dc403SDave Cobbley 706eb8dc403SDave Cobbley pslice.slice = targ 707eb8dc403SDave Cobbley 708eb8dc403SDave Cobbley try: 709eb8dc403SDave Cobbley # Call the grammar rule with our special slice object 710eb8dc403SDave Cobbley del symstack[-plen:] 711eb8dc403SDave Cobbley del statestack[-plen:] 712eb8dc403SDave Cobbley p.callable(pslice) 713eb8dc403SDave Cobbley symstack.append(sym) 714eb8dc403SDave Cobbley state = goto[statestack[-1]][pname] 715eb8dc403SDave Cobbley statestack.append(state) 716eb8dc403SDave Cobbley except SyntaxError: 717eb8dc403SDave Cobbley # If an error was set. 
Enter error recovery state 718eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 719eb8dc403SDave Cobbley symstack.pop() 720eb8dc403SDave Cobbley statestack.pop() 721eb8dc403SDave Cobbley state = statestack[-1] 722eb8dc403SDave Cobbley sym.type = 'error' 723eb8dc403SDave Cobbley lookahead = sym 724eb8dc403SDave Cobbley errorcount = error_count 725eb8dc403SDave Cobbley self.errorok = 0 726eb8dc403SDave Cobbley continue 727eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 728eb8dc403SDave Cobbley 729eb8dc403SDave Cobbley else: 730eb8dc403SDave Cobbley 731eb8dc403SDave Cobbley # --! TRACKING 732eb8dc403SDave Cobbley if tracking: 733eb8dc403SDave Cobbley sym.lineno = lexer.lineno 734eb8dc403SDave Cobbley sym.lexpos = lexer.lexpos 735eb8dc403SDave Cobbley # --! TRACKING 736eb8dc403SDave Cobbley 737eb8dc403SDave Cobbley targ = [ sym ] 738eb8dc403SDave Cobbley 739eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 740eb8dc403SDave Cobbley # The code enclosed in this section is duplicated 741eb8dc403SDave Cobbley # above as a performance optimization. Make sure 742eb8dc403SDave Cobbley # changes get made in both locations. 743eb8dc403SDave Cobbley 744eb8dc403SDave Cobbley pslice.slice = targ 745eb8dc403SDave Cobbley 746eb8dc403SDave Cobbley try: 747eb8dc403SDave Cobbley # Call the grammar rule with our special slice object 748eb8dc403SDave Cobbley p.callable(pslice) 749eb8dc403SDave Cobbley symstack.append(sym) 750eb8dc403SDave Cobbley state = goto[statestack[-1]][pname] 751eb8dc403SDave Cobbley statestack.append(state) 752eb8dc403SDave Cobbley except SyntaxError: 753eb8dc403SDave Cobbley # If an error was set. 
Enter error recovery state 754eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 755eb8dc403SDave Cobbley symstack.pop() 756eb8dc403SDave Cobbley statestack.pop() 757eb8dc403SDave Cobbley state = statestack[-1] 758eb8dc403SDave Cobbley sym.type = 'error' 759eb8dc403SDave Cobbley lookahead = sym 760eb8dc403SDave Cobbley errorcount = error_count 761eb8dc403SDave Cobbley self.errorok = 0 762eb8dc403SDave Cobbley continue 763eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 764eb8dc403SDave Cobbley 765eb8dc403SDave Cobbley if t == 0: 766eb8dc403SDave Cobbley n = symstack[-1] 767eb8dc403SDave Cobbley return getattr(n,"value",None) 768eb8dc403SDave Cobbley 76982c905dcSAndrew Geissler if t is None: 770eb8dc403SDave Cobbley 771eb8dc403SDave Cobbley # We have some kind of parsing error here. To handle 772eb8dc403SDave Cobbley # this, we are going to push the current token onto 773eb8dc403SDave Cobbley # the tokenstack and replace it with an 'error' token. 774eb8dc403SDave Cobbley # If there are any synchronization rules, they may 775eb8dc403SDave Cobbley # catch it. 776eb8dc403SDave Cobbley # 777eb8dc403SDave Cobbley # In addition to pushing the error token, we call call 778eb8dc403SDave Cobbley # the user defined p_error() function if this is the 779eb8dc403SDave Cobbley # first syntax error. This function is only called if 780eb8dc403SDave Cobbley # errorcount == 0. 781eb8dc403SDave Cobbley if errorcount == 0 or self.errorok: 782eb8dc403SDave Cobbley errorcount = error_count 783eb8dc403SDave Cobbley self.errorok = 0 784eb8dc403SDave Cobbley errtoken = lookahead 785eb8dc403SDave Cobbley if errtoken.type == '$end': 786eb8dc403SDave Cobbley errtoken = None # End of file! 
787eb8dc403SDave Cobbley if self.errorfunc: 788eb8dc403SDave Cobbley global errok,token,restart 789eb8dc403SDave Cobbley errok = self.errok # Set some special functions available in error recovery 790eb8dc403SDave Cobbley token = get_token 791eb8dc403SDave Cobbley restart = self.restart 792eb8dc403SDave Cobbley if errtoken and not hasattr(errtoken,'lexer'): 793eb8dc403SDave Cobbley errtoken.lexer = lexer 794eb8dc403SDave Cobbley tok = self.errorfunc(errtoken) 795eb8dc403SDave Cobbley del errok, token, restart # Delete special functions 796eb8dc403SDave Cobbley 797eb8dc403SDave Cobbley if self.errorok: 798eb8dc403SDave Cobbley # User must have done some kind of panic 799eb8dc403SDave Cobbley # mode recovery on their own. The 800eb8dc403SDave Cobbley # returned token is the next lookahead 801eb8dc403SDave Cobbley lookahead = tok 802eb8dc403SDave Cobbley errtoken = None 803eb8dc403SDave Cobbley continue 804eb8dc403SDave Cobbley else: 805eb8dc403SDave Cobbley if errtoken: 806eb8dc403SDave Cobbley if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 807eb8dc403SDave Cobbley else: lineno = 0 808eb8dc403SDave Cobbley if lineno: 809eb8dc403SDave Cobbley sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 810eb8dc403SDave Cobbley else: 811eb8dc403SDave Cobbley sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 812eb8dc403SDave Cobbley else: 813eb8dc403SDave Cobbley sys.stderr.write("yacc: Parse error in input. EOF\n") 814eb8dc403SDave Cobbley return 815eb8dc403SDave Cobbley 816eb8dc403SDave Cobbley else: 817eb8dc403SDave Cobbley errorcount = error_count 818eb8dc403SDave Cobbley 819eb8dc403SDave Cobbley # case 1: the statestack only has 1 entry on it. If we're in this state, the 820eb8dc403SDave Cobbley # entire parse has been rolled back and we're completely hosed. The token is 821eb8dc403SDave Cobbley # discarded and we just keep going. 
822eb8dc403SDave Cobbley 823eb8dc403SDave Cobbley if len(statestack) <= 1 and lookahead.type != '$end': 824eb8dc403SDave Cobbley lookahead = None 825eb8dc403SDave Cobbley errtoken = None 826eb8dc403SDave Cobbley state = 0 827eb8dc403SDave Cobbley # Nuke the pushback stack 828eb8dc403SDave Cobbley del lookaheadstack[:] 829eb8dc403SDave Cobbley continue 830eb8dc403SDave Cobbley 831eb8dc403SDave Cobbley # case 2: the statestack has a couple of entries on it, but we're 832eb8dc403SDave Cobbley # at the end of the file. nuke the top entry and generate an error token 833eb8dc403SDave Cobbley 834eb8dc403SDave Cobbley # Start nuking entries on the stack 835eb8dc403SDave Cobbley if lookahead.type == '$end': 836eb8dc403SDave Cobbley # Whoa. We're really hosed here. Bail out 837eb8dc403SDave Cobbley return 838eb8dc403SDave Cobbley 839eb8dc403SDave Cobbley if lookahead.type != 'error': 840eb8dc403SDave Cobbley sym = symstack[-1] 841eb8dc403SDave Cobbley if sym.type == 'error': 842eb8dc403SDave Cobbley # Hmmm. 
Error is on top of stack, we'll just nuke input 843eb8dc403SDave Cobbley # symbol and continue 844eb8dc403SDave Cobbley lookahead = None 845eb8dc403SDave Cobbley continue 846eb8dc403SDave Cobbley t = YaccSymbol() 847eb8dc403SDave Cobbley t.type = 'error' 848eb8dc403SDave Cobbley if hasattr(lookahead,"lineno"): 849eb8dc403SDave Cobbley t.lineno = lookahead.lineno 850eb8dc403SDave Cobbley t.value = lookahead 851eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 852eb8dc403SDave Cobbley lookahead = t 853eb8dc403SDave Cobbley else: 854eb8dc403SDave Cobbley symstack.pop() 855eb8dc403SDave Cobbley statestack.pop() 856eb8dc403SDave Cobbley state = statestack[-1] # Potential bug fix 857eb8dc403SDave Cobbley 858eb8dc403SDave Cobbley continue 859eb8dc403SDave Cobbley 860eb8dc403SDave Cobbley # Call an error function here 861eb8dc403SDave Cobbley raise RuntimeError("yacc: internal parser error!!!\n") 862eb8dc403SDave Cobbley 863eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 864eb8dc403SDave Cobbley # parseopt_notrack(). 865eb8dc403SDave Cobbley # 866eb8dc403SDave Cobbley # Optimized version of parseopt() with line number tracking removed. 867eb8dc403SDave Cobbley # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove 868eb8dc403SDave Cobbley # code in the #--! TRACKING sections 869eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 870eb8dc403SDave Cobbley 871eb8dc403SDave Cobbley def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 872eb8dc403SDave Cobbley lookahead = None # Current lookahead symbol 873eb8dc403SDave Cobbley lookaheadstack = [ ] # Stack of lookahead symbols 874eb8dc403SDave Cobbley actions = self.action # Local reference to action table (to avoid lookup on self.) 875eb8dc403SDave Cobbley goto = self.goto # Local reference to goto table (to avoid lookup on self.) 
876eb8dc403SDave Cobbley prod = self.productions # Local reference to production list (to avoid lookup on self.) 877eb8dc403SDave Cobbley pslice = YaccProduction(None) # Production object passed to grammar rules 878eb8dc403SDave Cobbley errorcount = 0 # Used during error recovery 879eb8dc403SDave Cobbley 880eb8dc403SDave Cobbley # If no lexer was given, we will try to use the lex module 881eb8dc403SDave Cobbley if not lexer: 882eb8dc403SDave Cobbley lex = load_ply_lex() 883eb8dc403SDave Cobbley lexer = lex.lexer 884eb8dc403SDave Cobbley 885eb8dc403SDave Cobbley # Set up the lexer and parser objects on pslice 886eb8dc403SDave Cobbley pslice.lexer = lexer 887eb8dc403SDave Cobbley pslice.parser = self 888eb8dc403SDave Cobbley 889eb8dc403SDave Cobbley # If input was supplied, pass to lexer 890eb8dc403SDave Cobbley if input is not None: 891eb8dc403SDave Cobbley lexer.input(input) 892eb8dc403SDave Cobbley 893eb8dc403SDave Cobbley if tokenfunc is None: 894eb8dc403SDave Cobbley # Tokenize function 895eb8dc403SDave Cobbley get_token = lexer.token 896eb8dc403SDave Cobbley else: 897eb8dc403SDave Cobbley get_token = tokenfunc 898eb8dc403SDave Cobbley 899eb8dc403SDave Cobbley # Set up the state and symbol stacks 900eb8dc403SDave Cobbley 901eb8dc403SDave Cobbley statestack = [ ] # Stack of parsing states 902eb8dc403SDave Cobbley self.statestack = statestack 903eb8dc403SDave Cobbley symstack = [ ] # Stack of grammar symbols 904eb8dc403SDave Cobbley self.symstack = symstack 905eb8dc403SDave Cobbley 906eb8dc403SDave Cobbley pslice.stack = symstack # Put in the production 907eb8dc403SDave Cobbley errtoken = None # Err token 908eb8dc403SDave Cobbley 909eb8dc403SDave Cobbley # The start state is assumed to be (0,$end) 910eb8dc403SDave Cobbley 911eb8dc403SDave Cobbley statestack.append(0) 912eb8dc403SDave Cobbley sym = YaccSymbol() 913eb8dc403SDave Cobbley sym.type = '$end' 914eb8dc403SDave Cobbley symstack.append(sym) 915eb8dc403SDave Cobbley state = 0 916eb8dc403SDave Cobbley while 
1: 917eb8dc403SDave Cobbley # Get the next symbol on the input. If a lookahead symbol 918eb8dc403SDave Cobbley # is already set, we just use that. Otherwise, we'll pull 919eb8dc403SDave Cobbley # the next token off of the lookaheadstack or from the lexer 920eb8dc403SDave Cobbley 921eb8dc403SDave Cobbley if not lookahead: 922eb8dc403SDave Cobbley if not lookaheadstack: 923eb8dc403SDave Cobbley lookahead = get_token() # Get the next token 924eb8dc403SDave Cobbley else: 925eb8dc403SDave Cobbley lookahead = lookaheadstack.pop() 926eb8dc403SDave Cobbley if not lookahead: 927eb8dc403SDave Cobbley lookahead = YaccSymbol() 928eb8dc403SDave Cobbley lookahead.type = '$end' 929eb8dc403SDave Cobbley 930eb8dc403SDave Cobbley # Check the action table 931eb8dc403SDave Cobbley ltype = lookahead.type 932eb8dc403SDave Cobbley t = actions[state].get(ltype) 933eb8dc403SDave Cobbley 934eb8dc403SDave Cobbley if t is not None: 935eb8dc403SDave Cobbley if t > 0: 936eb8dc403SDave Cobbley # shift a symbol on the stack 937eb8dc403SDave Cobbley statestack.append(t) 938eb8dc403SDave Cobbley state = t 939eb8dc403SDave Cobbley 940eb8dc403SDave Cobbley symstack.append(lookahead) 941eb8dc403SDave Cobbley lookahead = None 942eb8dc403SDave Cobbley 943eb8dc403SDave Cobbley # Decrease error count on successful shift 944eb8dc403SDave Cobbley if errorcount: errorcount -=1 945eb8dc403SDave Cobbley continue 946eb8dc403SDave Cobbley 947eb8dc403SDave Cobbley if t < 0: 948eb8dc403SDave Cobbley # reduce a symbol on the stack, emit a production 949eb8dc403SDave Cobbley p = prod[-t] 950eb8dc403SDave Cobbley pname = p.name 951eb8dc403SDave Cobbley plen = p.len 952eb8dc403SDave Cobbley 953eb8dc403SDave Cobbley # Get production function 954eb8dc403SDave Cobbley sym = YaccSymbol() 955eb8dc403SDave Cobbley sym.type = pname # Production name 956eb8dc403SDave Cobbley sym.value = None 957eb8dc403SDave Cobbley 958eb8dc403SDave Cobbley if plen: 959eb8dc403SDave Cobbley targ = symstack[-plen-1:] 960eb8dc403SDave Cobbley 
targ[0] = sym 961eb8dc403SDave Cobbley 962eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 963eb8dc403SDave Cobbley # The code enclosed in this section is duplicated 964eb8dc403SDave Cobbley # below as a performance optimization. Make sure 965eb8dc403SDave Cobbley # changes get made in both locations. 966eb8dc403SDave Cobbley 967eb8dc403SDave Cobbley pslice.slice = targ 968eb8dc403SDave Cobbley 969eb8dc403SDave Cobbley try: 970eb8dc403SDave Cobbley # Call the grammar rule with our special slice object 971eb8dc403SDave Cobbley del symstack[-plen:] 972eb8dc403SDave Cobbley del statestack[-plen:] 973eb8dc403SDave Cobbley p.callable(pslice) 974eb8dc403SDave Cobbley symstack.append(sym) 975eb8dc403SDave Cobbley state = goto[statestack[-1]][pname] 976eb8dc403SDave Cobbley statestack.append(state) 977eb8dc403SDave Cobbley except SyntaxError: 978eb8dc403SDave Cobbley # If an error was set. Enter error recovery state 979eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 980eb8dc403SDave Cobbley symstack.pop() 981eb8dc403SDave Cobbley statestack.pop() 982eb8dc403SDave Cobbley state = statestack[-1] 983eb8dc403SDave Cobbley sym.type = 'error' 984eb8dc403SDave Cobbley lookahead = sym 985eb8dc403SDave Cobbley errorcount = error_count 986eb8dc403SDave Cobbley self.errorok = 0 987eb8dc403SDave Cobbley continue 988eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 989eb8dc403SDave Cobbley 990eb8dc403SDave Cobbley else: 991eb8dc403SDave Cobbley 992eb8dc403SDave Cobbley targ = [ sym ] 993eb8dc403SDave Cobbley 994eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 995eb8dc403SDave Cobbley # The code enclosed in this section is duplicated 996eb8dc403SDave Cobbley # above as a performance optimization. Make sure 997eb8dc403SDave Cobbley # changes get made in both locations. 
998eb8dc403SDave Cobbley 999eb8dc403SDave Cobbley pslice.slice = targ 1000eb8dc403SDave Cobbley 1001eb8dc403SDave Cobbley try: 1002eb8dc403SDave Cobbley # Call the grammar rule with our special slice object 1003eb8dc403SDave Cobbley p.callable(pslice) 1004eb8dc403SDave Cobbley symstack.append(sym) 1005eb8dc403SDave Cobbley state = goto[statestack[-1]][pname] 1006eb8dc403SDave Cobbley statestack.append(state) 1007eb8dc403SDave Cobbley except SyntaxError: 1008eb8dc403SDave Cobbley # If an error was set. Enter error recovery state 1009eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 1010eb8dc403SDave Cobbley symstack.pop() 1011eb8dc403SDave Cobbley statestack.pop() 1012eb8dc403SDave Cobbley state = statestack[-1] 1013eb8dc403SDave Cobbley sym.type = 'error' 1014eb8dc403SDave Cobbley lookahead = sym 1015eb8dc403SDave Cobbley errorcount = error_count 1016eb8dc403SDave Cobbley self.errorok = 0 1017eb8dc403SDave Cobbley continue 1018eb8dc403SDave Cobbley # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1019eb8dc403SDave Cobbley 1020eb8dc403SDave Cobbley if t == 0: 1021eb8dc403SDave Cobbley n = symstack[-1] 1022eb8dc403SDave Cobbley return getattr(n,"value",None) 1023eb8dc403SDave Cobbley 102482c905dcSAndrew Geissler if t is None: 1025eb8dc403SDave Cobbley 1026eb8dc403SDave Cobbley # We have some kind of parsing error here. To handle 1027eb8dc403SDave Cobbley # this, we are going to push the current token onto 1028eb8dc403SDave Cobbley # the tokenstack and replace it with an 'error' token. 1029eb8dc403SDave Cobbley # If there are any synchronization rules, they may 1030eb8dc403SDave Cobbley # catch it. 1031eb8dc403SDave Cobbley # 1032eb8dc403SDave Cobbley # In addition to pushing the error token, we call call 1033eb8dc403SDave Cobbley # the user defined p_error() function if this is the 1034eb8dc403SDave Cobbley # first syntax error. This function is only called if 1035eb8dc403SDave Cobbley # errorcount == 0. 
1036eb8dc403SDave Cobbley if errorcount == 0 or self.errorok: 1037eb8dc403SDave Cobbley errorcount = error_count 1038eb8dc403SDave Cobbley self.errorok = 0 1039eb8dc403SDave Cobbley errtoken = lookahead 1040eb8dc403SDave Cobbley if errtoken.type == '$end': 1041eb8dc403SDave Cobbley errtoken = None # End of file! 1042eb8dc403SDave Cobbley if self.errorfunc: 1043eb8dc403SDave Cobbley global errok,token,restart 1044eb8dc403SDave Cobbley errok = self.errok # Set some special functions available in error recovery 1045eb8dc403SDave Cobbley token = get_token 1046eb8dc403SDave Cobbley restart = self.restart 1047eb8dc403SDave Cobbley if errtoken and not hasattr(errtoken,'lexer'): 1048eb8dc403SDave Cobbley errtoken.lexer = lexer 1049eb8dc403SDave Cobbley tok = self.errorfunc(errtoken) 1050eb8dc403SDave Cobbley del errok, token, restart # Delete special functions 1051eb8dc403SDave Cobbley 1052eb8dc403SDave Cobbley if self.errorok: 1053eb8dc403SDave Cobbley # User must have done some kind of panic 1054eb8dc403SDave Cobbley # mode recovery on their own. The 1055eb8dc403SDave Cobbley # returned token is the next lookahead 1056eb8dc403SDave Cobbley lookahead = tok 1057eb8dc403SDave Cobbley errtoken = None 1058eb8dc403SDave Cobbley continue 1059eb8dc403SDave Cobbley else: 1060eb8dc403SDave Cobbley if errtoken: 1061eb8dc403SDave Cobbley if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 1062eb8dc403SDave Cobbley else: lineno = 0 1063eb8dc403SDave Cobbley if lineno: 1064eb8dc403SDave Cobbley sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 1065eb8dc403SDave Cobbley else: 1066eb8dc403SDave Cobbley sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 1067eb8dc403SDave Cobbley else: 1068eb8dc403SDave Cobbley sys.stderr.write("yacc: Parse error in input. 
EOF\n") 1069eb8dc403SDave Cobbley return 1070eb8dc403SDave Cobbley 1071eb8dc403SDave Cobbley else: 1072eb8dc403SDave Cobbley errorcount = error_count 1073eb8dc403SDave Cobbley 1074eb8dc403SDave Cobbley # case 1: the statestack only has 1 entry on it. If we're in this state, the 1075eb8dc403SDave Cobbley # entire parse has been rolled back and we're completely hosed. The token is 1076eb8dc403SDave Cobbley # discarded and we just keep going. 1077eb8dc403SDave Cobbley 1078eb8dc403SDave Cobbley if len(statestack) <= 1 and lookahead.type != '$end': 1079eb8dc403SDave Cobbley lookahead = None 1080eb8dc403SDave Cobbley errtoken = None 1081eb8dc403SDave Cobbley state = 0 1082eb8dc403SDave Cobbley # Nuke the pushback stack 1083eb8dc403SDave Cobbley del lookaheadstack[:] 1084eb8dc403SDave Cobbley continue 1085eb8dc403SDave Cobbley 1086eb8dc403SDave Cobbley # case 2: the statestack has a couple of entries on it, but we're 1087eb8dc403SDave Cobbley # at the end of the file. nuke the top entry and generate an error token 1088eb8dc403SDave Cobbley 1089eb8dc403SDave Cobbley # Start nuking entries on the stack 1090eb8dc403SDave Cobbley if lookahead.type == '$end': 1091eb8dc403SDave Cobbley # Whoa. We're really hosed here. Bail out 1092eb8dc403SDave Cobbley return 1093eb8dc403SDave Cobbley 1094eb8dc403SDave Cobbley if lookahead.type != 'error': 1095eb8dc403SDave Cobbley sym = symstack[-1] 1096eb8dc403SDave Cobbley if sym.type == 'error': 1097eb8dc403SDave Cobbley # Hmmm. 
Error is on top of stack, we'll just nuke input 1098eb8dc403SDave Cobbley # symbol and continue 1099eb8dc403SDave Cobbley lookahead = None 1100eb8dc403SDave Cobbley continue 1101eb8dc403SDave Cobbley t = YaccSymbol() 1102eb8dc403SDave Cobbley t.type = 'error' 1103eb8dc403SDave Cobbley if hasattr(lookahead,"lineno"): 1104eb8dc403SDave Cobbley t.lineno = lookahead.lineno 1105eb8dc403SDave Cobbley t.value = lookahead 1106eb8dc403SDave Cobbley lookaheadstack.append(lookahead) 1107eb8dc403SDave Cobbley lookahead = t 1108eb8dc403SDave Cobbley else: 1109eb8dc403SDave Cobbley symstack.pop() 1110eb8dc403SDave Cobbley statestack.pop() 1111eb8dc403SDave Cobbley state = statestack[-1] # Potential bug fix 1112eb8dc403SDave Cobbley 1113eb8dc403SDave Cobbley continue 1114eb8dc403SDave Cobbley 1115eb8dc403SDave Cobbley # Call an error function here 1116eb8dc403SDave Cobbley raise RuntimeError("yacc: internal parser error!!!\n") 1117eb8dc403SDave Cobbley 1118eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 1119eb8dc403SDave Cobbley# === Grammar Representation === 1120eb8dc403SDave Cobbley# 1121eb8dc403SDave Cobbley# The following functions, classes, and variables are used to represent and 1122eb8dc403SDave Cobbley# manipulate the rules that make up a grammar. 1123eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 1124eb8dc403SDave Cobbley 1125eb8dc403SDave Cobbleyimport re 1126eb8dc403SDave Cobbley 1127eb8dc403SDave Cobbley# regex matching identifiers 1128eb8dc403SDave Cobbley_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 1129eb8dc403SDave Cobbley 1130eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 1131eb8dc403SDave Cobbley# class Production: 1132eb8dc403SDave Cobbley# 1133eb8dc403SDave Cobbley# This class stores the raw information about a single production or grammar rule. 
class Production(object):
    """Raw information about a single production (grammar rule).

    A grammar rule refers to a specification such as:

        expr : expr PLUS term

    Attributes defined on all productions:

      name     - Name of the production, e.g. 'expr'
      prod     - Tuple of right-hand-side symbols, e.g. ('expr','PLUS','term')
      prec     - Production precedence level, a (assoc, level) pair
      number   - Production number
      func     - Name of the function that executes on reduce
      callable - Resolved callable for func (set by bind())
      file     - File where the production function is defined
      line     - Line number where the production function is defined
      len      - Length of the production (number of RHS symbols)
      usyms    - List of unique symbols found in the production
      lr_items - List of all LR items for the production
      lr_next  - Next LR item (filled in during table construction)
    """
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)    # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Productions are always considered true, even when empty (Python 2).
        return 1

    def __bool__(self):
        # Bug fix: without __bool__, Python 3 truth testing falls back to
        # __len__, which made an *empty* production falsy -- contradicting
        # the always-true intent expressed by __nonzero__ above.
        return True

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            # NOTE(review): self.Prodnames appears to be attached to Production
            # objects externally during grammar construction -- confirm against
            # the grammar-building code.
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable found in pdict
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    """Lightweight stand-in for Production when tables are read from files.

    Carries only the fields the LR parsing engine actually consumes, plus a
    little debugging information (file/line of the original rule).
    """
    def __init__(self, str, name, len, func, file, line):
        self.str = str
        self.name = name
        self.len = len
        self.func = func
        self.file = file
        self.line = line
        # Filled in later by bind(); None until then.
        self.callable = None

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    def bind(self, pdict):
        """Resolve the stored action-function name to a callable from pdict."""
        if self.func:
            self.callable = pdict[self.func]

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Basic
# attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#       lr_next    - Next LR item.  For example, if we are ' expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        # Copy the right-hand side and drop the "." marker in at position n.
        rhs = list(p.prod)
        rhs.insert(n, ".")
        self.name = p.name
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod = tuple(rhs)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name, " ".join(self.prod))

    def __repr__(self):
        return "LRItem(%s)" % self

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.
# Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the right-most symbol in symbols that is in terminals, or None."""
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self, terminals):
        # All productions.  Slot 0 is reserved for the augmented-grammar
        # start rule, which set_start() fills in later.
        self.Productions = [None]

        # Nonterminal name -> list of its Production objects.
        self.Prodnames = {}

        # "name -> syms" string -> Production; used only to detect duplicates.
        self.Prodmap = {}

        # Terminal name -> list of production numbers in which it appears.
        self.Terminals = {}
        for term in terminals:
            self.Terminals[term] = []
        # 'error' is always a valid terminal (yacc error recovery token).
        self.Terminals['error'] = []

        # Nonterminal name -> list of production numbers in which it appears.
        self.Nonterminals = {}

        # Precomputed FIRST(x) and FOLLOW(x) sets.
        self.First = {}
        self.Follow = {}

        # Terminal -> ('left'|'right'|'nonassoc', level).
        self.Precedence = {}

        # Precedence rules actually used by the grammar; only consulted for
        # error checking and to warn about unused precedence declarations.
        self.UsedPrecedence = {}

        # Starting symbol; set by set_start().
        self.Start = None

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], "Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ('left', 'right', 'nonassoc'):
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Assemble one production rule and record it in the grammar tables.

        prodname   -- left-hand-side nonterminal name
        syms       -- right-hand-side symbols; NOTE: mutated in place (quoted
                      literals are replaced by their character, and a trailing
                      '%prec <term>' pair is stripped)
        func       -- name of the action function to bind later, or None
        file, line -- source location used in error messages

        Raises GrammarError on any malformed rule; returns 0 on success.
        """

        # Rule names must not collide with tokens or the reserved 'error' symbol.
        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # eval() of the quoted literal yields the bare character.
                    # NOTE(review): this executes grammar-spec text; grammar
                    # files are assumed trusted here.
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        # Implicitly declare the literal as a terminal.
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    # Not a valid literal; fall through to the identifier check.
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            # '%prec' must be the second-to-last symbol, followed by a terminal.
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                # Remember that this precedence rule was actually used.
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
1473eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1474eb8dc403SDave Cobbley 1475eb8dc403SDave Cobbley def set_start(self,start=None): 1476eb8dc403SDave Cobbley if not start: 1477eb8dc403SDave Cobbley start = self.Productions[1].name 1478eb8dc403SDave Cobbley if start not in self.Nonterminals: 1479eb8dc403SDave Cobbley raise GrammarError("start symbol %s undefined" % start) 1480eb8dc403SDave Cobbley self.Productions[0] = Production(0,"S'",[start]) 1481eb8dc403SDave Cobbley self.Nonterminals[start].append(0) 1482eb8dc403SDave Cobbley self.Start = start 1483eb8dc403SDave Cobbley 1484eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1485eb8dc403SDave Cobbley # find_unreachable() 1486eb8dc403SDave Cobbley # 1487eb8dc403SDave Cobbley # Find all of the nonterminal symbols that can't be reached from the starting 1488eb8dc403SDave Cobbley # symbol. Returns a list of nonterminals that can't be reached. 1489eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1490eb8dc403SDave Cobbley 1491eb8dc403SDave Cobbley def find_unreachable(self): 1492eb8dc403SDave Cobbley 1493eb8dc403SDave Cobbley # Mark all symbols that are reachable from a symbol s 1494eb8dc403SDave Cobbley def mark_reachable_from(s): 1495eb8dc403SDave Cobbley if reachable[s]: 1496eb8dc403SDave Cobbley # We've already reached symbol s. 
1497eb8dc403SDave Cobbley return 1498eb8dc403SDave Cobbley reachable[s] = 1 1499eb8dc403SDave Cobbley for p in self.Prodnames.get(s,[]): 1500eb8dc403SDave Cobbley for r in p.prod: 1501eb8dc403SDave Cobbley mark_reachable_from(r) 1502eb8dc403SDave Cobbley 1503eb8dc403SDave Cobbley reachable = { } 1504eb8dc403SDave Cobbley for s in list(self.Terminals) + list(self.Nonterminals): 1505eb8dc403SDave Cobbley reachable[s] = 0 1506eb8dc403SDave Cobbley 1507eb8dc403SDave Cobbley mark_reachable_from( self.Productions[0].prod[0] ) 1508eb8dc403SDave Cobbley 1509eb8dc403SDave Cobbley return [s for s in list(self.Nonterminals) 1510eb8dc403SDave Cobbley if not reachable[s]] 1511eb8dc403SDave Cobbley 1512eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1513eb8dc403SDave Cobbley # infinite_cycles() 1514eb8dc403SDave Cobbley # 1515eb8dc403SDave Cobbley # This function looks at the various parsing rules and tries to detect 1516eb8dc403SDave Cobbley # infinite recursion cycles (grammar rules where there is no possible way 1517eb8dc403SDave Cobbley # to derive a string of only terminals). 
1518eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1519eb8dc403SDave Cobbley 1520eb8dc403SDave Cobbley def infinite_cycles(self): 1521eb8dc403SDave Cobbley terminates = {} 1522eb8dc403SDave Cobbley 1523eb8dc403SDave Cobbley # Terminals: 1524eb8dc403SDave Cobbley for t in self.Terminals: 1525eb8dc403SDave Cobbley terminates[t] = 1 1526eb8dc403SDave Cobbley 1527eb8dc403SDave Cobbley terminates['$end'] = 1 1528eb8dc403SDave Cobbley 1529eb8dc403SDave Cobbley # Nonterminals: 1530eb8dc403SDave Cobbley 1531eb8dc403SDave Cobbley # Initialize to false: 1532eb8dc403SDave Cobbley for n in self.Nonterminals: 1533eb8dc403SDave Cobbley terminates[n] = 0 1534eb8dc403SDave Cobbley 1535eb8dc403SDave Cobbley # Then propagate termination until no change: 1536eb8dc403SDave Cobbley while 1: 1537eb8dc403SDave Cobbley some_change = 0 1538eb8dc403SDave Cobbley for (n,pl) in self.Prodnames.items(): 1539eb8dc403SDave Cobbley # Nonterminal n terminates iff any of its productions terminates. 1540eb8dc403SDave Cobbley for p in pl: 1541eb8dc403SDave Cobbley # Production p terminates iff all of its rhs symbols terminate. 1542eb8dc403SDave Cobbley for s in p.prod: 1543eb8dc403SDave Cobbley if not terminates[s]: 1544eb8dc403SDave Cobbley # The symbol s does not terminate, 1545eb8dc403SDave Cobbley # so production p does not terminate. 1546eb8dc403SDave Cobbley p_terminates = 0 1547eb8dc403SDave Cobbley break 1548eb8dc403SDave Cobbley else: 1549eb8dc403SDave Cobbley # didn't break from the loop, 1550eb8dc403SDave Cobbley # so every symbol s terminates 1551eb8dc403SDave Cobbley # so production p terminates. 1552eb8dc403SDave Cobbley p_terminates = 1 1553eb8dc403SDave Cobbley 1554eb8dc403SDave Cobbley if p_terminates: 1555eb8dc403SDave Cobbley # symbol n terminates! 
1556eb8dc403SDave Cobbley if not terminates[n]: 1557eb8dc403SDave Cobbley terminates[n] = 1 1558eb8dc403SDave Cobbley some_change = 1 1559eb8dc403SDave Cobbley # Don't need to consider any more productions for this n. 1560eb8dc403SDave Cobbley break 1561eb8dc403SDave Cobbley 1562eb8dc403SDave Cobbley if not some_change: 1563eb8dc403SDave Cobbley break 1564eb8dc403SDave Cobbley 1565eb8dc403SDave Cobbley infinite = [] 1566eb8dc403SDave Cobbley for (s,term) in terminates.items(): 1567eb8dc403SDave Cobbley if not term: 1568eb8dc403SDave Cobbley if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1569eb8dc403SDave Cobbley # s is used-but-not-defined, and we've already warned of that, 1570eb8dc403SDave Cobbley # so it would be overkill to say that it's also non-terminating. 1571eb8dc403SDave Cobbley pass 1572eb8dc403SDave Cobbley else: 1573eb8dc403SDave Cobbley infinite.append(s) 1574eb8dc403SDave Cobbley 1575eb8dc403SDave Cobbley return infinite 1576eb8dc403SDave Cobbley 1577eb8dc403SDave Cobbley 1578eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1579eb8dc403SDave Cobbley # undefined_symbols() 1580eb8dc403SDave Cobbley # 1581eb8dc403SDave Cobbley # Find all symbols that were used the grammar, but not defined as tokens or 1582eb8dc403SDave Cobbley # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1583eb8dc403SDave Cobbley # and prod is the production where the symbol was used. 
1584eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1585eb8dc403SDave Cobbley def undefined_symbols(self): 1586eb8dc403SDave Cobbley result = [] 1587eb8dc403SDave Cobbley for p in self.Productions: 1588eb8dc403SDave Cobbley if not p: continue 1589eb8dc403SDave Cobbley 1590eb8dc403SDave Cobbley for s in p.prod: 1591eb8dc403SDave Cobbley if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1592eb8dc403SDave Cobbley result.append((s,p)) 1593eb8dc403SDave Cobbley return result 1594eb8dc403SDave Cobbley 1595eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1596eb8dc403SDave Cobbley # unused_terminals() 1597eb8dc403SDave Cobbley # 1598eb8dc403SDave Cobbley # Find all terminals that were defined, but not used by the grammar. Returns 1599eb8dc403SDave Cobbley # a list of all symbols. 1600eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1601eb8dc403SDave Cobbley def unused_terminals(self): 1602eb8dc403SDave Cobbley unused_tok = [] 1603eb8dc403SDave Cobbley for s,v in self.Terminals.items(): 1604eb8dc403SDave Cobbley if s != 'error' and not v: 1605eb8dc403SDave Cobbley unused_tok.append(s) 1606eb8dc403SDave Cobbley 1607eb8dc403SDave Cobbley return unused_tok 1608eb8dc403SDave Cobbley 1609eb8dc403SDave Cobbley # ------------------------------------------------------------------------------ 1610eb8dc403SDave Cobbley # unused_rules() 1611eb8dc403SDave Cobbley # 1612eb8dc403SDave Cobbley # Find all grammar rules that were defined, but not used (maybe not reachable) 1613eb8dc403SDave Cobbley # Returns a list of productions. 
1614eb8dc403SDave Cobbley # ------------------------------------------------------------------------------ 1615eb8dc403SDave Cobbley 1616eb8dc403SDave Cobbley def unused_rules(self): 1617eb8dc403SDave Cobbley unused_prod = [] 1618eb8dc403SDave Cobbley for s,v in self.Nonterminals.items(): 1619eb8dc403SDave Cobbley if not v: 1620eb8dc403SDave Cobbley p = self.Prodnames[s][0] 1621eb8dc403SDave Cobbley unused_prod.append(p) 1622eb8dc403SDave Cobbley return unused_prod 1623eb8dc403SDave Cobbley 1624eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1625eb8dc403SDave Cobbley # unused_precedence() 1626eb8dc403SDave Cobbley # 1627eb8dc403SDave Cobbley # Returns a list of tuples (term,precedence) corresponding to precedence 1628eb8dc403SDave Cobbley # rules that were never used by the grammar. term is the name of the terminal 1629eb8dc403SDave Cobbley # on which precedence was applied and precedence is a string such as 'left' or 1630eb8dc403SDave Cobbley # 'right' corresponding to the type of precedence. 1631eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1632eb8dc403SDave Cobbley 1633eb8dc403SDave Cobbley def unused_precedence(self): 1634eb8dc403SDave Cobbley unused = [] 1635eb8dc403SDave Cobbley for termname in self.Precedence: 1636eb8dc403SDave Cobbley if not (termname in self.Terminals or termname in self.UsedPrecedence): 1637eb8dc403SDave Cobbley unused.append((termname,self.Precedence[termname][0])) 1638eb8dc403SDave Cobbley 1639eb8dc403SDave Cobbley return unused 1640eb8dc403SDave Cobbley 1641eb8dc403SDave Cobbley # ------------------------------------------------------------------------- 1642eb8dc403SDave Cobbley # _first() 1643eb8dc403SDave Cobbley # 1644eb8dc403SDave Cobbley # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 
1645eb8dc403SDave Cobbley # 1646eb8dc403SDave Cobbley # During execution of compute_first1, the result may be incomplete. 1647eb8dc403SDave Cobbley # Afterward (e.g., when called from compute_follow()), it will be complete. 1648eb8dc403SDave Cobbley # ------------------------------------------------------------------------- 1649eb8dc403SDave Cobbley def _first(self,beta): 1650eb8dc403SDave Cobbley 1651eb8dc403SDave Cobbley # We are computing First(x1,x2,x3,...,xn) 1652eb8dc403SDave Cobbley result = [ ] 1653eb8dc403SDave Cobbley for x in beta: 1654eb8dc403SDave Cobbley x_produces_empty = 0 1655eb8dc403SDave Cobbley 1656eb8dc403SDave Cobbley # Add all the non-<empty> symbols of First[x] to the result. 1657eb8dc403SDave Cobbley for f in self.First[x]: 1658eb8dc403SDave Cobbley if f == '<empty>': 1659eb8dc403SDave Cobbley x_produces_empty = 1 1660eb8dc403SDave Cobbley else: 1661eb8dc403SDave Cobbley if f not in result: result.append(f) 1662eb8dc403SDave Cobbley 1663eb8dc403SDave Cobbley if x_produces_empty: 1664eb8dc403SDave Cobbley # We have to consider the next x in beta, 1665eb8dc403SDave Cobbley # i.e. stay in the loop. 1666eb8dc403SDave Cobbley pass 1667eb8dc403SDave Cobbley else: 1668eb8dc403SDave Cobbley # We don't have to consider any further symbols in beta. 1669eb8dc403SDave Cobbley break 1670eb8dc403SDave Cobbley else: 1671eb8dc403SDave Cobbley # There was no 'break' from the loop, 1672eb8dc403SDave Cobbley # so x_produces_empty was true for all x in beta, 1673eb8dc403SDave Cobbley # so beta produces empty as well. 
1674eb8dc403SDave Cobbley result.append('<empty>') 1675eb8dc403SDave Cobbley 1676eb8dc403SDave Cobbley return result 1677eb8dc403SDave Cobbley 1678eb8dc403SDave Cobbley # ------------------------------------------------------------------------- 1679eb8dc403SDave Cobbley # compute_first() 1680eb8dc403SDave Cobbley # 1681eb8dc403SDave Cobbley # Compute the value of FIRST1(X) for all symbols 1682eb8dc403SDave Cobbley # ------------------------------------------------------------------------- 1683eb8dc403SDave Cobbley def compute_first(self): 1684eb8dc403SDave Cobbley if self.First: 1685eb8dc403SDave Cobbley return self.First 1686eb8dc403SDave Cobbley 1687eb8dc403SDave Cobbley # Terminals: 1688eb8dc403SDave Cobbley for t in self.Terminals: 1689eb8dc403SDave Cobbley self.First[t] = [t] 1690eb8dc403SDave Cobbley 1691eb8dc403SDave Cobbley self.First['$end'] = ['$end'] 1692eb8dc403SDave Cobbley 1693eb8dc403SDave Cobbley # Nonterminals: 1694eb8dc403SDave Cobbley 1695eb8dc403SDave Cobbley # Initialize to the empty set: 1696eb8dc403SDave Cobbley for n in self.Nonterminals: 1697eb8dc403SDave Cobbley self.First[n] = [] 1698eb8dc403SDave Cobbley 1699eb8dc403SDave Cobbley # Then propagate symbols until no change: 1700eb8dc403SDave Cobbley while 1: 1701eb8dc403SDave Cobbley some_change = 0 1702eb8dc403SDave Cobbley for n in self.Nonterminals: 1703eb8dc403SDave Cobbley for p in self.Prodnames[n]: 1704eb8dc403SDave Cobbley for f in self._first(p.prod): 1705eb8dc403SDave Cobbley if f not in self.First[n]: 1706eb8dc403SDave Cobbley self.First[n].append( f ) 1707eb8dc403SDave Cobbley some_change = 1 1708eb8dc403SDave Cobbley if not some_change: 1709eb8dc403SDave Cobbley break 1710eb8dc403SDave Cobbley 1711eb8dc403SDave Cobbley return self.First 1712eb8dc403SDave Cobbley 1713eb8dc403SDave Cobbley # --------------------------------------------------------------------- 1714eb8dc403SDave Cobbley # compute_follow() 1715eb8dc403SDave Cobbley # 1716eb8dc403SDave Cobbley # Computes all of the 
follow sets for every non-terminal symbol. The 1717eb8dc403SDave Cobbley # follow set is the set of all symbols that might follow a given 1718eb8dc403SDave Cobbley # non-terminal. See the Dragon book, 2nd Ed. p. 189. 1719eb8dc403SDave Cobbley # --------------------------------------------------------------------- 1720eb8dc403SDave Cobbley def compute_follow(self,start=None): 1721eb8dc403SDave Cobbley # If already computed, return the result 1722eb8dc403SDave Cobbley if self.Follow: 1723eb8dc403SDave Cobbley return self.Follow 1724eb8dc403SDave Cobbley 1725eb8dc403SDave Cobbley # If first sets not computed yet, do that first. 1726eb8dc403SDave Cobbley if not self.First: 1727eb8dc403SDave Cobbley self.compute_first() 1728eb8dc403SDave Cobbley 1729eb8dc403SDave Cobbley # Add '$end' to the follow list of the start symbol 1730eb8dc403SDave Cobbley for k in self.Nonterminals: 1731eb8dc403SDave Cobbley self.Follow[k] = [ ] 1732eb8dc403SDave Cobbley 1733eb8dc403SDave Cobbley if not start: 1734eb8dc403SDave Cobbley start = self.Productions[1].name 1735eb8dc403SDave Cobbley 1736eb8dc403SDave Cobbley self.Follow[start] = [ '$end' ] 1737eb8dc403SDave Cobbley 1738eb8dc403SDave Cobbley while 1: 1739eb8dc403SDave Cobbley didadd = 0 1740eb8dc403SDave Cobbley for p in self.Productions[1:]: 1741eb8dc403SDave Cobbley # Here is the production set 1742eb8dc403SDave Cobbley for i in range(len(p.prod)): 1743eb8dc403SDave Cobbley B = p.prod[i] 1744eb8dc403SDave Cobbley if B in self.Nonterminals: 1745eb8dc403SDave Cobbley # Okay. 
We got a non-terminal in a production 1746eb8dc403SDave Cobbley fst = self._first(p.prod[i+1:]) 1747eb8dc403SDave Cobbley hasempty = 0 1748eb8dc403SDave Cobbley for f in fst: 1749eb8dc403SDave Cobbley if f != '<empty>' and f not in self.Follow[B]: 1750eb8dc403SDave Cobbley self.Follow[B].append(f) 1751eb8dc403SDave Cobbley didadd = 1 1752eb8dc403SDave Cobbley if f == '<empty>': 1753eb8dc403SDave Cobbley hasempty = 1 1754eb8dc403SDave Cobbley if hasempty or i == (len(p.prod)-1): 1755eb8dc403SDave Cobbley # Add elements of follow(a) to follow(b) 1756eb8dc403SDave Cobbley for f in self.Follow[p.name]: 1757eb8dc403SDave Cobbley if f not in self.Follow[B]: 1758eb8dc403SDave Cobbley self.Follow[B].append(f) 1759eb8dc403SDave Cobbley didadd = 1 1760eb8dc403SDave Cobbley if not didadd: break 1761eb8dc403SDave Cobbley return self.Follow 1762eb8dc403SDave Cobbley 1763eb8dc403SDave Cobbley 1764eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1765eb8dc403SDave Cobbley # build_lritems() 1766eb8dc403SDave Cobbley # 1767eb8dc403SDave Cobbley # This function walks the list of productions and builds a complete set of the 1768eb8dc403SDave Cobbley # LR items. The LR items are stored in two ways: First, they are uniquely 1769eb8dc403SDave Cobbley # numbered and placed in the list _lritems. Second, a linked list of LR items 1770eb8dc403SDave Cobbley # is built for each production. For example: 1771eb8dc403SDave Cobbley # 1772eb8dc403SDave Cobbley # E -> E PLUS E 1773eb8dc403SDave Cobbley # 1774eb8dc403SDave Cobbley # Creates the list 1775eb8dc403SDave Cobbley # 1776eb8dc403SDave Cobbley # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . 
] 1777eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 1778eb8dc403SDave Cobbley 1779eb8dc403SDave Cobbley def build_lritems(self): 1780eb8dc403SDave Cobbley for p in self.Productions: 1781eb8dc403SDave Cobbley lastlri = p 1782eb8dc403SDave Cobbley i = 0 1783eb8dc403SDave Cobbley lr_items = [] 1784eb8dc403SDave Cobbley while 1: 1785eb8dc403SDave Cobbley if i > len(p): 1786eb8dc403SDave Cobbley lri = None 1787eb8dc403SDave Cobbley else: 1788eb8dc403SDave Cobbley lri = LRItem(p,i) 1789eb8dc403SDave Cobbley # Precompute the list of productions immediately following 1790eb8dc403SDave Cobbley try: 1791eb8dc403SDave Cobbley lri.lr_after = self.Prodnames[lri.prod[i+1]] 1792eb8dc403SDave Cobbley except (IndexError,KeyError): 1793eb8dc403SDave Cobbley lri.lr_after = [] 1794eb8dc403SDave Cobbley try: 1795eb8dc403SDave Cobbley lri.lr_before = lri.prod[i-1] 1796eb8dc403SDave Cobbley except IndexError: 1797eb8dc403SDave Cobbley lri.lr_before = None 1798eb8dc403SDave Cobbley 1799eb8dc403SDave Cobbley lastlri.lr_next = lri 1800eb8dc403SDave Cobbley if not lri: break 1801eb8dc403SDave Cobbley lr_items.append(lri) 1802eb8dc403SDave Cobbley lastlri = lri 1803eb8dc403SDave Cobbley i += 1 1804eb8dc403SDave Cobbley p.lr_items = lr_items 1805eb8dc403SDave Cobbley 1806eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 1807eb8dc403SDave Cobbley# == Class LRTable == 1808eb8dc403SDave Cobbley# 1809eb8dc403SDave Cobbley# This basic class represents a basic table of LR parsing information. 1810eb8dc403SDave Cobbley# Methods for generating the tables are not defined here. They are defined 1811eb8dc403SDave Cobbley# in the derived class LRGeneratedTable. 
# -----------------------------------------------------------------------------

class VersionError(YaccError):
    """Raised when a cached parser table was generated by an older ply."""
    pass

class LRTable(object):
    """Base container for LR parsing tables.

    Holds the action/goto tables and the production list.  Methods for
    *generating* tables live in the derived class LRGeneratedTable; this
    class only knows how to load previously saved tables.
    """
    def __init__(self):
        self.lr_action = None        # Action table
        self.lr_goto = None          # Goto table
        self.lr_productions = None   # List of MiniProduction objects
        self.lr_method = None        # Parsing method ('SLR' or 'LALR')

    def read_table(self,module):
        """Load tables from a parsetab module (name or module object).

        Returns the stored grammar signature.  Raises VersionError when the
        table was written by an incompatible ply version.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            # NOTE: exec-based import; table modules are trusted build
            # artifacts generated by ply itself, not untrusted input.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load tables from a pickle file written by a previous run.

        Returns the stored grammar signature.  Raises VersionError when the
        pickle was written by an incompatible ply version.
        """
        try:
            import cPickle as pickle     # Python 2 fast pickle
        except ImportError:
            import pickle

        # NOTE: unpickling is only safe because these files are produced by
        # ply itself; never point this at untrusted data.
        in_f = open(filename,"rb")
        # Bug fix: close the file even if version check or unpickling raises.
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto = pickle.load(in_f)
            productions = pickle.load(in_f)
        finally:
            in_f.close()

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)

# -----------------------------------------------------------------------------
# === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    # N[x] tracks traversal state: 0 = unvisited, a stack depth while x is
    # on the stack, MAXINT once x's strongly connected component is done.
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    # Depth-first traversal that merges F values across strongly connected
    # components (DeRemer/Pennello digraph algorithm).
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: pop the whole component; every member
        # shares the merged set F[x].
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Construct SLR or LALR(1) tables for *grammar*.

        method is 'SLR' or 'LALR'; log is an optional logger object
        (defaults to a NullLogger).  Raises LALRError for other methods.
        Table construction runs immediately in the constructor.
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}        # Action table
        self.lr_goto = {}          # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}    # Cache of computed gotos
        self.lr0_cidhash = {}      # Cache of closures

        self._add_count = 0        # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []        # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        """Return the LR(0) closure of item set I as a new list."""
        # _add_count stamps items so each closure pass adds an item at most
        # once without clearing per-item flags between calls.
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            # NOTE: J grows while being iterated; items appended here are
            # visited later in this same for-loop pass (intentional).
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.
    # This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self,I,x):
        """Return goto(I,x): the closure of items in I advanced over symbol x.

        Results are memoized so the identical goto set is always returned as
        the same Python object, allowing id()-based comparisons elsewhere.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # s walks a trie keyed first by the symbol x, then by id() of each
        # advanced item; the final node stores the closure under '$end'.
        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        """Return the canonical collection C of LR(0) item sets.

        Also fills self.lr0_cidhash mapping id(itemset) -> state number.
        """
        # State 0 is the closure of the start production's first item.
        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows as new goto sets are discovered; the while re-reads len(C))
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g:  continue
                # lr0_goto returns a unique object per set, so id() identifies
                # states we have already numbered.
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennelo (1982).
    #
    # DeRemer, F. L., and T. J.
Pennelo: "Efficient Computation of LALR(1) 2078eb8dc403SDave Cobbley # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2079eb8dc403SDave Cobbley # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2080eb8dc403SDave Cobbley # 2081eb8dc403SDave Cobbley # Further details can also be found in: 2082eb8dc403SDave Cobbley # 2083eb8dc403SDave Cobbley # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2084eb8dc403SDave Cobbley # McGraw-Hill Book Company, (1985). 2085eb8dc403SDave Cobbley # 2086eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2087eb8dc403SDave Cobbley 2088eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2089eb8dc403SDave Cobbley # compute_nullable_nonterminals() 2090eb8dc403SDave Cobbley # 2091eb8dc403SDave Cobbley # Creates a dictionary containing all of the non-terminals that might produce 2092eb8dc403SDave Cobbley # an empty production. 
2093eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2094eb8dc403SDave Cobbley 2095eb8dc403SDave Cobbley def compute_nullable_nonterminals(self): 2096eb8dc403SDave Cobbley nullable = {} 2097eb8dc403SDave Cobbley num_nullable = 0 2098eb8dc403SDave Cobbley while 1: 2099eb8dc403SDave Cobbley for p in self.grammar.Productions[1:]: 2100eb8dc403SDave Cobbley if p.len == 0: 2101eb8dc403SDave Cobbley nullable[p.name] = 1 2102eb8dc403SDave Cobbley continue 2103eb8dc403SDave Cobbley for t in p.prod: 2104eb8dc403SDave Cobbley if not t in nullable: break 2105eb8dc403SDave Cobbley else: 2106eb8dc403SDave Cobbley nullable[p.name] = 1 2107eb8dc403SDave Cobbley if len(nullable) == num_nullable: break 2108eb8dc403SDave Cobbley num_nullable = len(nullable) 2109eb8dc403SDave Cobbley return nullable 2110eb8dc403SDave Cobbley 2111eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2112eb8dc403SDave Cobbley # find_nonterminal_trans(C) 2113eb8dc403SDave Cobbley # 2114eb8dc403SDave Cobbley # Given a set of LR(0) items, this functions finds all of the non-terminal 2115eb8dc403SDave Cobbley # transitions. These are transitions in which a dot appears immediately before 2116eb8dc403SDave Cobbley # a non-terminal. Returns a list of tuples of the form (state,N) where state 2117eb8dc403SDave Cobbley # is the state number and N is the nonterminal symbol. 2118eb8dc403SDave Cobbley # 2119eb8dc403SDave Cobbley # The input C is the set of LR(0) items. 
2120eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2121eb8dc403SDave Cobbley 2122eb8dc403SDave Cobbley def find_nonterminal_transitions(self,C): 2123eb8dc403SDave Cobbley trans = [] 2124eb8dc403SDave Cobbley for state in range(len(C)): 2125eb8dc403SDave Cobbley for p in C[state]: 2126eb8dc403SDave Cobbley if p.lr_index < p.len - 1: 2127eb8dc403SDave Cobbley t = (state,p.prod[p.lr_index+1]) 2128eb8dc403SDave Cobbley if t[1] in self.grammar.Nonterminals: 2129eb8dc403SDave Cobbley if t not in trans: trans.append(t) 2130eb8dc403SDave Cobbley state = state + 1 2131eb8dc403SDave Cobbley return trans 2132eb8dc403SDave Cobbley 2133eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2134eb8dc403SDave Cobbley # dr_relation() 2135eb8dc403SDave Cobbley # 2136eb8dc403SDave Cobbley # Computes the DR(p,A) relationships for non-terminal transitions. The input 2137eb8dc403SDave Cobbley # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2138eb8dc403SDave Cobbley # 2139eb8dc403SDave Cobbley # Returns a list of terminals. 
2140eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2141eb8dc403SDave Cobbley 2142eb8dc403SDave Cobbley def dr_relation(self,C,trans,nullable): 2143eb8dc403SDave Cobbley dr_set = { } 2144eb8dc403SDave Cobbley state,N = trans 2145eb8dc403SDave Cobbley terms = [] 2146eb8dc403SDave Cobbley 2147eb8dc403SDave Cobbley g = self.lr0_goto(C[state],N) 2148eb8dc403SDave Cobbley for p in g: 2149eb8dc403SDave Cobbley if p.lr_index < p.len - 1: 2150eb8dc403SDave Cobbley a = p.prod[p.lr_index+1] 2151eb8dc403SDave Cobbley if a in self.grammar.Terminals: 2152eb8dc403SDave Cobbley if a not in terms: terms.append(a) 2153eb8dc403SDave Cobbley 2154eb8dc403SDave Cobbley # This extra bit is to handle the start state 2155eb8dc403SDave Cobbley if state == 0 and N == self.grammar.Productions[0].prod[0]: 2156eb8dc403SDave Cobbley terms.append('$end') 2157eb8dc403SDave Cobbley 2158eb8dc403SDave Cobbley return terms 2159eb8dc403SDave Cobbley 2160eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2161eb8dc403SDave Cobbley # reads_relation() 2162eb8dc403SDave Cobbley # 2163eb8dc403SDave Cobbley # Computes the READS() relation (p,A) READS (t,C). 
2164eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2165eb8dc403SDave Cobbley 2166eb8dc403SDave Cobbley def reads_relation(self,C, trans, empty): 2167eb8dc403SDave Cobbley # Look for empty transitions 2168eb8dc403SDave Cobbley rel = [] 2169eb8dc403SDave Cobbley state, N = trans 2170eb8dc403SDave Cobbley 2171eb8dc403SDave Cobbley g = self.lr0_goto(C[state],N) 2172eb8dc403SDave Cobbley j = self.lr0_cidhash.get(id(g),-1) 2173eb8dc403SDave Cobbley for p in g: 2174eb8dc403SDave Cobbley if p.lr_index < p.len - 1: 2175eb8dc403SDave Cobbley a = p.prod[p.lr_index + 1] 2176eb8dc403SDave Cobbley if a in empty: 2177eb8dc403SDave Cobbley rel.append((j,a)) 2178eb8dc403SDave Cobbley 2179eb8dc403SDave Cobbley return rel 2180eb8dc403SDave Cobbley 2181eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2182eb8dc403SDave Cobbley # compute_lookback_includes() 2183eb8dc403SDave Cobbley # 2184eb8dc403SDave Cobbley # Determines the lookback and includes relations 2185eb8dc403SDave Cobbley # 2186eb8dc403SDave Cobbley # LOOKBACK: 2187eb8dc403SDave Cobbley # 2188eb8dc403SDave Cobbley # This relation is determined by running the LR(0) state machine forward. 2189eb8dc403SDave Cobbley # For example, starting with a production "N : . A B C", we run it forward 2190eb8dc403SDave Cobbley # to obtain "N : A B C ." We then build a relationship between this final 2191eb8dc403SDave Cobbley # state and the starting state. These relationships are stored in a dictionary 2192eb8dc403SDave Cobbley # lookdict. 2193eb8dc403SDave Cobbley # 2194eb8dc403SDave Cobbley # INCLUDES: 2195eb8dc403SDave Cobbley # 2196eb8dc403SDave Cobbley # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2197eb8dc403SDave Cobbley # 2198eb8dc403SDave Cobbley # This relation is used to determine non-terminal transitions that occur 2199eb8dc403SDave Cobbley # inside of other non-terminal transition states. 
    # (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self,C,trans,nullable):
        """Compute the LOOKBACK and INCLUDES relations for all transitions.

        Returns (lookdict, includedict): lookdict maps (state, N) to a list
        of (final_state, production) lookback pairs; includedict maps a
        transition to the transitions that include it.
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions (for O(1) membership)
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len:   continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        =  Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self,C, ntrans, nullable):
        """Compute Read sets for all nonterminal transitions via digraph()."""
        FP = lambda x: self.dr_relation(C,x,nullable)
        R =  lambda x: self.reads_relation(C,x,nullable)
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
2294eb8dc403SDave Cobbley # 2295eb8dc403SDave Cobbley # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 2296eb8dc403SDave Cobbley # 2297eb8dc403SDave Cobbley # Inputs: 2298eb8dc403SDave Cobbley # ntrans = Set of nonterminal transitions 2299eb8dc403SDave Cobbley # readsets = Readset (previously computed) 2300eb8dc403SDave Cobbley # inclsets = Include sets (previously computed) 2301eb8dc403SDave Cobbley # 2302eb8dc403SDave Cobbley # Returns a set containing the follow sets 2303eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2304eb8dc403SDave Cobbley 2305eb8dc403SDave Cobbley def compute_follow_sets(self,ntrans,readsets,inclsets): 2306eb8dc403SDave Cobbley FP = lambda x: readsets[x] 2307eb8dc403SDave Cobbley R = lambda x: inclsets.get(x,[]) 2308eb8dc403SDave Cobbley F = digraph(ntrans,R,FP) 2309eb8dc403SDave Cobbley return F 2310eb8dc403SDave Cobbley 2311eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2312eb8dc403SDave Cobbley # add_lookaheads() 2313eb8dc403SDave Cobbley # 2314eb8dc403SDave Cobbley # Attaches the lookahead symbols to grammar rules. 
    #
    # Inputs:    lookbacks         -  Set of lookback relations
    #            followset         -  Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self,lookbacks,followset):
        """Attach computed lookahead terminals to each production, per state.

        For every lookback (state, p) of a transition, the transition's
        follow set is merged into p.lookaheads[state] (duplicates skipped).
        """
        for trans,lb in lookbacks.items():
            # Loop over productions in lookback
            for state,p in lb:
                if not state in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans,[])
                for a in f:
                    if a not in p.lookaheads[state]: p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self,C):
        """Run the full DeRemer-Pennello pipeline to add LALR(1) lookaheads.

        C is the canonical collection of LR(0) item sets.  Side effect:
        populates the lookaheads dictionaries on the grammar's productions.
        """
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ...
IN}, collection of LR(0) items 2376eb8dc403SDave Cobbley # This determines the number of states 2377eb8dc403SDave Cobbley 2378eb8dc403SDave Cobbley C = self.lr0_items() 2379eb8dc403SDave Cobbley 2380eb8dc403SDave Cobbley if self.lr_method == 'LALR': 2381eb8dc403SDave Cobbley self.add_lalr_lookaheads(C) 2382eb8dc403SDave Cobbley 2383eb8dc403SDave Cobbley # Build the parser table, state by state 2384eb8dc403SDave Cobbley st = 0 2385eb8dc403SDave Cobbley for I in C: 2386eb8dc403SDave Cobbley # Loop over each production in I 2387eb8dc403SDave Cobbley actlist = [ ] # List of actions 2388eb8dc403SDave Cobbley st_action = { } 2389eb8dc403SDave Cobbley st_actionp = { } 2390eb8dc403SDave Cobbley st_goto = { } 2391eb8dc403SDave Cobbley log.info("") 2392eb8dc403SDave Cobbley log.info("state %d", st) 2393eb8dc403SDave Cobbley log.info("") 2394eb8dc403SDave Cobbley for p in I: 2395eb8dc403SDave Cobbley log.info(" (%d) %s", p.number, str(p)) 2396eb8dc403SDave Cobbley log.info("") 2397eb8dc403SDave Cobbley 2398eb8dc403SDave Cobbley for p in I: 2399eb8dc403SDave Cobbley if p.len == p.lr_index + 1: 2400eb8dc403SDave Cobbley if p.name == "S'": 2401eb8dc403SDave Cobbley # Start symbol. Accept! 2402eb8dc403SDave Cobbley st_action["$end"] = 0 2403eb8dc403SDave Cobbley st_actionp["$end"] = p 2404eb8dc403SDave Cobbley else: 2405eb8dc403SDave Cobbley # We are at the end of a production. Reduce! 2406eb8dc403SDave Cobbley if self.lr_method == 'LALR': 2407eb8dc403SDave Cobbley laheads = p.lookaheads[st] 2408eb8dc403SDave Cobbley else: 2409eb8dc403SDave Cobbley laheads = self.grammar.Follow[p.name] 2410eb8dc403SDave Cobbley for a in laheads: 2411eb8dc403SDave Cobbley actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) 2412eb8dc403SDave Cobbley r = st_action.get(a,None) 2413eb8dc403SDave Cobbley if r is not None: 2414eb8dc403SDave Cobbley # Whoa. 
Have a shift/reduce or reduce/reduce conflict 2415eb8dc403SDave Cobbley if r > 0: 2416eb8dc403SDave Cobbley # Need to decide on shift or reduce here 2417eb8dc403SDave Cobbley # By default we favor shifting. Need to add 2418eb8dc403SDave Cobbley # some precedence rules here. 2419eb8dc403SDave Cobbley sprec,slevel = Productions[st_actionp[a].number].prec 2420eb8dc403SDave Cobbley rprec,rlevel = Precedence.get(a,('right',0)) 2421eb8dc403SDave Cobbley if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 2422eb8dc403SDave Cobbley # We really need to reduce here. 2423eb8dc403SDave Cobbley st_action[a] = -p.number 2424eb8dc403SDave Cobbley st_actionp[a] = p 2425eb8dc403SDave Cobbley if not slevel and not rlevel: 2426eb8dc403SDave Cobbley log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 2427eb8dc403SDave Cobbley self.sr_conflicts.append((st,a,'reduce')) 2428eb8dc403SDave Cobbley Productions[p.number].reduced += 1 2429eb8dc403SDave Cobbley elif (slevel == rlevel) and (rprec == 'nonassoc'): 2430eb8dc403SDave Cobbley st_action[a] = None 2431eb8dc403SDave Cobbley else: 2432eb8dc403SDave Cobbley # Hmmm. Guess we'll keep the shift 2433eb8dc403SDave Cobbley if not rlevel: 2434eb8dc403SDave Cobbley log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2435eb8dc403SDave Cobbley self.sr_conflicts.append((st,a,'shift')) 2436eb8dc403SDave Cobbley elif r < 0: 2437eb8dc403SDave Cobbley # Reduce/reduce conflict. 
In this case, we favor the rule 2438eb8dc403SDave Cobbley # that was defined first in the grammar file 2439eb8dc403SDave Cobbley oldp = Productions[-r] 2440eb8dc403SDave Cobbley pp = Productions[p.number] 2441eb8dc403SDave Cobbley if oldp.line > pp.line: 2442eb8dc403SDave Cobbley st_action[a] = -p.number 2443eb8dc403SDave Cobbley st_actionp[a] = p 2444eb8dc403SDave Cobbley chosenp,rejectp = pp,oldp 2445eb8dc403SDave Cobbley Productions[p.number].reduced += 1 2446eb8dc403SDave Cobbley Productions[oldp.number].reduced -= 1 2447eb8dc403SDave Cobbley else: 2448eb8dc403SDave Cobbley chosenp,rejectp = oldp,pp 2449eb8dc403SDave Cobbley self.rr_conflicts.append((st,chosenp,rejectp)) 2450eb8dc403SDave Cobbley log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) 2451eb8dc403SDave Cobbley else: 2452eb8dc403SDave Cobbley raise LALRError("Unknown conflict in state %d" % st) 2453eb8dc403SDave Cobbley else: 2454eb8dc403SDave Cobbley st_action[a] = -p.number 2455eb8dc403SDave Cobbley st_actionp[a] = p 2456eb8dc403SDave Cobbley Productions[p.number].reduced += 1 2457eb8dc403SDave Cobbley else: 2458eb8dc403SDave Cobbley i = p.lr_index 2459eb8dc403SDave Cobbley a = p.prod[i+1] # Get symbol right after the "." 
2460eb8dc403SDave Cobbley if a in self.grammar.Terminals: 2461eb8dc403SDave Cobbley g = self.lr0_goto(I,a) 2462eb8dc403SDave Cobbley j = self.lr0_cidhash.get(id(g),-1) 2463eb8dc403SDave Cobbley if j >= 0: 2464eb8dc403SDave Cobbley # We are in a shift state 2465eb8dc403SDave Cobbley actlist.append((a,p,"shift and go to state %d" % j)) 2466eb8dc403SDave Cobbley r = st_action.get(a,None) 2467eb8dc403SDave Cobbley if r is not None: 2468eb8dc403SDave Cobbley # Whoa have a shift/reduce or shift/shift conflict 2469eb8dc403SDave Cobbley if r > 0: 2470eb8dc403SDave Cobbley if r != j: 2471eb8dc403SDave Cobbley raise LALRError("Shift/shift conflict in state %d" % st) 2472eb8dc403SDave Cobbley elif r < 0: 2473eb8dc403SDave Cobbley # Do a precedence check. 2474eb8dc403SDave Cobbley # - if precedence of reduce rule is higher, we reduce. 2475eb8dc403SDave Cobbley # - if precedence of reduce is same and left assoc, we reduce. 2476eb8dc403SDave Cobbley # - otherwise we shift 2477eb8dc403SDave Cobbley rprec,rlevel = Productions[st_actionp[a].number].prec 2478eb8dc403SDave Cobbley sprec,slevel = Precedence.get(a,('right',0)) 2479eb8dc403SDave Cobbley if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 2480eb8dc403SDave Cobbley # We decide to shift here... highest precedence to shift 2481eb8dc403SDave Cobbley Productions[st_actionp[a].number].reduced -= 1 2482eb8dc403SDave Cobbley st_action[a] = j 2483eb8dc403SDave Cobbley st_actionp[a] = p 2484eb8dc403SDave Cobbley if not rlevel: 2485eb8dc403SDave Cobbley log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2486eb8dc403SDave Cobbley self.sr_conflicts.append((st,a,'shift')) 2487eb8dc403SDave Cobbley elif (slevel == rlevel) and (rprec == 'nonassoc'): 2488eb8dc403SDave Cobbley st_action[a] = None 2489eb8dc403SDave Cobbley else: 2490eb8dc403SDave Cobbley # Hmmm. Guess we'll keep the reduce 2491eb8dc403SDave Cobbley if not slevel and not rlevel: 2492eb8dc403SDave Cobbley log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) 2493eb8dc403SDave Cobbley self.sr_conflicts.append((st,a,'reduce')) 2494eb8dc403SDave Cobbley 2495eb8dc403SDave Cobbley else: 2496eb8dc403SDave Cobbley raise LALRError("Unknown conflict in state %d" % st) 2497eb8dc403SDave Cobbley else: 2498eb8dc403SDave Cobbley st_action[a] = j 2499eb8dc403SDave Cobbley st_actionp[a] = p 2500eb8dc403SDave Cobbley 2501eb8dc403SDave Cobbley # Print the actions associated with each terminal 2502eb8dc403SDave Cobbley _actprint = { } 2503eb8dc403SDave Cobbley for a,p,m in actlist: 2504eb8dc403SDave Cobbley if a in st_action: 2505eb8dc403SDave Cobbley if p is st_actionp[a]: 2506eb8dc403SDave Cobbley log.info(" %-15s %s",a,m) 2507eb8dc403SDave Cobbley _actprint[(a,m)] = 1 2508eb8dc403SDave Cobbley log.info("") 2509eb8dc403SDave Cobbley # Print the actions that were not used. (debugging) 2510eb8dc403SDave Cobbley not_used = 0 2511eb8dc403SDave Cobbley for a,p,m in actlist: 2512eb8dc403SDave Cobbley if a in st_action: 2513eb8dc403SDave Cobbley if p is not st_actionp[a]: 2514eb8dc403SDave Cobbley if not (a,m) in _actprint: 2515eb8dc403SDave Cobbley log.debug(" ! 
%-15s [ %s ]",a,m) 2516eb8dc403SDave Cobbley not_used = 1 2517eb8dc403SDave Cobbley _actprint[(a,m)] = 1 2518eb8dc403SDave Cobbley if not_used: 2519eb8dc403SDave Cobbley log.debug("") 2520eb8dc403SDave Cobbley 2521eb8dc403SDave Cobbley # Construct the goto table for this state 2522eb8dc403SDave Cobbley 2523eb8dc403SDave Cobbley nkeys = { } 2524eb8dc403SDave Cobbley for ii in I: 2525eb8dc403SDave Cobbley for s in ii.usyms: 2526eb8dc403SDave Cobbley if s in self.grammar.Nonterminals: 2527eb8dc403SDave Cobbley nkeys[s] = None 2528eb8dc403SDave Cobbley for n in nkeys: 2529eb8dc403SDave Cobbley g = self.lr0_goto(I,n) 2530eb8dc403SDave Cobbley j = self.lr0_cidhash.get(id(g),-1) 2531eb8dc403SDave Cobbley if j >= 0: 2532eb8dc403SDave Cobbley st_goto[n] = j 2533eb8dc403SDave Cobbley log.info(" %-30s shift and go to state %d",n,j) 2534eb8dc403SDave Cobbley 2535eb8dc403SDave Cobbley action[st] = st_action 2536eb8dc403SDave Cobbley actionp[st] = st_actionp 2537eb8dc403SDave Cobbley goto[st] = st_goto 2538eb8dc403SDave Cobbley st += 1 2539eb8dc403SDave Cobbley 2540eb8dc403SDave Cobbley 2541eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2542eb8dc403SDave Cobbley # write() 2543eb8dc403SDave Cobbley # 2544eb8dc403SDave Cobbley # This function writes the LR parsing tables to a file 2545eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2546eb8dc403SDave Cobbley 2547eb8dc403SDave Cobbley def write_table(self,modulename,outputdir='',signature=""): 2548eb8dc403SDave Cobbley basemodulename = modulename.split(".")[-1] 2549eb8dc403SDave Cobbley filename = os.path.join(outputdir,basemodulename) + ".py" 2550eb8dc403SDave Cobbley try: 2551eb8dc403SDave Cobbley f = open(filename,"w") 2552eb8dc403SDave Cobbley 2553eb8dc403SDave Cobbley f.write(""" 2554eb8dc403SDave Cobbley# %s 2555eb8dc403SDave Cobbley# This file is automatically generated. Do not edit. 
2556eb8dc403SDave Cobbley_tabversion = %r 2557eb8dc403SDave Cobbley 2558eb8dc403SDave Cobbley_lr_method = %r 2559eb8dc403SDave Cobbley 2560eb8dc403SDave Cobbley_lr_signature = %r 2561eb8dc403SDave Cobbley """ % (filename, __tabversion__, self.lr_method, signature)) 2562eb8dc403SDave Cobbley 2563eb8dc403SDave Cobbley # Change smaller to 0 to go back to original tables 2564eb8dc403SDave Cobbley smaller = 1 2565eb8dc403SDave Cobbley 2566eb8dc403SDave Cobbley # Factor out names to try and make smaller 2567eb8dc403SDave Cobbley if smaller: 2568eb8dc403SDave Cobbley items = { } 2569eb8dc403SDave Cobbley 2570eb8dc403SDave Cobbley for s,nd in self.lr_action.items(): 2571eb8dc403SDave Cobbley for name,v in nd.items(): 2572eb8dc403SDave Cobbley i = items.get(name) 2573eb8dc403SDave Cobbley if not i: 2574eb8dc403SDave Cobbley i = ([],[]) 2575eb8dc403SDave Cobbley items[name] = i 2576eb8dc403SDave Cobbley i[0].append(s) 2577eb8dc403SDave Cobbley i[1].append(v) 2578eb8dc403SDave Cobbley 2579eb8dc403SDave Cobbley f.write("\n_lr_action_items = {") 2580eb8dc403SDave Cobbley for k,v in items.items(): 2581eb8dc403SDave Cobbley f.write("%r:([" % k) 2582eb8dc403SDave Cobbley for i in v[0]: 2583eb8dc403SDave Cobbley f.write("%r," % i) 2584eb8dc403SDave Cobbley f.write("],[") 2585eb8dc403SDave Cobbley for i in v[1]: 2586eb8dc403SDave Cobbley f.write("%r," % i) 2587eb8dc403SDave Cobbley 2588eb8dc403SDave Cobbley f.write("]),") 2589eb8dc403SDave Cobbley f.write("}\n") 2590eb8dc403SDave Cobbley 2591eb8dc403SDave Cobbley f.write(""" 2592eb8dc403SDave Cobbley_lr_action = { } 2593eb8dc403SDave Cobbleyfor _k, _v in _lr_action_items.items(): 2594eb8dc403SDave Cobbley for _x,_y in zip(_v[0],_v[1]): 2595eb8dc403SDave Cobbley if not _x in _lr_action: _lr_action[_x] = { } 2596eb8dc403SDave Cobbley _lr_action[_x][_k] = _y 2597eb8dc403SDave Cobbleydel _lr_action_items 2598eb8dc403SDave Cobbley""") 2599eb8dc403SDave Cobbley 2600eb8dc403SDave Cobbley else: 2601eb8dc403SDave Cobbley 
f.write("\n_lr_action = { "); 2602eb8dc403SDave Cobbley for k,v in self.lr_action.items(): 2603eb8dc403SDave Cobbley f.write("(%r,%r):%r," % (k[0],k[1],v)) 2604eb8dc403SDave Cobbley f.write("}\n"); 2605eb8dc403SDave Cobbley 2606eb8dc403SDave Cobbley if smaller: 2607eb8dc403SDave Cobbley # Factor out names to try and make smaller 2608eb8dc403SDave Cobbley items = { } 2609eb8dc403SDave Cobbley 2610eb8dc403SDave Cobbley for s,nd in self.lr_goto.items(): 2611eb8dc403SDave Cobbley for name,v in nd.items(): 2612eb8dc403SDave Cobbley i = items.get(name) 2613eb8dc403SDave Cobbley if not i: 2614eb8dc403SDave Cobbley i = ([],[]) 2615eb8dc403SDave Cobbley items[name] = i 2616eb8dc403SDave Cobbley i[0].append(s) 2617eb8dc403SDave Cobbley i[1].append(v) 2618eb8dc403SDave Cobbley 2619eb8dc403SDave Cobbley f.write("\n_lr_goto_items = {") 2620eb8dc403SDave Cobbley for k,v in items.items(): 2621eb8dc403SDave Cobbley f.write("%r:([" % k) 2622eb8dc403SDave Cobbley for i in v[0]: 2623eb8dc403SDave Cobbley f.write("%r," % i) 2624eb8dc403SDave Cobbley f.write("],[") 2625eb8dc403SDave Cobbley for i in v[1]: 2626eb8dc403SDave Cobbley f.write("%r," % i) 2627eb8dc403SDave Cobbley 2628eb8dc403SDave Cobbley f.write("]),") 2629eb8dc403SDave Cobbley f.write("}\n") 2630eb8dc403SDave Cobbley 2631eb8dc403SDave Cobbley f.write(""" 2632eb8dc403SDave Cobbley_lr_goto = { } 2633eb8dc403SDave Cobbleyfor _k, _v in _lr_goto_items.items(): 2634eb8dc403SDave Cobbley for _x,_y in zip(_v[0],_v[1]): 2635eb8dc403SDave Cobbley if not _x in _lr_goto: _lr_goto[_x] = { } 2636eb8dc403SDave Cobbley _lr_goto[_x][_k] = _y 2637eb8dc403SDave Cobbleydel _lr_goto_items 2638eb8dc403SDave Cobbley""") 2639eb8dc403SDave Cobbley else: 2640eb8dc403SDave Cobbley f.write("\n_lr_goto = { "); 2641eb8dc403SDave Cobbley for k,v in self.lr_goto.items(): 2642eb8dc403SDave Cobbley f.write("(%r,%r):%r," % (k[0],k[1],v)) 2643eb8dc403SDave Cobbley f.write("}\n"); 2644eb8dc403SDave Cobbley 2645eb8dc403SDave Cobbley # Write production table 
2646eb8dc403SDave Cobbley f.write("_lr_productions = [\n") 2647eb8dc403SDave Cobbley for p in self.lr_productions: 2648eb8dc403SDave Cobbley if p.func: 2649eb8dc403SDave Cobbley f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) 2650eb8dc403SDave Cobbley else: 2651eb8dc403SDave Cobbley f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) 2652eb8dc403SDave Cobbley f.write("]\n") 2653eb8dc403SDave Cobbley f.close() 2654eb8dc403SDave Cobbley 2655eb8dc403SDave Cobbley except IOError: 2656eb8dc403SDave Cobbley e = sys.exc_info()[1] 2657eb8dc403SDave Cobbley sys.stderr.write("Unable to create '%s'\n" % filename) 2658eb8dc403SDave Cobbley sys.stderr.write(str(e)+"\n") 2659eb8dc403SDave Cobbley return 2660eb8dc403SDave Cobbley 2661eb8dc403SDave Cobbley 2662eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2663eb8dc403SDave Cobbley # pickle_table() 2664eb8dc403SDave Cobbley # 2665eb8dc403SDave Cobbley # This function pickles the LR parsing tables to a supplied file object 2666eb8dc403SDave Cobbley # ----------------------------------------------------------------------------- 2667eb8dc403SDave Cobbley 2668eb8dc403SDave Cobbley def pickle_table(self,filename,signature=""): 2669eb8dc403SDave Cobbley try: 2670eb8dc403SDave Cobbley import cPickle as pickle 2671eb8dc403SDave Cobbley except ImportError: 2672eb8dc403SDave Cobbley import pickle 2673eb8dc403SDave Cobbley outf = open(filename,"wb") 2674eb8dc403SDave Cobbley pickle.dump(__tabversion__,outf,pickle_protocol) 2675eb8dc403SDave Cobbley pickle.dump(self.lr_method,outf,pickle_protocol) 2676eb8dc403SDave Cobbley pickle.dump(signature,outf,pickle_protocol) 2677eb8dc403SDave Cobbley pickle.dump(self.lr_action,outf,pickle_protocol) 2678eb8dc403SDave Cobbley pickle.dump(self.lr_goto,outf,pickle_protocol) 2679eb8dc403SDave Cobbley 2680eb8dc403SDave Cobbley outp = [] 2681eb8dc403SDave Cobbley for p in self.lr_productions: 
2682eb8dc403SDave Cobbley if p.func: 2683eb8dc403SDave Cobbley outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) 2684eb8dc403SDave Cobbley else: 2685eb8dc403SDave Cobbley outp.append((str(p),p.name,p.len,None,None,None)) 2686eb8dc403SDave Cobbley pickle.dump(outp,outf,pickle_protocol) 2687eb8dc403SDave Cobbley outf.close() 2688eb8dc403SDave Cobbley 2689eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 2690eb8dc403SDave Cobbley# === INTROSPECTION === 2691eb8dc403SDave Cobbley# 2692eb8dc403SDave Cobbley# The following functions and classes are used to implement the PLY 2693eb8dc403SDave Cobbley# introspection features followed by the yacc() function itself. 2694eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 2695eb8dc403SDave Cobbley 2696eb8dc403SDave Cobbley# ----------------------------------------------------------------------------- 2697eb8dc403SDave Cobbley# get_caller_module_dict() 2698eb8dc403SDave Cobbley# 2699eb8dc403SDave Cobbley# This function returns a dictionary containing all of the symbols defined within 2700eb8dc403SDave Cobbley# a caller further down the call stack. This is used to get the environment 2701eb8dc403SDave Cobbley# associated with the yacc() call if none was provided. 
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    """Return the symbol dictionary of a frame *levels* up the call stack.

    Globals of the target frame are copied first; if that frame has a
    distinct local namespace, its locals are layered on top.  Used by
    yacc() to discover grammar definitions when no module is supplied.
    """
    try:
        raise RuntimeError
    except RuntimeError:
        # The traceback frame is this function's own frame; step upward.
        frame = sys.exc_info()[2].tb_frame
        for _ in range(levels):
            frame = frame.f_back
        symbols = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            symbols.update(frame.f_locals)
        return symbols

# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    """Parse the grammar rules in docstring *doc* into production tuples.

    Returns a list of (file, line, prodname, syms) entries, one per rule
    alternative.  A leading '|' continues the most recent rule.  Raises
    SyntaxError (with file:line context) on malformed input.
    """
    grammar = []
    prev_name = None          # rule that a '|' continuation attaches to
    cur_line = line
    for raw in doc.splitlines():
        cur_line += 1
        parts = raw.split()
        if not parts:
            continue
        try:
            if parts[0] == '|':
                # This is a continuation of a previous rule
                if not prev_name:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, cur_line))
                prodname = prev_name
                syms = parts[1:]
            else:
                prodname = parts[0]
                prev_name = prodname
                syms = parts[2:]
                assign = parts[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, cur_line))

            grammar.append((file, cur_line, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a rule with no ':' token at all)
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file, cur_line, raw.strip()))

    return grammar

# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    """Collects and validates parser information from a module dictionary.

    get_all() extracts the start symbol, error function, token list,
    precedence table and p_* rule functions from self.pdict; validate_all()
    checks them and sets self.error on any problem.
    """
    def __init__(self, pdict, log=None):
        self.pdict = pdict          # module dictionary to introspect
        self.start = None           # start symbol ('start' entry)
        self.error_func = None      # p_error handler
        self.tokens = None          # token list
        self.files = {}             # files in which rules were defined
        self.grammar = []           # extracted (funcname, production) pairs
        self.error = 0              # set to 1 when validation fails

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return an MD5 digest of start/precedence/tokens/rule docstrings.

        Used only to detect grammar changes (not for security), hence the
        usedforsecurity=False attempt; falls back for hashlib builds that
        don't accept that keyword.
        """
        try:
            import hashlib
        except ImportError:
            raise RuntimeError("Unable to import hashlib")
        try:
            sig = hashlib.new('MD5', usedforsecurity=False)
        except TypeError:
            # Some configurations don't appear to support two arguments
            sig = hashlib.new('MD5')
        try:
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError, ValueError):
            pass
        return sig.digest()

    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base, ext = os.path.splitext(filename)
            if ext != '.py': return 1          # No idea. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue

            counthash = {}
            for linen, l in enumerate(lines):
                linen += 1                      # report 1-based line numbers
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            # Methods carry an implicit self, so require one extra argument
            if (func_code(self.error_func).co_argcount != 1 + ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument", efile, eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens", None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence", None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)", p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc, str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term, str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    # Levels are 1-based; 0 is reserved for "no precedence"
                    preclist.append((term, assoc, level + 1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line, file, name, item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions


    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument", file, line, func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)", file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v, types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v, types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    # Heuristic: a one-argument function whose docstring's
                    # second word is ':' probably is a mis-named grammar rule
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno, n)
                except Exception:
                    pass

        self.grammar = grammar

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):

    global parse        # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

Cobbley if picklefile: 3051eb8dc403SDave Cobbley write_tables = 0 3052eb8dc403SDave Cobbley 3053eb8dc403SDave Cobbley if errorlog is None: 3054eb8dc403SDave Cobbley errorlog = PlyLogger(sys.stderr) 3055eb8dc403SDave Cobbley 3056eb8dc403SDave Cobbley # Get the module dictionary used for the parser 3057eb8dc403SDave Cobbley if module: 3058eb8dc403SDave Cobbley _items = [(k,getattr(module,k)) for k in dir(module)] 3059eb8dc403SDave Cobbley pdict = dict(_items) 3060eb8dc403SDave Cobbley else: 3061eb8dc403SDave Cobbley pdict = get_caller_module_dict(2) 3062eb8dc403SDave Cobbley 3063eb8dc403SDave Cobbley # Collect parser information from the dictionary 3064eb8dc403SDave Cobbley pinfo = ParserReflect(pdict,log=errorlog) 3065eb8dc403SDave Cobbley pinfo.get_all() 3066eb8dc403SDave Cobbley 3067eb8dc403SDave Cobbley if pinfo.error: 3068eb8dc403SDave Cobbley raise YaccError("Unable to build parser") 3069eb8dc403SDave Cobbley 3070eb8dc403SDave Cobbley # Check signature against table files (if any) 3071eb8dc403SDave Cobbley signature = pinfo.signature() 3072eb8dc403SDave Cobbley 3073eb8dc403SDave Cobbley # Read the tables 3074eb8dc403SDave Cobbley try: 3075eb8dc403SDave Cobbley lr = LRTable() 3076eb8dc403SDave Cobbley if picklefile: 3077eb8dc403SDave Cobbley read_signature = lr.read_pickle(picklefile) 3078eb8dc403SDave Cobbley else: 3079eb8dc403SDave Cobbley read_signature = lr.read_table(tabmodule) 3080eb8dc403SDave Cobbley if optimize or (read_signature == signature): 3081eb8dc403SDave Cobbley try: 3082eb8dc403SDave Cobbley lr.bind_callables(pinfo.pdict) 3083eb8dc403SDave Cobbley parser = LRParser(lr,pinfo.error_func) 3084eb8dc403SDave Cobbley parse = parser.parse 3085eb8dc403SDave Cobbley return parser 3086eb8dc403SDave Cobbley except Exception: 3087eb8dc403SDave Cobbley e = sys.exc_info()[1] 3088eb8dc403SDave Cobbley errorlog.warning("There was a problem loading the table file: %s", repr(e)) 3089eb8dc403SDave Cobbley except VersionError: 3090eb8dc403SDave Cobbley e = 
sys.exc_info() 3091eb8dc403SDave Cobbley errorlog.warning(str(e)) 3092eb8dc403SDave Cobbley except Exception: 3093eb8dc403SDave Cobbley pass 3094eb8dc403SDave Cobbley 3095eb8dc403SDave Cobbley if debuglog is None: 3096eb8dc403SDave Cobbley if debug: 3097eb8dc403SDave Cobbley debuglog = PlyLogger(open(debugfile,"w")) 3098eb8dc403SDave Cobbley else: 3099eb8dc403SDave Cobbley debuglog = NullLogger() 3100eb8dc403SDave Cobbley 3101eb8dc403SDave Cobbley debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 3102eb8dc403SDave Cobbley 3103eb8dc403SDave Cobbley 3104eb8dc403SDave Cobbley errors = 0 3105eb8dc403SDave Cobbley 3106eb8dc403SDave Cobbley # Validate the parser information 3107eb8dc403SDave Cobbley if pinfo.validate_all(): 3108eb8dc403SDave Cobbley raise YaccError("Unable to build parser") 3109eb8dc403SDave Cobbley 3110eb8dc403SDave Cobbley if not pinfo.error_func: 3111eb8dc403SDave Cobbley errorlog.warning("no p_error() function is defined") 3112eb8dc403SDave Cobbley 3113eb8dc403SDave Cobbley # Create a grammar object 3114eb8dc403SDave Cobbley grammar = Grammar(pinfo.tokens) 3115eb8dc403SDave Cobbley 3116eb8dc403SDave Cobbley # Set precedence level for terminals 3117eb8dc403SDave Cobbley for term, assoc, level in pinfo.preclist: 3118eb8dc403SDave Cobbley try: 3119eb8dc403SDave Cobbley grammar.set_precedence(term,assoc,level) 3120eb8dc403SDave Cobbley except GrammarError: 3121eb8dc403SDave Cobbley e = sys.exc_info()[1] 3122eb8dc403SDave Cobbley errorlog.warning("%s",str(e)) 3123eb8dc403SDave Cobbley 3124eb8dc403SDave Cobbley # Add productions to the grammar 3125eb8dc403SDave Cobbley for funcname, gram in pinfo.grammar: 3126eb8dc403SDave Cobbley file, line, prodname, syms = gram 3127eb8dc403SDave Cobbley try: 3128eb8dc403SDave Cobbley grammar.add_production(prodname,syms,funcname,file,line) 3129eb8dc403SDave Cobbley except GrammarError: 3130eb8dc403SDave Cobbley e = sys.exc_info()[1] 3131eb8dc403SDave Cobbley errorlog.error("%s",str(e)) 
3132eb8dc403SDave Cobbley errors = 1 3133eb8dc403SDave Cobbley 3134eb8dc403SDave Cobbley # Set the grammar start symbols 3135eb8dc403SDave Cobbley try: 3136eb8dc403SDave Cobbley if start is None: 3137eb8dc403SDave Cobbley grammar.set_start(pinfo.start) 3138eb8dc403SDave Cobbley else: 3139eb8dc403SDave Cobbley grammar.set_start(start) 3140eb8dc403SDave Cobbley except GrammarError: 3141eb8dc403SDave Cobbley e = sys.exc_info()[1] 3142eb8dc403SDave Cobbley errorlog.error(str(e)) 3143eb8dc403SDave Cobbley errors = 1 3144eb8dc403SDave Cobbley 3145eb8dc403SDave Cobbley if errors: 3146eb8dc403SDave Cobbley raise YaccError("Unable to build parser") 3147eb8dc403SDave Cobbley 3148eb8dc403SDave Cobbley # Verify the grammar structure 3149eb8dc403SDave Cobbley undefined_symbols = grammar.undefined_symbols() 3150eb8dc403SDave Cobbley for sym, prod in undefined_symbols: 3151eb8dc403SDave Cobbley errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) 3152eb8dc403SDave Cobbley errors = 1 3153eb8dc403SDave Cobbley 3154eb8dc403SDave Cobbley unused_terminals = grammar.unused_terminals() 3155eb8dc403SDave Cobbley if unused_terminals: 3156eb8dc403SDave Cobbley debuglog.info("") 3157eb8dc403SDave Cobbley debuglog.info("Unused terminals:") 3158eb8dc403SDave Cobbley debuglog.info("") 3159eb8dc403SDave Cobbley for term in unused_terminals: 3160eb8dc403SDave Cobbley errorlog.warning("Token '%s' defined, but not used", term) 3161eb8dc403SDave Cobbley debuglog.info(" %s", term) 3162eb8dc403SDave Cobbley 3163eb8dc403SDave Cobbley # Print out all productions to the debug log 3164eb8dc403SDave Cobbley if debug: 3165eb8dc403SDave Cobbley debuglog.info("") 3166eb8dc403SDave Cobbley debuglog.info("Grammar") 3167eb8dc403SDave Cobbley debuglog.info("") 3168eb8dc403SDave Cobbley for n,p in enumerate(grammar.Productions): 3169eb8dc403SDave Cobbley debuglog.info("Rule %-5d %s", n, p) 3170eb8dc403SDave Cobbley 3171eb8dc403SDave Cobbley # Find unused 
non-terminals 3172eb8dc403SDave Cobbley unused_rules = grammar.unused_rules() 3173eb8dc403SDave Cobbley for prod in unused_rules: 3174eb8dc403SDave Cobbley errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) 3175eb8dc403SDave Cobbley 3176eb8dc403SDave Cobbley if len(unused_terminals) == 1: 3177eb8dc403SDave Cobbley errorlog.warning("There is 1 unused token") 3178eb8dc403SDave Cobbley if len(unused_terminals) > 1: 3179eb8dc403SDave Cobbley errorlog.warning("There are %d unused tokens", len(unused_terminals)) 3180eb8dc403SDave Cobbley 3181eb8dc403SDave Cobbley if len(unused_rules) == 1: 3182eb8dc403SDave Cobbley errorlog.warning("There is 1 unused rule") 3183eb8dc403SDave Cobbley if len(unused_rules) > 1: 3184eb8dc403SDave Cobbley errorlog.warning("There are %d unused rules", len(unused_rules)) 3185eb8dc403SDave Cobbley 3186eb8dc403SDave Cobbley if debug: 3187eb8dc403SDave Cobbley debuglog.info("") 3188eb8dc403SDave Cobbley debuglog.info("Terminals, with rules where they appear") 3189eb8dc403SDave Cobbley debuglog.info("") 3190eb8dc403SDave Cobbley terms = list(grammar.Terminals) 3191eb8dc403SDave Cobbley terms.sort() 3192eb8dc403SDave Cobbley for term in terms: 3193eb8dc403SDave Cobbley debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 3194eb8dc403SDave Cobbley 3195eb8dc403SDave Cobbley debuglog.info("") 3196eb8dc403SDave Cobbley debuglog.info("Nonterminals, with rules where they appear") 3197eb8dc403SDave Cobbley debuglog.info("") 3198eb8dc403SDave Cobbley nonterms = list(grammar.Nonterminals) 3199eb8dc403SDave Cobbley nonterms.sort() 3200eb8dc403SDave Cobbley for nonterm in nonterms: 3201eb8dc403SDave Cobbley debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 3202eb8dc403SDave Cobbley debuglog.info("") 3203eb8dc403SDave Cobbley 3204eb8dc403SDave Cobbley if check_recursion: 3205eb8dc403SDave Cobbley unreachable = grammar.find_unreachable() 
3206eb8dc403SDave Cobbley for u in unreachable: 3207eb8dc403SDave Cobbley errorlog.warning("Symbol '%s' is unreachable",u) 3208eb8dc403SDave Cobbley 3209eb8dc403SDave Cobbley infinite = grammar.infinite_cycles() 3210eb8dc403SDave Cobbley for inf in infinite: 3211eb8dc403SDave Cobbley errorlog.error("Infinite recursion detected for symbol '%s'", inf) 3212eb8dc403SDave Cobbley errors = 1 3213eb8dc403SDave Cobbley 3214eb8dc403SDave Cobbley unused_prec = grammar.unused_precedence() 3215eb8dc403SDave Cobbley for term, assoc in unused_prec: 3216eb8dc403SDave Cobbley errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) 3217eb8dc403SDave Cobbley errors = 1 3218eb8dc403SDave Cobbley 3219eb8dc403SDave Cobbley if errors: 3220eb8dc403SDave Cobbley raise YaccError("Unable to build parser") 3221eb8dc403SDave Cobbley 3222eb8dc403SDave Cobbley # Run the LRGeneratedTable on the grammar 3223eb8dc403SDave Cobbley if debug: 3224eb8dc403SDave Cobbley errorlog.debug("Generating %s tables", method) 3225eb8dc403SDave Cobbley 3226eb8dc403SDave Cobbley lr = LRGeneratedTable(grammar,method,debuglog) 3227eb8dc403SDave Cobbley 3228eb8dc403SDave Cobbley if debug: 3229eb8dc403SDave Cobbley num_sr = len(lr.sr_conflicts) 3230eb8dc403SDave Cobbley 3231eb8dc403SDave Cobbley # Report shift/reduce and reduce/reduce conflicts 3232eb8dc403SDave Cobbley if num_sr == 1: 3233eb8dc403SDave Cobbley errorlog.warning("1 shift/reduce conflict") 3234eb8dc403SDave Cobbley elif num_sr > 1: 3235eb8dc403SDave Cobbley errorlog.warning("%d shift/reduce conflicts", num_sr) 3236eb8dc403SDave Cobbley 3237eb8dc403SDave Cobbley num_rr = len(lr.rr_conflicts) 3238eb8dc403SDave Cobbley if num_rr == 1: 3239eb8dc403SDave Cobbley errorlog.warning("1 reduce/reduce conflict") 3240eb8dc403SDave Cobbley elif num_rr > 1: 3241eb8dc403SDave Cobbley errorlog.warning("%d reduce/reduce conflicts", num_rr) 3242eb8dc403SDave Cobbley 3243eb8dc403SDave Cobbley # Write out conflicts to the output file 
3244eb8dc403SDave Cobbley if debug and (lr.sr_conflicts or lr.rr_conflicts): 3245eb8dc403SDave Cobbley debuglog.warning("") 3246eb8dc403SDave Cobbley debuglog.warning("Conflicts:") 3247eb8dc403SDave Cobbley debuglog.warning("") 3248eb8dc403SDave Cobbley 3249eb8dc403SDave Cobbley for state, tok, resolution in lr.sr_conflicts: 3250eb8dc403SDave Cobbley debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) 3251eb8dc403SDave Cobbley 3252eb8dc403SDave Cobbley already_reported = {} 3253eb8dc403SDave Cobbley for state, rule, rejected in lr.rr_conflicts: 3254eb8dc403SDave Cobbley if (state,id(rule),id(rejected)) in already_reported: 3255eb8dc403SDave Cobbley continue 3256eb8dc403SDave Cobbley debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3257eb8dc403SDave Cobbley debuglog.warning("rejected rule (%s) in state %d", rejected,state) 3258eb8dc403SDave Cobbley errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3259eb8dc403SDave Cobbley errorlog.warning("rejected rule (%s) in state %d", rejected, state) 3260eb8dc403SDave Cobbley already_reported[state,id(rule),id(rejected)] = 1 3261eb8dc403SDave Cobbley 3262eb8dc403SDave Cobbley warned_never = [] 3263eb8dc403SDave Cobbley for state, rule, rejected in lr.rr_conflicts: 3264eb8dc403SDave Cobbley if not rejected.reduced and (rejected not in warned_never): 3265eb8dc403SDave Cobbley debuglog.warning("Rule (%s) is never reduced", rejected) 3266eb8dc403SDave Cobbley errorlog.warning("Rule (%s) is never reduced", rejected) 3267eb8dc403SDave Cobbley warned_never.append(rejected) 3268eb8dc403SDave Cobbley 3269eb8dc403SDave Cobbley # Write the table file if requested 3270eb8dc403SDave Cobbley if write_tables: 3271eb8dc403SDave Cobbley lr.write_table(tabmodule,outputdir,signature) 3272eb8dc403SDave Cobbley 3273eb8dc403SDave Cobbley # Write a pickled version of the tables 3274eb8dc403SDave Cobbley if 
picklefile: 3275eb8dc403SDave Cobbley lr.pickle_table(picklefile,signature) 3276eb8dc403SDave Cobbley 3277eb8dc403SDave Cobbley # Build the parser 3278eb8dc403SDave Cobbley lr.bind_callables(pinfo.pdict) 3279eb8dc403SDave Cobbley parser = LRParser(lr,pinfo.error_func) 3280eb8dc403SDave Cobbley 3281eb8dc403SDave Cobbley parse = parser.parse 3282eb8dc403SDave Cobbley return parser 3283