# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2009,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."   Modify the code at your
# own risk!
# ----------------------------------------------------------------------------
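
# Example (illustrative): a grammar rule is an ordinary Python function whose
# docstring holds the BNF; the function body is the reduction action.  The
# names expr, PLUS, and term below are hypothetical, not part of this module:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]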

__version__    = "3.3"
__tabversion__ = "3.2"       # Table version

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 0                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

import re, types, sys, os.path

# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self,f):
        self.f = f
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self,name):
        return self
    def __call__(self,*args,**kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception): pass

# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit]+" ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__,id(r))
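
# Example (illustrative): instead of the default PlyLogger, a standard
# logging.Logger can be supplied, assuming the debuglog/errorlog arguments
# of the top-level yacc() entry point defined later in this module:
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)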

#-----------------------------------------------------------------------------
#                            === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None
    def __getitem__(self,n):
        if isinstance(n,slice):
            return [self[i] for i in range(*(n.indices(len(self.slice))))]
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos

    def error(self):
        raise SyntaxError


# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf

    def errok(self):
        self.errorok = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
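
    # Example (illustrative): inside a grammar action, the YaccProduction
    # object 'p' maps indices to symbol values and exposes position helpers
    # (position data is filled in when parsing with tracking=1).  The names
    # expression and BINOP are hypothetical:
    #
    #     def p_expression_binop(p):
    #         'expression : expression BINOP expression'
    #         p[0] = (p[2], p[1], p[3])      # p[n] reads .value of symbol n
    #         start, end = p.linespan(3)     # line range of the right operand
    #         p.set_lineno(0, p.lineno(1))   # propagate a line number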

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.   For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack  : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None
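                    # A reduction by production -t is about to be performed:
                    # action table entries encode the move in their sign
                    # (t > 0 shift to state t, t < 0 reduce by production -t,
                    # t == 0 accept), which is why prod[-t] selects the rule.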
                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done   : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t is None:

                # --! DEBUG
                debug.error('Error  : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
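                # Example (illustrative): a user-supplied p_error() hook.
                # The global errok()/token()/restart() helpers installed
                # below are available inside it for panic-mode recovery;
                # SEMI is a hypothetical synchronization token:
                #
                #     def p_error(tok):
                #         print("Syntax error at %r" % (tok.value if tok else 'EOF'))
                #         while True:
                #             tok = token()            # fetch the next token
                #             if not tok or tok.type == 'SEMI': break
                #         errok()                      # resume normal parsing
                #         return tok                   # becomes the new lookahead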
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
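
    # Example (illustrative): error recovery is usually driven by grammar
    # rules containing the special 'error' token, which match the error
    # symbol pushed by the recovery code above.  statement/SEMI are
    # hypothetical names:
    #
    #     def p_statement_error(p):
    #         'statement : error SEMI'
    #         print("Discarded malformed statement")
    #         p[0] = None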

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                            continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user-defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are computed automatically:
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = self.Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files.   It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self,str,name,len,func,file,line):
        self.name     = name
        self.len      = len
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.str      = str
    def __str__(self):
        return self.str
    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
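
# Example (illustrative): for a rule function named 'p_expr' whose docstring
# is 'expr : expr PLUS term', the resulting Production has name='expr',
# prod=('expr','PLUS','term'), and len=3, and printing it gives
# "expr -> expr PLUS term".  bind() later resolves the stored function
# name through a dictionary such as {'p_expr': p_expr}.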

# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here are
# the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item.  For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self,p,n):
        self.name       = p.name
        self.prod       = list(p.prod)
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.prod.insert(n,".")
        self.prod       = tuple(self.prod)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem("+str(self)+")"

# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None

# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass

class Grammar(object):
    def __init__(self,terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar
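
    # Example (illustrative): the Grammar class can be driven directly,
    # without the function-based front end.  PLUS/NUMBER are hypothetical
    # token names:
    #
    #     g = Grammar(['PLUS','NUMBER'])
    #     g.set_precedence('PLUS','left',1)
    #     g.add_production('expr',['expr','PLUS','expr'])
    #     g.add_production('expr',['NUMBER'])
    #     g.set_start('expr')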

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self,index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
        assert self.Productions == [None],"Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ['left','right','nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc,level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self,prodname,syms,func=None,file='',line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
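
        # Example (illustrative): %prec lets a rule borrow the precedence of a
        # named terminal.  In a grammar file this looks like (UMINUS being a
        # hypothetical precedence-only name declared in the precedence table):
        #
        #     def p_expr_uminus(p):
        #         'expression : MINUS expression %prec UMINUS'
        #         p[0] = -p[2]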

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self,start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start
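
    # Example (illustrative): g.set_start('expr') installs production 0 as the
    # augmented rule S' -> expr; calling it with no argument defaults to the
    # name of the first rule that was added.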

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s,[]):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = { }
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        mark_reachable_from( self.Productions[0].prod[0] )

        return [s for s in list(self.Nonterminals)
                if not reachable[s]]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite


    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p: continue

            for s in p.prod:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    result.append((s,p))
        return result
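
    # Example (illustrative): a rule set with no terminal-only derivation,
    # such as
    #
    #     expr : LPAREN expr RPAREN
    #
    # with no base case, can never derive a string of only terminals, so
    # infinite_cycles() would report 'expr'.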

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s,v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s,v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
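
    # Example (illustrative): for a grammar fragment
    #
    #     opt  : COMMA
    #          | <empty>
    #     pair : opt NUM
    #
    # FIRST(opt) = {COMMA, <empty>}, so _first(('opt','NUM')) yields
    # ['COMMA','NUM']: NUM is included because opt can derive empty.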

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize all follow sets to empty; '$end' is then added to the
        # follow set of the start symbol.
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of Follow(p.name) to Follow(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
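
    # Example (illustrative): continuing the fragment sketched above with
    # start symbol 'pair', FOLLOW(pair) = {$end} and FOLLOW(opt) = {NUM},
    # since NUM is the only symbol that can appear immediately after opt.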
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self,module):
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
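
# A minimal usage sketch for LRTable (assumes a previously generated
# 'parsetab' module is importable; the names here are illustrative):
#
#     lr = LRTable()
#     sig = lr.read_table('parsetab')    # raises VersionError if tables are stale
#     lr.bind_callables(globals())       # re-attach the p_* action functions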
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables
# from a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#      F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X,R,FP):
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F

def traverse(x,N,stack,F,X,R,FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(x) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
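
# Illustrative sketch (hypothetical inputs): with X = ['a','b'], a relation
# where only 'a' R 'b' holds, and F'(a) = ['x'], F'(b) = ['y'], digraph()
# merges F(b) into F(a):
#
#     R  = lambda x: ['b'] if x == 'a' else []
#     FP = lambda x: {'a': ['x'], 'b': ['y']}[x]
#     digraph(['a','b'], R, FP)     # -> {'a': ['x', 'y'], 'b': ['y']}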
class LALRError(YaccError): pass

# -----------------------------------------------------------------------------
#                          == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                          # Action table
        self.lr_goto = {}                            # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}                      # Cache of computed gotos
        self.lr0_cidhash = {}                        # Cache of closures

        self._add_count = 0                          # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                          # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.  This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self,I,x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g

    # Compute the LR(0) sets-of-items function
    def lr0_items(self):

        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g: continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
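
    # Illustrative sketch (hypothetical grammar S' -> S ; S -> ( S ) | x):
    # C[0] is the closure of [S' -> . S], which also contains [S -> . ( S )]
    # and [S -> . x].  lr0_goto(C[0],'(') then yields the state containing
    # [S -> ( . S )] together with the closure items for S again.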
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #  McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------

    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = {}
        num_nullable = 0
        while 1:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable[p.name] = 1
                    continue
                for t in p.prod:
                    if not t in nullable: break
                else:
                    nullable[p.name] = 1
            if len(nullable) == num_nullable: break
            num_nullable = len(nullable)
        return nullable

    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.  These are transitions in which a dot appears immediately before
    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self,C):
        trans = []
        for state in range(len(C)):
            for p in C[state]:
                if p.lr_index < p.len - 1:
                    t = (state,p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans: trans.append(t)
        return trans

    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self,C,trans,nullable):
        state,N = trans
        terms = []

        g = self.lr0_goto(C[state],N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms: terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms

    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------

    def reads_relation(self,C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state],N)
        j = self.lr0_cidhash.get(id(g),-1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j,a))

        return rel

    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."   We then build a relationship between this final
    # state and the starting state.   These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self,C,trans,nullable):

        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation;
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No, forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)      # Go to next state

                # When we get here, j is the final state.  Now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len: continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))
            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
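
    # Illustrative sketch (hypothetical): for a production B -> beta A gamma
    # where every symbol of gamma is nullable, the transition on A out of the
    # state reached by reading beta INCLUDES the transition on B out of the
    # originating state, so Follow(p,A) later absorbs Follow(p',B) in
    # compute_follow_sets().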
    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        = Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of nullable nonterminals
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self,C, ntrans, nullable):
        FP = lambda x: self.dr_relation(C,x,nullable)
        R  = lambda x: self.reads_relation(C,x,nullable)
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    #     Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #          ntrans   = Set of nonterminal transitions
    #          readsets = Readset (previously computed)
    #          inclsets = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self,ntrans,readsets,inclsets):
        FP = lambda x: readsets[x]
        R  = lambda x: inclsets.get(x,[])
        F = digraph(ntrans,R,FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:  lookbacks - Set of lookback relations
    #          followset - Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self,lookbacks,followset):
        for trans,lb in lookbacks.items():
            # Loop over productions in lookback
            for state,p in lb:
                if not state in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans,[])
                for a in f:
                    if a not in p.lookaheads[state]: p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self,C):
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)
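
    # Net effect (sketch with hypothetical values): after add_lalr_lookaheads(C),
    # a production p that can be reduced in state 7 carries something like
    #
    #     p.lookaheads = {7: ['PLUS', ')', '$end']}
    #
    # and lr_parse_table() consults these per-state sets instead of the
    # grammar-wide Follow() sets used by SLR.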
    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto       # Goto array
        action = self.lr_action     # Action array
        log    = self.log           # Logger for output

        actionp = { }               # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]           # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. We have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.  In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. We have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    # -  if precedence of reduce rule is higher, we reduce.
                                    # -  if precedence of reduce is same and left assoc, we reduce.
                                    # -  otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))

                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
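
    # Illustrative sketch (hypothetical grammar): with precedence
    # (('left','PLUS'),('left','TIMES')), a state holding [expr -> expr PLUS expr .]
    # with TIMES as the lookahead resolves in favor of the shift (TIMES has the
    # higher level), while the same item with PLUS as the lookahead resolves as
    # a reduce, because 'left' associativity favors reduction at equal levels.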
    # -----------------------------------------------------------------------------
    # write()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------

    def write_table(self,modulename,outputdir='',signature=""):
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make the output smaller
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_action: _lr_action[_x] = { }
        _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ")
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            if smaller:
                # Factor out names to try and make the output smaller
                items = { }

                for s,nd in self.lr_goto.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_goto: _lr_goto[_x] = { }
        _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ")
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return


    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a supplied file object
    # -----------------------------------------------------------------------------

    def pickle_table(self,filename,signature=""):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        outf = open(filename,"wb")
        pickle.dump(__tabversion__,outf,pickle_protocol)
        pickle.dump(self.lr_method,outf,pickle_protocol)
        pickle.dump(signature,outf,pickle_protocol)
        pickle.dump(self.lr_action,outf,pickle_protocol)
        pickle.dump(self.lr_goto,outf,pickle_protocol)

        outp = []
        for p in self.lr_productions:
            if p.func:
                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
            else:
                outp.append((str(p),p.name,p.len,None,None,None))
        pickle.dump(outp,outf,pickle_protocol)
        outf.close()
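
# Usage sketch for the generated tables (hypothetical names): after building
# an LRGeneratedTable, the result can be persisted either as an importable
# module or as a pickle:
#
#     lr = LRGeneratedTable(grammar, 'LALR')
#     lr.write_table('parsetab', outputdir='.', signature=sig)
#     lr.pickle_table('parsetab.pickle', signature=sig)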
# -----------------------------------------------------------------------------
#                           === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    try:
        raise RuntimeError
    except RuntimeError:
        e,b,t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)

        return ldict

# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p: continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms   = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))

            grammar.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))

    return grammar
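
# Illustrative sketch: given a rule function whose docstring reads
#
#     expression : expression PLUS term
#                | term
#
# parse_grammar() returns one entry per alternative, roughly:
#
#     [(file, line+1, 'expression', ['expression','PLUS','term']),
#      (file, line+2, 'expression', ['term'])]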
2763# ----------------------------------------------------------------------------- 2764class ParserReflect(object): 2765 def __init__(self,pdict,log=None): 2766 self.pdict = pdict 2767 self.start = None 2768 self.error_func = None 2769 self.tokens = None 2770 self.files = {} 2771 self.grammar = [] 2772 self.error = 0 2773 2774 if log is None: 2775 self.log = PlyLogger(sys.stderr) 2776 else: 2777 self.log = log 2778 2779 # Get all of the basic information 2780 def get_all(self): 2781 self.get_start() 2782 self.get_error_func() 2783 self.get_tokens() 2784 self.get_precedence() 2785 self.get_pfunctions() 2786 2787 # Validate all of the information 2788 def validate_all(self): 2789 self.validate_start() 2790 self.validate_error_func() 2791 self.validate_tokens() 2792 self.validate_precedence() 2793 self.validate_pfunctions() 2794 self.validate_files() 2795 return self.error 2796 2797 # Compute a signature over the grammar 2798 def signature(self): 2799 try: 2800 import hashlib 2801 except ImportError: 2802 raise RuntimeError("Unable to import hashlib") 2803 try: 2804 sig = hashlib.new('MD5', usedforsecurity=False) 2805 except TypeError: 2806 # Some configurations don't appear to support two arguments 2807 sig = hashlib.new('MD5') 2808 try: 2809 if self.start: 2810 sig.update(self.start.encode('latin-1')) 2811 if self.prec: 2812 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) 2813 if self.tokens: 2814 sig.update(" ".join(self.tokens).encode('latin-1')) 2815 for f in self.pfuncs: 2816 if f[3]: 2817 sig.update(f[3].encode('latin-1')) 2818 except (TypeError,ValueError): 2819 pass 2820 return sig.digest() 2821 2822 # ----------------------------------------------------------------------------- 2823 # validate_file() 2824 # 2825 # This method checks to see if there are duplicated p_rulename() functions 2826 # in the parser module file. Without this function, it is really easy for 2827 # users to make mistakes by cutting and pasting code fragments (and it's a real 2828 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 2829 # we just do a little regular expression pattern matching of def statements 2830 # to try and detect duplicates. 2831 # ----------------------------------------------------------------------------- 2832 2833 def validate_files(self): 2834 # Match def p_funcname( 2835 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 2836 2837 for filename in self.files.keys(): 2838 base,ext = os.path.splitext(filename) 2839 if ext != '.py': return 1 # No idea. Assume it's okay. 2840 2841 try: 2842 f = open(filename) 2843 lines = f.readlines() 2844 f.close() 2845 except IOError: 2846 continue 2847 2848 counthash = { } 2849 for linen,l in enumerate(lines): 2850 linen += 1 2851 m = fre.match(l) 2852 if m: 2853 name = m.group(1) 2854 prev = counthash.get(name) 2855 if not prev: 2856 counthash[name] = linen 2857 else: 2858 self.log.warning("%s:%d: Function %s redefined. 

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions


    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    pass

        self.grammar = grammar

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):

    global parse      # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
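
# -----------------------------------------------------------------------------
# End-to-end usage sketch (hypothetical calculator fragment; assumes a PLY
# lexer providing the NUMBER and PLUS tokens has been built in the same
# module):
#
#     tokens = ('NUMBER','PLUS')
#
#     def p_expr_plus(p):
#         'expr : expr PLUS expr'
#         p[0] = p[1] + p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at %r" % (p,))
#
#     parser = yacc()              # builds (or loads) the LALR tables
#     result = parser.parse("1 + 2")
# -----------------------------------------------------------------------------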