Parser/spark.py

   1 #  Copyright (c) 1998-2002 John Aycock
   2 #
   3 #  Permission is hereby granted, free of charge, to any person obtaining
   4 #  a copy of this software and associated documentation files (the
   5 #  "Software"), to deal in the Software without restriction, including
   6 #  without limitation the rights to use, copy, modify, merge, publish,
   7 #  distribute, sublicense, and/or sell copies of the Software, and to
   8 #  permit persons to whom the Software is furnished to do so, subject to
   9 #  the following conditions:
  10 #
  11 #  The above copyright notice and this permission notice shall be
  12 #  included in all copies or substantial portions of the Software.
  13 #
  14 #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15 #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16 #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17 #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18 #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19 #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20 #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21
  22 __version__ = 'SPARK-0.7 (pre-alpha-5)'
  23
  24 import re
  25 import string
  26
  27 def _namelist(instance):
  28     namelist, namedict, classlist = [], {}, [instance.__class__]
  29     for c in classlist:
  30         for b in c.__bases__:
  31             classlist.append(b)
  32         for name in c.__dict__.keys():
  33             if not namedict.has_key(name):
  34                 namelist.append(name)
  35                 namedict[name] = 1
  36     return namelist
  37
  38 class GenericScanner:
  39     def __init__(self, flags=0):
  40         pattern = self.reflect()
  41         self.re = re.compile(pattern, re.VERBOSE|flags)
  42
  43         self.index2func = {}
  44         for name, number in self.re.groupindex.items():
  45             self.index2func[number-1] = getattr(self, 't_' + name)
  46
  47     def makeRE(self, name):
  48         doc = getattr(self, name).__doc__
  49         rv = '(?P<%s>%s)' % (name[2:], doc)
  50         return rv
  51
  52     def reflect(self):
  53         rv = []
  54         for name in _namelist(self):
  55             if name[:2] == 't_' and name != 't_default':
  56                 rv.append(self.makeRE(name))
  57
  58         rv.append(self.makeRE('t_default'))
  59         return string.join(rv, '|')
  60
  61     def error(self, s, pos):
  62         print "Lexical error at position %s" % pos
  63         raise SystemExit
  64
  65     def tokenize(self, s):
  66         pos = 0
  67         n = len(s)
  68         while pos < n:
  69             m = self.re.match(s, pos)
  70             if m is None:
  71                 self.error(s, pos)
  72
  73             groups = m.groups()
  74             for i in range(len(groups)):
  75                 if groups[i] and self.index2func.has_key(i):
  76                     self.index2func[i](groups[i])
  77             pos = m.end()
  78
  79     def t_default(self, s):
  80         r'( . | \n )+'
  81         print "Specification error: unmatched input"
  82         raise SystemExit
  83
  84 #
  85 #  Extracted from GenericParser and made global so that [un]picking works.
  86 #
  87 class _State:
  88     def __init__(self, stateno, items):
  89         self.T, self.complete, self.items = [], [], items
  90         self.stateno = stateno
  91
  92 class GenericParser:
  93     #
  94     #  An Earley parser, as per J. Earley, "An Efficient Context-Free
  95     #  Parsing Algorithm", CACM 13(2), pp. 94-102.  Also J. C. Earley,
  96     #  "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
  97     #  Carnegie-Mellon University, August 1968.  New formulation of
  98     #  the parser according to J. Aycock, "Practical Earley Parsing
  99     #  and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
 100     #  2001, and J. Aycock and R. N. Horspool, "Practical Earley
 101     #  Parsing", unpublished paper, 2001.
 102     #
 103
 104     def __init__(self, start):
 105         self.rules = {}
 106         self.rule2func = {}
 107         self.rule2name = {}
 108         self.collectRules()
 109         self.augment(start)
 110         self.ruleschanged = 1
 111
 112     _NULLABLE = '\e_'
 113     _START = 'START'
 114     _BOF = '|-'
 115
 116     #
 117     #  When pickling, take the time to generate the full state machine;
 118     #  some information is then extraneous, too.  Unfortunately we
 119     #  can't save the rule2func map.
 120     #
 121     def __getstate__(self):
 122         if self.ruleschanged:
 123             #
 124             #  XXX - duplicated from parse()
 125             #
 126             self.computeNull()
 127             self.newrules = {}
 128             self.new2old = {}
 129             self.makeNewRules()
 130             self.ruleschanged = 0
 131             self.edges, self.cores = {}, {}
 132             self.states = { 0: self.makeState0() }
 133             self.makeState(0, self._BOF)
 134         #
 135         #  XXX - should find a better way to do this..
 136         #
 137         changes = 1
 138         while changes:
 139             changes = 0
 140             for k, v in self.edges.items():
 141                 if v is None:
 142                     state, sym = k
 143                     if self.states.has_key(state):
 144                         self.goto(state, sym)
 145                         changes = 1
 146         rv = self.__dict__.copy()
 147         for s in self.states.values():
 148             del s.items
 149         del rv['rule2func']
 150         del rv['nullable']
 151         del rv['cores']
 152         return rv
 153
 154     def __setstate__(self, D):
 155         self.rules = {}
 156         self.rule2func = {}
 157         self.rule2name = {}
 158         self.collectRules()
 159         start = D['rules'][self._START][0][1][1]        # Blech.
 160         self.augment(start)
 161         D['rule2func'] = self.rule2func
 162         D['makeSet'] = self.makeSet_fast
 163         self.__dict__ = D
 164
 165     #
 166     #  A hook for GenericASTBuilder and GenericASTMatcher.  Mess
 167     #  thee not with this; nor shall thee toucheth the _preprocess
 168     #  argument to addRule.
 169     #
 170     def preprocess(self, rule, func):       return rule, func
 171
 172     def addRule(self, doc, func, _preprocess=1):
 173         fn = func
 174         rules = string.split(doc)
 175
 176         index = []
 177         for i in range(len(rules)):
 178             if rules[i] == '::=':
 179                 index.append(i-1)
 180         index.append(len(rules))
 181
 182         for i in range(len(index)-1):
 183             lhs = rules[index[i]]
 184             rhs = rules[index[i]+2:index[i+1]]
 185             rule = (lhs, tuple(rhs))
 186
 187             if _preprocess:
 188                 rule, fn = self.preprocess(rule, func)
 189
 190             if self.rules.has_key(lhs):
 191                 self.rules[lhs].append(rule)
 192             else:
 193                 self.rules[lhs] = [ rule ]
 194             self.rule2func[rule] = fn
 195             self.rule2name[rule] = func.__name__[2:]
 196         self.ruleschanged = 1
 197
 198     def collectRules(self):
 199         for name in _namelist(self):
 200             if name[:2] == 'p_':
 201                 func = getattr(self, name)
 202                 doc = func.__doc__
 203                 self.addRule(doc, func)
 204
 205     def augment(self, start):
 206         rule = '%s ::= %s %s' % (self._START, self._BOF, start)
 207         self.addRule(rule, lambda args: args[1], 0)
 208
 209     def computeNull(self):
 210         self.nullable = {}
 211         tbd = []
 212
 213         for rulelist in self.rules.values():
 214             lhs = rulelist[0][0]
 215             self.nullable[lhs] = 0
 216             for rule in rulelist:
 217                 rhs = rule[1]
 218                 if len(rhs) == 0:
 219                     self.nullable[lhs] = 1
 220                     continue
 221                 #
 222                 #  We only need to consider rules which
 223                 #  consist entirely of nonterminal symbols.
 224                 #  This should be a savings on typical
 225                 #  grammars.
 226                 #
 227                 for sym in rhs:
 228                     if not self.rules.has_key(sym):
 229                         break
 230                 else:
 231                     tbd.append(rule)
 232         changes = 1
 233         while changes:
 234             changes = 0
 235             for lhs, rhs in tbd:
 236                 if self.nullable[lhs]:
 237                     continue
 238                 for sym in rhs:
 239                     if not self.nullable[sym]:
 240                         break
 241                 else:
 242                     self.nullable[lhs] = 1
 243                     changes = 1
 244
 245     def makeState0(self):
 246         s0 = _State(0, [])
 247         for rule in self.newrules[self._START]:
 248             s0.items.append((rule, 0))
 249         return s0
 250
 251     def finalState(self, tokens):
 252         #
 253         #  Yuck.
 254         #
 255         if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
 256             return 1
 257         start = self.rules[self._START][0][1][1]
 258         return self.goto(1, start)
 259
 260     def makeNewRules(self):
 261         worklist = []
 262         for rulelist in self.rules.values():
 263             for rule in rulelist:
 264                 worklist.append((rule, 0, 1, rule))
 265
 266         for rule, i, candidate, oldrule in worklist:
 267             lhs, rhs = rule
 268             n = len(rhs)
 269             while i < n:
 270                 sym = rhs[i]
 271                 if not self.rules.has_key(sym) or \
 272                    not self.nullable[sym]:
 273                     candidate = 0
 274                     i = i + 1
 275                     continue
 276
 277                 newrhs = list(rhs)
 278                 newrhs[i] = self._NULLABLE+sym
 279                 newrule = (lhs, tuple(newrhs))
 280                 worklist.append((newrule, i+1,
 281                                  candidate, oldrule))
 282                 candidate = 0
 283                 i = i + 1
 284             else:
 285                 if candidate:
 286                     lhs = self._NULLABLE+lhs
 287                     rule = (lhs, rhs)
 288                 if self.newrules.has_key(lhs):
 289                     self.newrules[lhs].append(rule)
 290                 else:
 291                     self.newrules[lhs] = [ rule ]
 292                 self.new2old[rule] = oldrule
 293
 294     def typestring(self, token):
 295         return None
 296
 297     def error(self, token):
 298         print "Syntax error at or near `%s' token" % token
 299         raise SystemExit
 300
 301     def parse(self, tokens):
 302         sets = [ [(1,0), (2,0)] ]
 303         self.links = {}
 304
 305         if self.ruleschanged:
 306             self.computeNull()
 307             self.newrules = {}
 308             self.new2old = {}
 309             self.makeNewRules()
 310             self.ruleschanged = 0
 311             self.edges, self.cores = {}, {}
 312             self.states = { 0: self.makeState0() }
 313             self.makeState(0, self._BOF)
 314
 315         for i in xrange(len(tokens)):
 316             sets.append([])
 317
 318             if sets[i] == []:
 319                 break
 320             self.makeSet(tokens[i], sets, i)
 321         else:
 322             sets.append([])
 323             self.makeSet(None, sets, len(tokens))
 324
 325         #_dump(tokens, sets, self.states)
 326
 327         finalitem = (self.finalState(tokens), 0)
 328         if finalitem not in sets[-2]:
 329             if len(tokens) > 0:
 330                 self.error(tokens[i-1])
 331             else:
 332                 self.error(None)
 333
 334         return self.buildTree(self._START, finalitem,
 335                               tokens, len(sets)-2)
 336
 337     def isnullable(self, sym):
 338         #
 339         #  For symbols in G_e only.  If we weren't supporting 1.5,
 340         #  could just use sym.startswith().
 341         #
 342         return self._NULLABLE == sym[0:len(self._NULLABLE)]
 343
 344     def skip(self, (lhs, rhs), pos=0):
 345         n = len(rhs)
 346         while pos < n:
 347             if not self.isnullable(rhs[pos]):
 348                 break
 349             pos = pos + 1
 350         return pos
 351
 352     def makeState(self, state, sym):
 353         assert sym is not None
 354         #
 355         #  Compute \epsilon-kernel state's core and see if
 356         #  it exists already.
 357         #
 358         kitems = []
 359         for rule, pos in self.states[state].items:
 360             lhs, rhs = rule
 361             if rhs[pos:pos+1] == (sym,):
 362                 kitems.append((rule, self.skip(rule, pos+1)))
 363         core = kitems
 364
 365         core.sort()
 366         tcore = tuple(core)
 367         if self.cores.has_key(tcore):
 368             return self.cores[tcore]
 369         #
 370         #  Nope, doesn't exist.  Compute it and the associated
 371         #  \epsilon-nonkernel state together; we'll need it right away.
 372         #
 373         k = self.cores[tcore] = len(self.states)
 374         K, NK = _State(k, kitems), _State(k+1, [])
 375         self.states[k] = K
 376         predicted = {}
 377
 378         edges = self.edges
 379         rules = self.newrules
 380         for X in K, NK:
 381             worklist = X.items
 382             for item in worklist:
 383                 rule, pos = item
 384                 lhs, rhs = rule
 385                 if pos == len(rhs):
 386                     X.complete.append(rule)
 387                     continue
 388
 389                 nextSym = rhs[pos]
 390                 key = (X.stateno, nextSym)
 391                 if not rules.has_key(nextSym):
 392                     if not edges.has_key(key):
 393                         edges[key] = None
 394                         X.T.append(nextSym)
 395                 else:
 396                     edges[key] = None
 397                     if not predicted.has_key(nextSym):
 398                         predicted[nextSym] = 1
 399                         for prule in rules[nextSym]:
 400                             ppos = self.skip(prule)
 401                             new = (prule, ppos)
 402                             NK.items.append(new)
 403             #
 404             #  Problem: we know K needs generating, but we
 405             #  don't yet know about NK.  Can't commit anything
 406             #  regarding NK to self.edges until we're sure.  Should
 407             #  we delay committing on both K and NK to avoid this
 408             #  hacky code?  This creates other problems..
 409             #
 410             if X is K:
 411                 edges = {}
 412
 413         if NK.items == []:
 414             return k
 415
 416         #
 417         #  Check for \epsilon-nonkernel's core.  Unfortunately we
 418         #  need to know the entire set of predicted nonterminals
 419         #  to do this without accidentally duplicating states.
 420         #
 421         core = predicted.keys()
 422         core.sort()
 423         tcore = tuple(core)
 424         if self.cores.has_key(tcore):
 425             self.edges[(k, None)] = self.cores[tcore]
 426             return k
 427
 428         nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
 429         self.edges.update(edges)
 430         self.states[nk] = NK
 431         return k
 432
 433     def goto(self, state, sym):
 434         key = (state, sym)
 435         if not self.edges.has_key(key):
 436             #
 437             #  No transitions from state on sym.
 438             #
 439             return None
 440
 441         rv = self.edges[key]
 442         if rv is None:
 443             #
 444             #  Target state isn't generated yet.  Remedy this.
 445             #
 446             rv = self.makeState(state, sym)
 447             self.edges[key] = rv
 448         return rv
 449
 450     def gotoT(self, state, t):
 451         return [self.goto(state, t)]
 452
 453     def gotoST(self, state, st):
 454         rv = []
 455         for t in self.states[state].T:
 456             if st == t:
 457                 rv.append(self.goto(state, t))
 458         return rv
 459
 460     def add(self, set, item, i=None, predecessor=None, causal=None):
 461         if predecessor is None:
 462             if item not in set:
 463                 set.append(item)
 464         else:
 465             key = (item, i)
 466             if item not in set:
 467                 self.links[key] = []
 468                 set.append(item)
 469             self.links[key].append((predecessor, causal))
 470
 471     def makeSet(self, token, sets, i):
 472         cur, next = sets[i], sets[i+1]
 473
 474         ttype = token is not None and self.typestring(token) or None
 475         if ttype is not None:
 476             fn, arg = self.gotoT, ttype
 477         else:
 478             fn, arg = self.gotoST, token
 479
 480         for item in cur:
 481             ptr = (item, i)
 482             state, parent = item
 483             add = fn(state, arg)
 484             for k in add:
 485                 if k is not None:
 486                     self.add(next, (k, parent), i+1, ptr)
 487                     nk = self.goto(k, None)
 488                     if nk is not None:
 489                         self.add(next, (nk, i+1))
 490
 491             if parent == i:
 492                 continue
 493
 494             for rule in self.states[state].complete:
 495                 lhs, rhs = rule
 496                 for pitem in sets[parent]:
 497                     pstate, pparent = pitem
 498                     k = self.goto(pstate, lhs)
 499                     if k is not None:
 500                         why = (item, i, rule)
 501                         pptr = (pitem, parent)
 502                         self.add(cur, (k, pparent),
 503                                  i, pptr, why)
 504                         nk = self.goto(k, None)
 505                         if nk is not None:
 506                             self.add(cur, (nk, i))
 507
 508     def makeSet_fast(self, token, sets, i):
 509         #
 510         #  Call *only* when the entire state machine has been built!
 511         #  It relies on self.edges being filled in completely, and
 512         #  then duplicates and inlines code to boost speed at the
 513         #  cost of extreme ugliness.
 514         #
 515         cur, next = sets[i], sets[i+1]
 516         ttype = token is not None and self.typestring(token) or None
 517
 518         for item in cur:
 519             ptr = (item, i)
 520             state, parent = item
 521             if ttype is not None:
 522                 k = self.edges.get((state, ttype), None)
 523                 if k is not None:
 524                     #self.add(next, (k, parent), i+1, ptr)
 525                     #INLINED --v
 526                     new = (k, parent)
 527                     key = (new, i+1)
 528                     if new not in next:
 529                         self.links[key] = []
 530                         next.append(new)
 531                     self.links[key].append((ptr, None))
 532                     #INLINED --^
 533                     #nk = self.goto(k, None)
 534                     nk = self.edges.get((k, None), None)
 535                     if nk is not None:
 536                         #self.add(next, (nk, i+1))
 537                         #INLINED --v
 538                         new = (nk, i+1)
 539                         if new not in next:
 540                             next.append(new)
 541                         #INLINED --^
 542             else:
 543                 add = self.gotoST(state, token)
 544                 for k in add:
 545                     if k is not None:
 546                         self.add(next, (k, parent), i+1, ptr)
 547                         #nk = self.goto(k, None)
 548                         nk = self.edges.get((k, None), None)
 549                         if nk is not None:
 550                             self.add(next, (nk, i+1))
 551
 552             if parent == i:
 553                 continue
 554
 555             for rule in self.states[state].complete:
 556                 lhs, rhs = rule
 557                 for pitem in sets[parent]:
 558                     pstate, pparent = pitem
 559                     #k = self.goto(pstate, lhs)
 560                     k = self.edges.get((pstate, lhs), None)
 561                     if k is not None:
 562                         why = (item, i, rule)
 563                         pptr = (pitem, parent)
 564                         #self.add(cur, (k, pparent),
 565                         #        i, pptr, why)
 566                         #INLINED --v
 567                         new = (k, pparent)
 568                         key = (new, i)
 569                         if new not in cur:
 570                             self.links[key] = []
 571                             cur.append(new)
 572                         self.links[key].append((pptr, why))
 573                         #INLINED --^
 574                         #nk = self.goto(k, None)
 575                         nk = self.edges.get((k, None), None)
 576                         if nk is not None:
 577                             #self.add(cur, (nk, i))
 578                             #INLINED --v
 579                             new = (nk, i)
 580                             if new not in cur:
 581                                 cur.append(new)
 582                             #INLINED --^
 583
 584     def predecessor(self, key, causal):
 585         for p, c in self.links[key]:
 586             if c == causal:
 587                 return p
 588         assert 0
 589
 590     def causal(self, key):
 591         links = self.links[key]
 592         if len(links) == 1:
 593             return links[0][1]
 594         choices = []
 595         rule2cause = {}
 596         for p, c in links:
 597             rule = c[2]
 598             choices.append(rule)
 599             rule2cause[rule] = c
 600         return rule2cause[self.ambiguity(choices)]
 601
 602     def deriveEpsilon(self, nt):
 603         if len(self.newrules[nt]) > 1:
 604             rule = self.ambiguity(self.newrules[nt])
 605         else:
 606             rule = self.newrules[nt][0]
 607         #print rule
 608
 609         rhs = rule[1]
 610         attr = [None] * len(rhs)
 611
 612         for i in range(len(rhs)-1, -1, -1):
 613             attr[i] = self.deriveEpsilon(rhs[i])
 614         return self.rule2func[self.new2old[rule]](attr)
 615
 616     def buildTree(self, nt, item, tokens, k):
 617         state, parent = item
 618
 619         choices = []
 620         for rule in self.states[state].complete:
 621             if rule[0] == nt:
 622                 choices.append(rule)
 623         rule = choices[0]
 624         if len(choices) > 1:
 625             rule = self.ambiguity(choices)
 626         #print rule
 627
 628         rhs = rule[1]
 629         attr = [None] * len(rhs)
 630
 631         for i in range(len(rhs)-1, -1, -1):
 632             sym = rhs[i]
 633             if not self.newrules.has_key(sym):
 634                 if sym != self._BOF:
 635                     attr[i] = tokens[k-1]
 636                     key = (item, k)
 637                     item, k = self.predecessor(key, None)
 638             #elif self.isnullable(sym):
 639             elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
 640                 attr[i] = self.deriveEpsilon(sym)
 641             else:
 642                 key = (item, k)
 643                 why = self.causal(key)
 644                 attr[i] = self.buildTree(sym, why[0],
 645                                          tokens, why[1])
 646                 item, k = self.predecessor(key, why)
 647         return self.rule2func[self.new2old[rule]](attr)
 648
 649     def ambiguity(self, rules):
 650         #
 651         #  XXX - problem here and in collectRules() if the same rule
 652         #        appears in >1 method.  Also undefined results if rules
 653         #        causing the ambiguity appear in the same method.
 654         #
 655         sortlist = []
 656         name2index = {}
 657         for i in range(len(rules)):
 658             lhs, rhs = rule = rules[i]
 659             name = self.rule2name[self.new2old[rule]]
 660             sortlist.append((len(rhs), name))
 661             name2index[name] = i
 662         sortlist.sort()
 663         list = map(lambda (a,b): b, sortlist)
 664         return rules[name2index[self.resolve(list)]]
 665
 666     def resolve(self, list):
 667         #
 668         #  Resolve ambiguity in favor of the shortest RHS.
 669         #  Since we walk the tree from the top down, this
 670         #  should effectively resolve in favor of a "shift".
 671         #
 672         return list[0]
 673
 674 #
 675 #  GenericASTBuilder automagically constructs a concrete/abstract syntax tree
 676 #  for a given input.  The extra argument is a class (not an instance!)
 677 #  which supports the "__setslice__" and "__len__" methods.
 678 #
 679 #  XXX - silently overrides any user code in methods.
 680 #
 681
 682 class GenericASTBuilder(GenericParser):
 683     def __init__(self, AST, start):
 684         GenericParser.__init__(self, start)
 685         self.AST = AST
 686
 687     def preprocess(self, rule, func):
 688         rebind = lambda lhs, self=self: \
 689                         lambda args, lhs=lhs, self=self: \
 690                                 self.buildASTNode(args, lhs)
 691         lhs, rhs = rule
 692         return rule, rebind(lhs)
 693
 694     def buildASTNode(self, args, lhs):
 695         children = []
 696         for arg in args:
 697             if isinstance(arg, self.AST):
 698                 children.append(arg)
 699             else:
 700                 children.append(self.terminal(arg))
 701         return self.nonterminal(lhs, children)
 702
 703     def terminal(self, token):      return token
 704
 705     def nonterminal(self, type, args):
 706         rv = self.AST(type)
 707         rv[:len(args)] = args
 708         return rv
 709
 710 #
 711 #  GenericASTTraversal is a Visitor pattern according to Design Patterns.  For
 712 #  each node it attempts to invoke the method n_<node type>, falling
 713 #  back onto the default() method if the n_* can't be found.  The preorder
 714 #  traversal also looks for an exit hook named n_<node type>_exit (no default
 715 #  routine is called if it's not found).  To prematurely halt traversal
 716 #  of a subtree, call the prune() method -- this only makes sense for a
 717 #  preorder traversal.  Node type is determined via the typestring() method.
 718 #
 719
 720 class GenericASTTraversalPruningException:
 721     pass
 722
 723 class GenericASTTraversal:
 724     def __init__(self, ast):
 725         self.ast = ast
 726
 727     def typestring(self, node):
 728         return node.type
 729
 730     def prune(self):
 731         raise GenericASTTraversalPruningException
 732
 733     def preorder(self, node=None):
 734         if node is None:
 735             node = self.ast
 736
 737         try:
 738             name = 'n_' + self.typestring(node)
 739             if hasattr(self, name):
 740                 func = getattr(self, name)
 741                 func(node)
 742             else:
 743                 self.default(node)
 744         except GenericASTTraversalPruningException:
 745             return
 746
 747         for kid in node:
 748             self.preorder(kid)
 749
 750         name = name + '_exit'
 751         if hasattr(self, name):
 752             func = getattr(self, name)
 753             func(node)
 754
 755     def postorder(self, node=None):
 756         if node is None:
 757             node = self.ast
 758
 759         for kid in node:
 760             self.postorder(kid)
 761
 762         name = 'n_' + self.typestring(node)
 763         if hasattr(self, name):
 764             func = getattr(self, name)
 765             func(node)
 766         else:
 767             self.default(node)
 768
 769
 770     def default(self, node):
 771         pass
 772
 773 #
 774 #  GenericASTMatcher.  AST nodes must have "__getitem__" and "__cmp__"
 775 #  implemented.
 776 #
 777 #  XXX - makes assumptions about how GenericParser walks the parse tree.
 778 #
 779
 780 class GenericASTMatcher(GenericParser):
 781     def __init__(self, start, ast):
 782         GenericParser.__init__(self, start)
 783         self.ast = ast
 784
 785     def preprocess(self, rule, func):
 786         rebind = lambda func, self=self: \
 787                         lambda args, func=func, self=self: \
 788                                 self.foundMatch(args, func)
 789         lhs, rhs = rule
 790         rhslist = list(rhs)
 791         rhslist.reverse()
 792
 793         return (lhs, tuple(rhslist)), rebind(func)
 794
 795     def foundMatch(self, args, func):
 796         func(args[-1])
 797         return args[-1]
 798
 799     def match_r(self, node):
 800         self.input.insert(0, node)
 801         children = 0
 802
 803         for child in node:
 804             if children == 0:
 805                 self.input.insert(0, '(')
 806             children = children + 1
 807             self.match_r(child)
 808
 809         if children > 0:
 810             self.input.insert(0, ')')
 811
 812     def match(self, ast=None):
 813         if ast is None:
 814             ast = self.ast
 815         self.input = []
 816
 817         self.match_r(ast)
 818         self.parse(self.input)
 819
 820     def resolve(self, list):
 821         #
 822         #  Resolve ambiguity in favor of the longest RHS.
 823         #
 824         return list[-1]
 825
 826 def _dump(tokens, sets, states):
 827     for i in range(len(sets)):
 828         print 'set', i
 829         for item in sets[i]:
 830             print '\t', item
 831             for (lhs, rhs), pos in states[item[0]].items:
 832                 print '\t\t', lhs, '::=',
 833                 print string.join(rhs[:pos]),
 834                 print '.',
 835                 print string.join(rhs[pos:])
 836         if i < len(tokens):
 837             print
 838             print 'token', str(tokens[i])
 839             print