1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
4 """Python parse tree definitions.
6 This is a very concrete parse tree; we need to keep every token and
7 even the comments and whitespace between tokens.
9 There's also a pattern matching implementation here.
12 __author__
= "Guido van Rossum <guido@python.org>"
15 HUGE
= 0x7FFFFFFF # maximum repeat count, default max
18 def type_repr(type_num
):
21 from .pygram
import python_symbols
22 # printing tokens is possible but not as useful
23 # from .pgen2 import token // token.__dict__.items():
24 for name
, val
in python_symbols
.__dict
__.items():
25 if type(val
) == int: _type_reprs
[val
] = name
26 return _type_reprs
.setdefault(type_num
, type_num
)
31 """Abstract base class for Node and Leaf.
33 This provides some default functionality and boilerplate using the
36 A node may be a subnode of at most one parent.
39 # Default values for instance variables
40 type = None # int: token number (< 256) or symbol number (>= 256)
41 parent
= None # Parent node pointer, or None
42 children
= () # Tuple of subnodes
45 def __new__(cls
, *args
, **kwds
):
46 """Constructor that prevents Base from being instantiated."""
47 assert cls
is not Base
, "Cannot instantiate Base"
48 return object.__new
__(cls
)
50 def __eq__(self
, other
):
51 """Compares two nodes for equality.
53 This calls the method _eq().
55 if self
.__class
__ is not other
.__class
__:
57 return self
._eq
(other
)
59 def __ne__(self
, other
):
60 """Compares two nodes for inequality.
62 This calls the method _eq().
64 if self
.__class
__ is not other
.__class
__:
66 return not self
._eq
(other
)
69 """Compares two nodes for equality.
71 This is called by __eq__ and __ne__. It is only called if the
72 two nodes have the same type. This must be implemented by the
73 concrete subclass. Nodes should be considered equal if they
74 have the same structure, ignoring the prefix string and other
77 raise NotImplementedError
80 """Returns a cloned (deep) copy of self.
82 This must be implemented by the concrete subclass.
84 raise NotImplementedError
87 """Returns a post-order iterator for the tree.
89 This must be implemented by the concrete subclass.
91 raise NotImplementedError
94 """Returns a pre-order iterator for the tree.
96 This must be implemented by the concrete subclass.
98 raise NotImplementedError
100 def set_prefix(self
, prefix
):
101 """Sets the prefix for the node (see Leaf class).
103 This must be implemented by the concrete subclass.
105 raise NotImplementedError
107 def get_prefix(self
):
108 """Returns the prefix for the node (see Leaf class).
110 This must be implemented by the concrete subclass.
112 raise NotImplementedError
114 def replace(self
, new
):
115 """Replaces this node with a new one in the parent."""
116 assert self
.parent
is not None, str(self
)
117 assert new
is not None
118 if not isinstance(new
, list):
122 for ch
in self
.parent
.children
:
124 assert not found
, (self
.parent
.children
, self
, new
)
126 l_children
.extend(new
)
129 l_children
.append(ch
)
130 assert found
, (self
.children
, self
, new
)
131 self
.parent
.changed()
132 self
.parent
.children
= l_children
134 x
.parent
= self
.parent
137 def get_lineno(self
):
138 """Returns the line number which generated the invocant node."""
140 while not isinstance(node
, Leaf
):
141 if not node
.children
:
143 node
= node
.children
[0]
148 self
.parent
.changed()
149 self
.was_changed
= True
152 """Remove the node from the tree. Returns the position of the node
153 in its parent's children before it was removed."""
155 for i
, node
in enumerate(self
.parent
.children
):
157 self
.parent
.changed()
158 del self
.parent
.children
[i
]
162 def get_next_sibling(self
):
163 """Return the node immediately following the invocant in their
164 parent's children list. If the invocant does not have a next
165 sibling, return None."""
166 if self
.parent
is None:
169 # Can't use index(); we need to test by identity
170 for i
, sibling
in enumerate(self
.parent
.children
):
173 return self
.parent
.children
[i
+1]
177 def get_suffix(self
):
178 """Return the string immediately following the invocant node. This
179 is effectively equivalent to node.get_next_sibling().get_prefix()"""
180 next_sib
= self
.get_next_sibling()
183 return next_sib
.get_prefix()
188 """Concrete implementation for interior nodes."""
190 def __init__(self
, type, children
, context
=None, prefix
=None):
193 Takes a type constant (a symbol number >= 256), a sequence of
194 child nodes, and an optional context keyword argument.
196 As a side effect, the parent pointers of the children are updated.
198 assert type >= 256, type
200 self
.children
= list(children
)
201 for ch
in self
.children
:
202 assert ch
.parent
is None, repr(ch
)
204 if prefix
is not None:
205 self
.set_prefix(prefix
)
208 """Returns a canonical string representation."""
209 return "%s(%s, %r)" % (self
.__class
__.__name
__,
210 type_repr(self
.type),
214 """Returns a pretty string representation.
216 This reproduces the input source exactly.
218 return "".join(map(str, self
.children
))
220 def _eq(self
, other
):
221 """Compares two nodes for equality."""
222 return (self
.type, self
.children
) == (other
.type, other
.children
)
225 """Returns a cloned (deep) copy of self."""
226 return Node(self
.type, [ch
.clone() for ch
in self
.children
])
228 def post_order(self
):
229 """Returns a post-order iterator for the tree."""
230 for child
in self
.children
:
231 for node
in child
.post_order():
236 """Returns a pre-order iterator for the tree."""
238 for child
in self
.children
:
239 for node
in child
.post_order():
242 def set_prefix(self
, prefix
):
243 """Sets the prefix for the node.
245 This passes the responsibility on to the first child.
248 self
.children
[0].set_prefix(prefix
)
250 def get_prefix(self
):
251 """Returns the prefix for the node.
253 This passes the call on to the first child.
255 if not self
.children
:
257 return self
.children
[0].get_prefix()
259 def set_child(self
, i
, child
):
260 """Equivalent to 'node.children[i] = child'. This method also sets the
261 child's parent attribute appropriately."""
263 self
.children
[i
].parent
= None
264 self
.children
[i
] = child
266 def insert_child(self
, i
, child
):
267 """Equivalent to 'node.children.insert(i, child)'. This method also
268 sets the child's parent attribute appropriately."""
270 self
.children
.insert(i
, child
)
272 def append_child(self
, child
):
273 """Equivalent to 'node.children.append(child)'. This method also
274 sets the child's parent attribute appropriately."""
276 self
.children
.append(child
)
281 """Concrete implementation for leaf nodes."""
283 # Default values for instance variables
284 prefix
= "" # Whitespace and comments preceding this token in the input
285 lineno
= 0 # Line where this token starts in the input
286 column
= 0 # Column where this token tarts in the input
288 def __init__(self
, type, value
, context
=None, prefix
=None):
291 Takes a type constant (a token number < 256), a string value,
292 and an optional context keyword argument.
294 assert 0 <= type < 256, type
295 if context
is not None:
296 self
.prefix
, (self
.lineno
, self
.column
) = context
299 if prefix
is not None:
303 """Returns a canonical string representation."""
304 return "%s(%r, %r)" % (self
.__class
__.__name
__,
309 """Returns a pretty string representation.
311 This reproduces the input source exactly.
313 return self
.prefix
+ str(self
.value
)
315 def _eq(self
, other
):
316 """Compares two nodes for equality."""
317 return (self
.type, self
.value
) == (other
.type, other
.value
)
320 """Returns a cloned (deep) copy of self."""
321 return Leaf(self
.type, self
.value
,
322 (self
.prefix
, (self
.lineno
, self
.column
)))
324 def post_order(self
):
325 """Returns a post-order iterator for the tree."""
329 """Returns a pre-order iterator for the tree."""
332 def set_prefix(self
, prefix
):
333 """Sets the prefix for the node."""
337 def get_prefix(self
):
338 """Returns the prefix for the node."""
342 def convert(gr
, raw_node
):
343 """Converts raw node information to a Node or Leaf instance.
345 This is passed to the parser driver which calls it whenever a
346 reduction of a grammar rule produces a new complete node, so that
347 the tree is build strictly bottom-up.
349 type, value
, context
, children
= raw_node
350 if children
or type in gr
.number2symbol
:
351 # If there's exactly one child, return that child instead of
352 # creating a new node.
353 if len(children
) == 1:
355 return Node(type, children
, context
=context
)
357 return Leaf(type, value
, context
=context
)
360 class BasePattern(object):
362 """A pattern is a tree matching pattern.
364 It looks for a specific node type (token or symbol), and
365 optionally for a specific content.
367 This is an abstract base class. There are three concrete
370 - LeafPattern matches a single leaf node;
371 - NodePattern matches a single node (usually non-leaf);
372 - WildcardPattern matches a sequence of nodes of variable length.
375 # Defaults for instance variables
376 type = None # Node type (token if < 256, symbol if >= 256)
377 content
= None # Optional content matching pattern
378 name
= None # Optional name used to store match in results dict
380 def __new__(cls
, *args
, **kwds
):
381 """Constructor that prevents BasePattern from being instantiated."""
382 assert cls
is not BasePattern
, "Cannot instantiate BasePattern"
383 return object.__new
__(cls
)
386 args
= [type_repr(self
.type), self
.content
, self
.name
]
387 while args
and args
[-1] is None:
389 return "%s(%s)" % (self
.__class
__.__name
__, ", ".join(map(repr, args
)))
392 """A subclass can define this as a hook for optimizations.
394 Returns either self or another node with the same effect.
398 def match(self
, node
, results
=None):
399 """Does this pattern exactly match a node?
401 Returns True if it matches, False if not.
403 If results is not None, it must be a dict which will be
404 updated with the nodes matching named subpatterns.
406 Default implementation for non-wildcard patterns.
408 if self
.type is not None and node
.type != self
.type:
410 if self
.content
is not None:
412 if results
is not None:
414 if not self
._submatch
(node
, r
):
418 if results
is not None and self
.name
:
419 results
[self
.name
] = node
422 def match_seq(self
, nodes
, results
=None):
423 """Does this pattern exactly match a sequence of nodes?
425 Default implementation for non-wildcard patterns.
429 return self
.match(nodes
[0], results
)
431 def generate_matches(self
, nodes
):
432 """Generator yielding all matches for this pattern.
434 Default implementation for non-wildcard patterns.
437 if nodes
and self
.match(nodes
[0], r
):
441 class LeafPattern(BasePattern
):
443 def __init__(self
, type=None, content
=None, name
=None):
444 """Initializer. Takes optional type, content, and name.
446 The type, if given must be a token type (< 256). If not given,
447 this matches any *leaf* node; the content may still be required.
449 The content, if given, must be a string.
451 If a name is given, the matching node is stored in the results
455 assert 0 <= type < 256, type
456 if content
is not None:
457 assert isinstance(content
, basestring
), repr(content
)
459 self
.content
= content
462 def match(self
, node
, results
=None):
463 """Override match() to insist on a leaf node."""
464 if not isinstance(node
, Leaf
):
466 return BasePattern
.match(self
, node
, results
)
468 def _submatch(self
, node
, results
=None):
469 """Match the pattern's content to the node's children.
471 This assumes the node type matches and self.content is not None.
473 Returns True if it matches, False if not.
475 If results is not None, it must be a dict which will be
476 updated with the nodes matching named subpatterns.
478 When returning False, the results dict may still be updated.
480 return self
.content
== node
.value
483 class NodePattern(BasePattern
):
487 def __init__(self
, type=None, content
=None, name
=None):
488 """Initializer. Takes optional type, content, and name.
490 The type, if given, must be a symbol type (>= 256). If the
491 type is None this matches *any* single node (leaf or not),
492 except if content is not None, in which it only matches
493 non-leaf nodes that also match the content pattern.
495 The content, if not None, must be a sequence of Patterns that
496 must match the node's children exactly. If the content is
497 given, the type must not be None.
499 If a name is given, the matching node is stored in the results
503 assert type >= 256, type
504 if content
is not None:
505 assert not isinstance(content
, basestring
), repr(content
)
506 content
= list(content
)
507 for i
, item
in enumerate(content
):
508 assert isinstance(item
, BasePattern
), (i
, item
)
509 if isinstance(item
, WildcardPattern
):
510 self
.wildcards
= True
512 self
.content
= content
515 def _submatch(self
, node
, results
=None):
516 """Match the pattern's content to the node's children.
518 This assumes the node type matches and self.content is not None.
520 Returns True if it matches, False if not.
522 If results is not None, it must be a dict which will be
523 updated with the nodes matching named subpatterns.
525 When returning False, the results dict may still be updated.
528 for c
, r
in generate_matches(self
.content
, node
.children
):
529 if c
== len(node
.children
):
530 if results
is not None:
534 if len(self
.content
) != len(node
.children
):
536 for subpattern
, child
in zip(self
.content
, node
.children
):
537 if not subpattern
.match(child
, results
):
542 class WildcardPattern(BasePattern
):
544 """A wildcard pattern can match zero or more nodes.
546 This has all the flexibility needed to implement patterns like:
550 (...)* (...)+ (...)? (...){m,n}
552 except it always uses non-greedy matching.
555 def __init__(self
, content
=None, min=0, max=HUGE
, name
=None):
559 content: optional sequence of subsequences of patterns;
560 if absent, matches one node;
561 if present, each subsequence is an alternative [*]
562 min: optinal minumum number of times to match, default 0
563 max: optional maximum number of times tro match, default HUGE
564 name: optional name assigned to this match
566 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
567 equivalent to (a b c | d e | f g h); if content is None,
568 this is equivalent to '.' in regular expression terms.
569 The min and max parameters work as follows:
570 min=0, max=maxint: .*
571 min=1, max=maxint: .+
574 If content is not None, replace the dot with the parenthesized
575 list of alternatives, e.g. (a b c | d e | f g h)*
577 assert 0 <= min <= max <= HUGE
, (min, max)
578 if content
is not None:
579 content
= tuple(map(tuple, content
)) # Protect against alterations
580 # Check sanity of alternatives
581 assert len(content
), repr(content
) # Can't have zero alternatives
583 assert len(alt
), repr(alt
) # Can have empty alternatives
584 self
.content
= content
590 """Optimize certain stacked wildcard patterns."""
592 if (self
.content
is not None and
593 len(self
.content
) == 1 and len(self
.content
[0]) == 1):
594 subpattern
= self
.content
[0][0]
595 if self
.min == 1 and self
.max == 1:
596 if self
.content
is None:
597 return NodePattern(name
=self
.name
)
598 if subpattern
is not None and self
.name
== subpattern
.name
:
599 return subpattern
.optimize()
600 if (self
.min <= 1 and isinstance(subpattern
, WildcardPattern
) and
601 subpattern
.min <= 1 and self
.name
== subpattern
.name
):
602 return WildcardPattern(subpattern
.content
,
603 self
.min*subpattern
.min,
604 self
.max*subpattern
.max,
608 def match(self
, node
, results
=None):
609 """Does this pattern exactly match a node?"""
610 return self
.match_seq([node
], results
)
612 def match_seq(self
, nodes
, results
=None):
613 """Does this pattern exactly match a sequence of nodes?"""
614 for c
, r
in self
.generate_matches(nodes
):
616 if results
is not None:
619 results
[self
.name
] = list(nodes
)
623 def generate_matches(self
, nodes
):
624 """Generator yielding matches for a sequence of nodes.
627 nodes: sequence of nodes
630 (count, results) tuples where:
631 count: the match comprises nodes[:count];
632 results: dict containing named submatches.
634 if self
.content
is None:
635 # Shortcut for special case (see __init__.__doc__)
636 for count
in xrange(self
.min, 1 + min(len(nodes
), self
.max)):
639 r
[self
.name
] = nodes
[:count
]
642 for count
, r
in self
._recursive
_matches
(nodes
, 0):
644 r
[self
.name
] = nodes
[:count
]
647 def _recursive_matches(self
, nodes
, count
):
648 """Helper to recursively yield the matches."""
649 assert self
.content
is not None
650 if count
>= self
.min:
653 r
[self
.name
] = nodes
[:0]
656 for alt
in self
.content
:
657 for c0
, r0
in generate_matches(alt
, nodes
):
658 for c1
, r1
in self
._recursive
_matches
(nodes
[c0
:], count
+1):
665 class NegatedPattern(BasePattern
):
667 def __init__(self
, content
=None):
670 The argument is either a pattern or None. If it is None, this
671 only matches an empty sequence (effectively '$' in regex
672 lingo). If it is not None, this matches whenever the argument
673 pattern doesn't have any matches.
675 if content
is not None:
676 assert isinstance(content
, BasePattern
), repr(content
)
677 self
.content
= content
679 def match(self
, node
):
680 # We never match a node in its entirety
683 def match_seq(self
, nodes
):
684 # We only match an empty sequence of nodes in its entirety
685 return len(nodes
) == 0
687 def generate_matches(self
, nodes
):
688 if self
.content
is None:
689 # Return a match if there is an empty sequence
693 # Return a match if the argument pattern has no matches
694 for c
, r
in self
.content
.generate_matches(nodes
):
699 def generate_matches(patterns
, nodes
):
700 """Generator yielding matches for a sequence of patterns and nodes.
703 patterns: a sequence of patterns
704 nodes: a sequence of nodes
707 (count, results) tuples where:
708 count: the entire sequence of patterns matches nodes[:count];
709 results: dict containing named submatches.
714 p
, rest
= patterns
[0], patterns
[1:]
715 for c0
, r0
in p
.generate_matches(nodes
):
719 for c1
, r1
in generate_matches(rest
, nodes
[c0
:]):