1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
13 __author__
= "Guido van Rossum <guido@python.org>"
17 from StringIO
import StringIO
20 HUGE
= 0x7FFFFFFF # maximum repeat count, default max
23 def type_repr(type_num
):
26 from .pygram
import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name
, val
in python_symbols
.__dict
__.items():
30 if type(val
) == int: _type_reprs
[val
] = name
31 return _type_reprs
.setdefault(type_num
, type_num
)
37 Abstract base class for Node and Leaf.
39 This provides some default functionality and boilerplate using the
42 A node may be a subnode of at most one parent.
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent
= None # Parent node pointer, or None
48 children
= () # Tuple of subnodes
51 def __new__(cls
, *args
, **kwds
):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls
is not Base
, "Cannot instantiate Base"
54 return object.__new
__(cls
)
56 def __eq__(self
, other
):
58 Compare two nodes for equality.
60 This calls the method _eq().
62 if self
.__class
__ is not other
.__class
__:
64 return self
._eq
(other
)
66 __hash__
= None # For Py3 compatibility.
68 def __ne__(self
, other
):
70 Compare two nodes for inequality.
72 This calls the method _eq().
74 if self
.__class
__ is not other
.__class
__:
76 return not self
._eq
(other
)
80 Compare two nodes for equality.
82 This is called by __eq__ and __ne__. It is only called if the two nodes
83 have the same type. This must be implemented by the concrete subclass.
84 Nodes should be considered equal if they have the same structure,
85 ignoring the prefix string and other context information.
87 raise NotImplementedError
91 Return a cloned (deep) copy of self.
93 This must be implemented by the concrete subclass.
95 raise NotImplementedError
99 Return a post-order iterator for the tree.
101 This must be implemented by the concrete subclass.
103 raise NotImplementedError
107 Return a pre-order iterator for the tree.
109 This must be implemented by the concrete subclass.
111 raise NotImplementedError
113 def set_prefix(self
, prefix
):
115 Set the prefix for the node (see Leaf class).
117 DEPRECATED; use the prefix property directly.
119 warnings
.warn("set_prefix() is deprecated; use the prefix property",
120 DeprecationWarning, stacklevel
=2)
123 def get_prefix(self
):
125 Return the prefix for the node (see Leaf class).
127 DEPRECATED; use the prefix property directly.
129 warnings
.warn("get_prefix() is deprecated; use the prefix property",
130 DeprecationWarning, stacklevel
=2)
133 def replace(self
, new
):
134 """Replace this node with a new one in the parent."""
135 assert self
.parent
is not None, str(self
)
136 assert new
is not None
137 if not isinstance(new
, list):
141 for ch
in self
.parent
.children
:
143 assert not found
, (self
.parent
.children
, self
, new
)
145 l_children
.extend(new
)
148 l_children
.append(ch
)
149 assert found
, (self
.children
, self
, new
)
150 self
.parent
.changed()
151 self
.parent
.children
= l_children
153 x
.parent
= self
.parent
156 def get_lineno(self
):
157 """Return the line number which generated the invocant node."""
159 while not isinstance(node
, Leaf
):
160 if not node
.children
:
162 node
= node
.children
[0]
167 self
.parent
.changed()
168 self
.was_changed
= True
172 Remove the node from the tree. Returns the position of the node in its
173 parent's children before it was removed.
176 for i
, node
in enumerate(self
.parent
.children
):
178 self
.parent
.changed()
179 del self
.parent
.children
[i
]
184 def next_sibling(self
):
186 The node immediately following the invocant in their parent's children
187 list. If the invocant does not have a next sibling, it is None
189 if self
.parent
is None:
192 # Can't use index(); we need to test by identity
193 for i
, child
in enumerate(self
.parent
.children
):
196 return self
.parent
.children
[i
+1]
201 def prev_sibling(self
):
203 The node immediately preceding the invocant in their parent's children
204 list. If the invocant does not have a previous sibling, it is None.
206 if self
.parent
is None:
209 # Can't use index(); we need to test by identity
210 for i
, child
in enumerate(self
.parent
.children
):
214 return self
.parent
.children
[i
-1]
216 def get_suffix(self
):
218 Return the string immediately following the invocant node. This is
219 effectively equivalent to node.next_sibling.prefix
221 next_sib
= self
.next_sibling
224 return next_sib
.prefix
226 if sys
.version_info
< (3, 0):
228 return unicode(self
).encode("ascii")
233 """Concrete implementation for interior nodes."""
235 def __init__(self
, type, children
, context
=None, prefix
=None):
239 Takes a type constant (a symbol number >= 256), a sequence of
240 child nodes, and an optional context keyword argument.
242 As a side effect, the parent pointers of the children are updated.
244 assert type >= 256, type
246 self
.children
= list(children
)
247 for ch
in self
.children
:
248 assert ch
.parent
is None, repr(ch
)
250 if prefix
is not None:
254 """Return a canonical string representation."""
255 return "%s(%s, %r)" % (self
.__class
__.__name
__,
256 type_repr(self
.type),
259 def __unicode__(self
):
261 Return a pretty string representation.
263 This reproduces the input source exactly.
265 return u
"".join(map(unicode, self
.children
))
267 if sys
.version_info
> (3, 0):
268 __str__
= __unicode__
270 def _eq(self
, other
):
271 """Compare two nodes for equality."""
272 return (self
.type, self
.children
) == (other
.type, other
.children
)
275 """Return a cloned (deep) copy of self."""
276 return Node(self
.type, [ch
.clone() for ch
in self
.children
])
278 def post_order(self
):
279 """Return a post-order iterator for the tree."""
280 for child
in self
.children
:
281 for node
in child
.post_order():
286 """Return a pre-order iterator for the tree."""
288 for child
in self
.children
:
289 for node
in child
.post_order():
295 The whitespace and comments preceding this node in the input.
297 if not self
.children
:
299 return self
.children
[0].prefix
302 def prefix(self
, prefix
):
304 self
.children
[0].prefix
= prefix
306 def set_child(self
, i
, child
):
308 Equivalent to 'node.children[i] = child'. This method also sets the
309 child's parent attribute appropriately.
312 self
.children
[i
].parent
= None
313 self
.children
[i
] = child
316 def insert_child(self
, i
, child
):
318 Equivalent to 'node.children.insert(i, child)'. This method also sets
319 the child's parent attribute appropriately.
322 self
.children
.insert(i
, child
)
325 def append_child(self
, child
):
327 Equivalent to 'node.children.append(child)'. This method also sets the
328 child's parent attribute appropriately.
331 self
.children
.append(child
)
337 """Concrete implementation for leaf nodes."""
339 # Default values for instance variables
340 _prefix
= "" # Whitespace and comments preceding this token in the input
341 lineno
= 0 # Line where this token starts in the input
342 column
= 0 # Column where this token tarts in the input
344 def __init__(self
, type, value
, context
=None, prefix
=None):
348 Takes a type constant (a token number < 256), a string value, and an
349 optional context keyword argument.
351 assert 0 <= type < 256, type
352 if context
is not None:
353 self
._prefix
, (self
.lineno
, self
.column
) = context
356 if prefix
is not None:
357 self
._prefix
= prefix
360 """Return a canonical string representation."""
361 return "%s(%r, %r)" % (self
.__class
__.__name
__,
365 def __unicode__(self
):
367 Return a pretty string representation.
369 This reproduces the input source exactly.
371 return self
.prefix
+ unicode(self
.value
)
373 if sys
.version_info
> (3, 0):
374 __str__
= __unicode__
376 def _eq(self
, other
):
377 """Compare two nodes for equality."""
378 return (self
.type, self
.value
) == (other
.type, other
.value
)
381 """Return a cloned (deep) copy of self."""
382 return Leaf(self
.type, self
.value
,
383 (self
.prefix
, (self
.lineno
, self
.column
)))
385 def post_order(self
):
386 """Return a post-order iterator for the tree."""
390 """Return a pre-order iterator for the tree."""
396 The whitespace and comments preceding this token in the input.
401 def prefix(self
, prefix
):
403 self
._prefix
= prefix
406 def convert(gr
, raw_node
):
408 Convert raw node information to a Node or Leaf instance.
410 This is passed to the parser driver which calls it whenever a reduction of a
411 grammar rule produces a new complete node, so that the tree is build
414 type, value
, context
, children
= raw_node
415 if children
or type in gr
.number2symbol
:
416 # If there's exactly one child, return that child instead of
417 # creating a new node.
418 if len(children
) == 1:
420 return Node(type, children
, context
=context
)
422 return Leaf(type, value
, context
=context
)
425 class BasePattern(object):
428 A pattern is a tree matching pattern.
430 It looks for a specific node type (token or symbol), and
431 optionally for a specific content.
433 This is an abstract base class. There are three concrete
436 - LeafPattern matches a single leaf node;
437 - NodePattern matches a single node (usually non-leaf);
438 - WildcardPattern matches a sequence of nodes of variable length.
441 # Defaults for instance variables
442 type = None # Node type (token if < 256, symbol if >= 256)
443 content
= None # Optional content matching pattern
444 name
= None # Optional name used to store match in results dict
446 def __new__(cls
, *args
, **kwds
):
447 """Constructor that prevents BasePattern from being instantiated."""
448 assert cls
is not BasePattern
, "Cannot instantiate BasePattern"
449 return object.__new
__(cls
)
452 args
= [type_repr(self
.type), self
.content
, self
.name
]
453 while args
and args
[-1] is None:
455 return "%s(%s)" % (self
.__class
__.__name
__, ", ".join(map(repr, args
)))
459 A subclass can define this as a hook for optimizations.
461 Returns either self or another node with the same effect.
465 def match(self
, node
, results
=None):
467 Does this pattern exactly match a node?
469 Returns True if it matches, False if not.
471 If results is not None, it must be a dict which will be
472 updated with the nodes matching named subpatterns.
474 Default implementation for non-wildcard patterns.
476 if self
.type is not None and node
.type != self
.type:
478 if self
.content
is not None:
480 if results
is not None:
482 if not self
._submatch
(node
, r
):
486 if results
is not None and self
.name
:
487 results
[self
.name
] = node
490 def match_seq(self
, nodes
, results
=None):
492 Does this pattern exactly match a sequence of nodes?
494 Default implementation for non-wildcard patterns.
498 return self
.match(nodes
[0], results
)
500 def generate_matches(self
, nodes
):
502 Generator yielding all matches for this pattern.
504 Default implementation for non-wildcard patterns.
507 if nodes
and self
.match(nodes
[0], r
):
511 class LeafPattern(BasePattern
):
513 def __init__(self
, type=None, content
=None, name
=None):
515 Initializer. Takes optional type, content, and name.
517 The type, if given must be a token type (< 256). If not given,
518 this matches any *leaf* node; the content may still be required.
520 The content, if given, must be a string.
522 If a name is given, the matching node is stored in the results
526 assert 0 <= type < 256, type
527 if content
is not None:
528 assert isinstance(content
, basestring
), repr(content
)
530 self
.content
= content
533 def match(self
, node
, results
=None):
534 """Override match() to insist on a leaf node."""
535 if not isinstance(node
, Leaf
):
537 return BasePattern
.match(self
, node
, results
)
539 def _submatch(self
, node
, results
=None):
541 Match the pattern's content to the node's children.
543 This assumes the node type matches and self.content is not None.
545 Returns True if it matches, False if not.
547 If results is not None, it must be a dict which will be
548 updated with the nodes matching named subpatterns.
550 When returning False, the results dict may still be updated.
552 return self
.content
== node
.value
555 class NodePattern(BasePattern
):
559 def __init__(self
, type=None, content
=None, name
=None):
561 Initializer. Takes optional type, content, and name.
563 The type, if given, must be a symbol type (>= 256). If the
564 type is None this matches *any* single node (leaf or not),
565 except if content is not None, in which it only matches
566 non-leaf nodes that also match the content pattern.
568 The content, if not None, must be a sequence of Patterns that
569 must match the node's children exactly. If the content is
570 given, the type must not be None.
572 If a name is given, the matching node is stored in the results
576 assert type >= 256, type
577 if content
is not None:
578 assert not isinstance(content
, basestring
), repr(content
)
579 content
= list(content
)
580 for i
, item
in enumerate(content
):
581 assert isinstance(item
, BasePattern
), (i
, item
)
582 if isinstance(item
, WildcardPattern
):
583 self
.wildcards
= True
585 self
.content
= content
588 def _submatch(self
, node
, results
=None):
590 Match the pattern's content to the node's children.
592 This assumes the node type matches and self.content is not None.
594 Returns True if it matches, False if not.
596 If results is not None, it must be a dict which will be
597 updated with the nodes matching named subpatterns.
599 When returning False, the results dict may still be updated.
602 for c
, r
in generate_matches(self
.content
, node
.children
):
603 if c
== len(node
.children
):
604 if results
is not None:
608 if len(self
.content
) != len(node
.children
):
610 for subpattern
, child
in zip(self
.content
, node
.children
):
611 if not subpattern
.match(child
, results
):
616 class WildcardPattern(BasePattern
):
619 A wildcard pattern can match zero or more nodes.
621 This has all the flexibility needed to implement patterns like:
625 (...)* (...)+ (...)? (...){m,n}
627 except it always uses non-greedy matching.
630 def __init__(self
, content
=None, min=0, max=HUGE
, name
=None):
635 content: optional sequence of subsequences of patterns;
636 if absent, matches one node;
637 if present, each subsequence is an alternative [*]
638 min: optinal minumum number of times to match, default 0
639 max: optional maximum number of times tro match, default HUGE
640 name: optional name assigned to this match
642 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
643 equivalent to (a b c | d e | f g h); if content is None,
644 this is equivalent to '.' in regular expression terms.
645 The min and max parameters work as follows:
646 min=0, max=maxint: .*
647 min=1, max=maxint: .+
650 If content is not None, replace the dot with the parenthesized
651 list of alternatives, e.g. (a b c | d e | f g h)*
653 assert 0 <= min <= max <= HUGE
, (min, max)
654 if content
is not None:
655 content
= tuple(map(tuple, content
)) # Protect against alterations
656 # Check sanity of alternatives
657 assert len(content
), repr(content
) # Can't have zero alternatives
659 assert len(alt
), repr(alt
) # Can have empty alternatives
660 self
.content
= content
666 """Optimize certain stacked wildcard patterns."""
668 if (self
.content
is not None and
669 len(self
.content
) == 1 and len(self
.content
[0]) == 1):
670 subpattern
= self
.content
[0][0]
671 if self
.min == 1 and self
.max == 1:
672 if self
.content
is None:
673 return NodePattern(name
=self
.name
)
674 if subpattern
is not None and self
.name
== subpattern
.name
:
675 return subpattern
.optimize()
676 if (self
.min <= 1 and isinstance(subpattern
, WildcardPattern
) and
677 subpattern
.min <= 1 and self
.name
== subpattern
.name
):
678 return WildcardPattern(subpattern
.content
,
679 self
.min*subpattern
.min,
680 self
.max*subpattern
.max,
684 def match(self
, node
, results
=None):
685 """Does this pattern exactly match a node?"""
686 return self
.match_seq([node
], results
)
688 def match_seq(self
, nodes
, results
=None):
689 """Does this pattern exactly match a sequence of nodes?"""
690 for c
, r
in self
.generate_matches(nodes
):
692 if results
is not None:
695 results
[self
.name
] = list(nodes
)
699 def generate_matches(self
, nodes
):
701 Generator yielding matches for a sequence of nodes.
704 nodes: sequence of nodes
707 (count, results) tuples where:
708 count: the match comprises nodes[:count];
709 results: dict containing named submatches.
711 if self
.content
is None:
712 # Shortcut for special case (see __init__.__doc__)
713 for count
in xrange(self
.min, 1 + min(len(nodes
), self
.max)):
716 r
[self
.name
] = nodes
[:count
]
718 elif self
.name
== "bare_name":
719 yield self
._bare
_name
_matches
(nodes
)
721 # The reason for this is that hitting the recursion limit usually
722 # results in some ugly messages about how RuntimeErrors are being
724 save_stderr
= sys
.stderr
725 sys
.stderr
= StringIO()
727 for count
, r
in self
._recursive
_matches
(nodes
, 0):
729 r
[self
.name
] = nodes
[:count
]
732 # We fall back to the iterative pattern matching scheme if the recursive
733 # scheme hits the recursion limit.
734 for count
, r
in self
._iterative
_matches
(nodes
):
736 r
[self
.name
] = nodes
[:count
]
739 sys
.stderr
= save_stderr
741 def _iterative_matches(self
, nodes
):
742 """Helper to iteratively yield the matches."""
748 # generate matches that use just one alt from self.content
749 for alt
in self
.content
:
750 for c
, r
in generate_matches(alt
, nodes
):
752 results
.append((c
, r
))
754 # for each match, iterate down the nodes
757 for c0
, r0
in results
:
758 # stop if the entire set of nodes has been matched
759 if c0
< nodelen
and c0
<= self
.max:
760 for alt
in self
.content
:
761 for c1
, r1
in generate_matches(alt
, nodes
[c0
:]):
767 new_results
.append((c0
+ c1
, r
))
768 results
= new_results
770 def _bare_name_matches(self
, nodes
):
771 """Special optimized matcher for bare_name."""
776 while not done
and count
< max:
778 for leaf
in self
.content
:
779 if leaf
[0].match(nodes
[count
], r
):
783 r
[self
.name
] = nodes
[:count
]
786 def _recursive_matches(self
, nodes
, count
):
787 """Helper to recursively yield the matches."""
788 assert self
.content
is not None
789 if count
>= self
.min:
792 for alt
in self
.content
:
793 for c0
, r0
in generate_matches(alt
, nodes
):
794 for c1
, r1
in self
._recursive
_matches
(nodes
[c0
:], count
+1):
801 class NegatedPattern(BasePattern
):
803 def __init__(self
, content
=None):
807 The argument is either a pattern or None. If it is None, this
808 only matches an empty sequence (effectively '$' in regex
809 lingo). If it is not None, this matches whenever the argument
810 pattern doesn't have any matches.
812 if content
is not None:
813 assert isinstance(content
, BasePattern
), repr(content
)
814 self
.content
= content
816 def match(self
, node
):
817 # We never match a node in its entirety
820 def match_seq(self
, nodes
):
821 # We only match an empty sequence of nodes in its entirety
822 return len(nodes
) == 0
824 def generate_matches(self
, nodes
):
825 if self
.content
is None:
826 # Return a match if there is an empty sequence
830 # Return a match if the argument pattern has no matches
831 for c
, r
in self
.content
.generate_matches(nodes
):
836 def generate_matches(patterns
, nodes
):
838 Generator yielding matches for a sequence of patterns and nodes.
841 patterns: a sequence of patterns
842 nodes: a sequence of nodes
845 (count, results) tuples where:
846 count: the entire sequence of patterns matches nodes[:count];
847 results: dict containing named submatches.
852 p
, rest
= patterns
[0], patterns
[1:]
853 for c0
, r0
in p
.generate_matches(nodes
):
857 for c1
, r1
in generate_matches(rest
, nodes
[c0
:]):