Issue #1515: Enable use of deepcopy() with instance methods. Patch by Robert Collins.
[python.git] / Lib / lib2to3 / pytree.py
blob27cdd72835007b659decac3ebde09363012fbc1b
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
4 """
5 Python parse tree definitions.
7 This is a very concrete parse tree; we need to keep every token and
8 even the comments and whitespace between tokens.
10 There's also a pattern matching implementation here.
11 """
13 __author__ = "Guido van Rossum <guido@python.org>"
15 import sys
16 import warnings
17 from StringIO import StringIO
20 HUGE = 0x7FFFFFFF # maximum repeat count, default max
22 _type_reprs = {}
23 def type_repr(type_num):
24 global _type_reprs
25 if not _type_reprs:
26 from .pygram import python_symbols
27 # printing tokens is possible but not as useful
28 # from .pgen2 import token // token.__dict__.items():
29 for name, val in python_symbols.__dict__.items():
30 if type(val) == int: _type_reprs[val] = name
31 return _type_reprs.setdefault(type_num, type_num)
34 class Base(object):
36 """
37 Abstract base class for Node and Leaf.
39 This provides some default functionality and boilerplate using the
40 template pattern.
42 A node may be a subnode of at most one parent.
43 """
45 # Default values for instance variables
46 type = None # int: token number (< 256) or symbol number (>= 256)
47 parent = None # Parent node pointer, or None
48 children = () # Tuple of subnodes
49 was_changed = False
51 def __new__(cls, *args, **kwds):
52 """Constructor that prevents Base from being instantiated."""
53 assert cls is not Base, "Cannot instantiate Base"
54 return object.__new__(cls)
56 def __eq__(self, other):
57 """
58 Compare two nodes for equality.
60 This calls the method _eq().
61 """
62 if self.__class__ is not other.__class__:
63 return NotImplemented
64 return self._eq(other)
66 __hash__ = None # For Py3 compatibility.
68 def __ne__(self, other):
69 """
70 Compare two nodes for inequality.
72 This calls the method _eq().
73 """
74 if self.__class__ is not other.__class__:
75 return NotImplemented
76 return not self._eq(other)
78 def _eq(self, other):
79 """
80 Compare two nodes for equality.
82 This is called by __eq__ and __ne__. It is only called if the two nodes
83 have the same type. This must be implemented by the concrete subclass.
84 Nodes should be considered equal if they have the same structure,
85 ignoring the prefix string and other context information.
86 """
87 raise NotImplementedError
89 def clone(self):
90 """
91 Return a cloned (deep) copy of self.
93 This must be implemented by the concrete subclass.
94 """
95 raise NotImplementedError
97 def post_order(self):
98 """
99 Return a post-order iterator for the tree.
101 This must be implemented by the concrete subclass.
103 raise NotImplementedError
105 def pre_order(self):
107 Return a pre-order iterator for the tree.
109 This must be implemented by the concrete subclass.
111 raise NotImplementedError
113 def set_prefix(self, prefix):
115 Set the prefix for the node (see Leaf class).
117 DEPRECATED; use the prefix property directly.
119 warnings.warn("set_prefix() is deprecated; use the prefix property",
120 DeprecationWarning, stacklevel=2)
121 self.prefix = prefix
123 def get_prefix(self):
125 Return the prefix for the node (see Leaf class).
127 DEPRECATED; use the prefix property directly.
129 warnings.warn("get_prefix() is deprecated; use the prefix property",
130 DeprecationWarning, stacklevel=2)
131 return self.prefix
133 def replace(self, new):
134 """Replace this node with a new one in the parent."""
135 assert self.parent is not None, str(self)
136 assert new is not None
137 if not isinstance(new, list):
138 new = [new]
139 l_children = []
140 found = False
141 for ch in self.parent.children:
142 if ch is self:
143 assert not found, (self.parent.children, self, new)
144 if new is not None:
145 l_children.extend(new)
146 found = True
147 else:
148 l_children.append(ch)
149 assert found, (self.children, self, new)
150 self.parent.changed()
151 self.parent.children = l_children
152 for x in new:
153 x.parent = self.parent
154 self.parent = None
156 def get_lineno(self):
157 """Return the line number which generated the invocant node."""
158 node = self
159 while not isinstance(node, Leaf):
160 if not node.children:
161 return
162 node = node.children[0]
163 return node.lineno
165 def changed(self):
166 if self.parent:
167 self.parent.changed()
168 self.was_changed = True
170 def remove(self):
172 Remove the node from the tree. Returns the position of the node in its
173 parent's children before it was removed.
175 if self.parent:
176 for i, node in enumerate(self.parent.children):
177 if node is self:
178 self.parent.changed()
179 del self.parent.children[i]
180 self.parent = None
181 return i
183 @property
184 def next_sibling(self):
186 The node immediately following the invocant in their parent's children
187 list. If the invocant does not have a next sibling, it is None
189 if self.parent is None:
190 return None
192 # Can't use index(); we need to test by identity
193 for i, child in enumerate(self.parent.children):
194 if child is self:
195 try:
196 return self.parent.children[i+1]
197 except IndexError:
198 return None
200 @property
201 def prev_sibling(self):
203 The node immediately preceding the invocant in their parent's children
204 list. If the invocant does not have a previous sibling, it is None.
206 if self.parent is None:
207 return None
209 # Can't use index(); we need to test by identity
210 for i, child in enumerate(self.parent.children):
211 if child is self:
212 if i == 0:
213 return None
214 return self.parent.children[i-1]
216 def get_suffix(self):
218 Return the string immediately following the invocant node. This is
219 effectively equivalent to node.next_sibling.prefix
221 next_sib = self.next_sibling
222 if next_sib is None:
223 return u""
224 return next_sib.prefix
226 if sys.version_info < (3, 0):
227 def __str__(self):
228 return unicode(self).encode("ascii")
231 class Node(Base):
233 """Concrete implementation for interior nodes."""
235 def __init__(self, type, children, context=None, prefix=None):
237 Initializer.
239 Takes a type constant (a symbol number >= 256), a sequence of
240 child nodes, and an optional context keyword argument.
242 As a side effect, the parent pointers of the children are updated.
244 assert type >= 256, type
245 self.type = type
246 self.children = list(children)
247 for ch in self.children:
248 assert ch.parent is None, repr(ch)
249 ch.parent = self
250 if prefix is not None:
251 self.prefix = prefix
253 def __repr__(self):
254 """Return a canonical string representation."""
255 return "%s(%s, %r)" % (self.__class__.__name__,
256 type_repr(self.type),
257 self.children)
259 def __unicode__(self):
261 Return a pretty string representation.
263 This reproduces the input source exactly.
265 return u"".join(map(unicode, self.children))
267 if sys.version_info > (3, 0):
268 __str__ = __unicode__
270 def _eq(self, other):
271 """Compare two nodes for equality."""
272 return (self.type, self.children) == (other.type, other.children)
274 def clone(self):
275 """Return a cloned (deep) copy of self."""
276 return Node(self.type, [ch.clone() for ch in self.children])
278 def post_order(self):
279 """Return a post-order iterator for the tree."""
280 for child in self.children:
281 for node in child.post_order():
282 yield node
283 yield self
285 def pre_order(self):
286 """Return a pre-order iterator for the tree."""
287 yield self
288 for child in self.children:
289 for node in child.post_order():
290 yield node
292 @property
293 def prefix(self):
295 The whitespace and comments preceding this node in the input.
297 if not self.children:
298 return ""
299 return self.children[0].prefix
301 @prefix.setter
302 def prefix(self, prefix):
303 if self.children:
304 self.children[0].prefix = prefix
306 def set_child(self, i, child):
308 Equivalent to 'node.children[i] = child'. This method also sets the
309 child's parent attribute appropriately.
311 child.parent = self
312 self.children[i].parent = None
313 self.children[i] = child
314 self.changed()
316 def insert_child(self, i, child):
318 Equivalent to 'node.children.insert(i, child)'. This method also sets
319 the child's parent attribute appropriately.
321 child.parent = self
322 self.children.insert(i, child)
323 self.changed()
325 def append_child(self, child):
327 Equivalent to 'node.children.append(child)'. This method also sets the
328 child's parent attribute appropriately.
330 child.parent = self
331 self.children.append(child)
332 self.changed()
335 class Leaf(Base):
337 """Concrete implementation for leaf nodes."""
339 # Default values for instance variables
340 _prefix = "" # Whitespace and comments preceding this token in the input
341 lineno = 0 # Line where this token starts in the input
342 column = 0 # Column where this token tarts in the input
344 def __init__(self, type, value, context=None, prefix=None):
346 Initializer.
348 Takes a type constant (a token number < 256), a string value, and an
349 optional context keyword argument.
351 assert 0 <= type < 256, type
352 if context is not None:
353 self._prefix, (self.lineno, self.column) = context
354 self.type = type
355 self.value = value
356 if prefix is not None:
357 self._prefix = prefix
359 def __repr__(self):
360 """Return a canonical string representation."""
361 return "%s(%r, %r)" % (self.__class__.__name__,
362 self.type,
363 self.value)
365 def __unicode__(self):
367 Return a pretty string representation.
369 This reproduces the input source exactly.
371 return self.prefix + unicode(self.value)
373 if sys.version_info > (3, 0):
374 __str__ = __unicode__
376 def _eq(self, other):
377 """Compare two nodes for equality."""
378 return (self.type, self.value) == (other.type, other.value)
380 def clone(self):
381 """Return a cloned (deep) copy of self."""
382 return Leaf(self.type, self.value,
383 (self.prefix, (self.lineno, self.column)))
385 def post_order(self):
386 """Return a post-order iterator for the tree."""
387 yield self
389 def pre_order(self):
390 """Return a pre-order iterator for the tree."""
391 yield self
393 @property
394 def prefix(self):
396 The whitespace and comments preceding this token in the input.
398 return self._prefix
400 @prefix.setter
401 def prefix(self, prefix):
402 self.changed()
403 self._prefix = prefix
406 def convert(gr, raw_node):
408 Convert raw node information to a Node or Leaf instance.
410 This is passed to the parser driver which calls it whenever a reduction of a
411 grammar rule produces a new complete node, so that the tree is build
412 strictly bottom-up.
414 type, value, context, children = raw_node
415 if children or type in gr.number2symbol:
416 # If there's exactly one child, return that child instead of
417 # creating a new node.
418 if len(children) == 1:
419 return children[0]
420 return Node(type, children, context=context)
421 else:
422 return Leaf(type, value, context=context)
425 class BasePattern(object):
428 A pattern is a tree matching pattern.
430 It looks for a specific node type (token or symbol), and
431 optionally for a specific content.
433 This is an abstract base class. There are three concrete
434 subclasses:
436 - LeafPattern matches a single leaf node;
437 - NodePattern matches a single node (usually non-leaf);
438 - WildcardPattern matches a sequence of nodes of variable length.
441 # Defaults for instance variables
442 type = None # Node type (token if < 256, symbol if >= 256)
443 content = None # Optional content matching pattern
444 name = None # Optional name used to store match in results dict
446 def __new__(cls, *args, **kwds):
447 """Constructor that prevents BasePattern from being instantiated."""
448 assert cls is not BasePattern, "Cannot instantiate BasePattern"
449 return object.__new__(cls)
451 def __repr__(self):
452 args = [type_repr(self.type), self.content, self.name]
453 while args and args[-1] is None:
454 del args[-1]
455 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
457 def optimize(self):
459 A subclass can define this as a hook for optimizations.
461 Returns either self or another node with the same effect.
463 return self
465 def match(self, node, results=None):
467 Does this pattern exactly match a node?
469 Returns True if it matches, False if not.
471 If results is not None, it must be a dict which will be
472 updated with the nodes matching named subpatterns.
474 Default implementation for non-wildcard patterns.
476 if self.type is not None and node.type != self.type:
477 return False
478 if self.content is not None:
479 r = None
480 if results is not None:
481 r = {}
482 if not self._submatch(node, r):
483 return False
484 if r:
485 results.update(r)
486 if results is not None and self.name:
487 results[self.name] = node
488 return True
490 def match_seq(self, nodes, results=None):
492 Does this pattern exactly match a sequence of nodes?
494 Default implementation for non-wildcard patterns.
496 if len(nodes) != 1:
497 return False
498 return self.match(nodes[0], results)
500 def generate_matches(self, nodes):
502 Generator yielding all matches for this pattern.
504 Default implementation for non-wildcard patterns.
506 r = {}
507 if nodes and self.match(nodes[0], r):
508 yield 1, r
511 class LeafPattern(BasePattern):
513 def __init__(self, type=None, content=None, name=None):
515 Initializer. Takes optional type, content, and name.
517 The type, if given must be a token type (< 256). If not given,
518 this matches any *leaf* node; the content may still be required.
520 The content, if given, must be a string.
522 If a name is given, the matching node is stored in the results
523 dict under that key.
525 if type is not None:
526 assert 0 <= type < 256, type
527 if content is not None:
528 assert isinstance(content, basestring), repr(content)
529 self.type = type
530 self.content = content
531 self.name = name
533 def match(self, node, results=None):
534 """Override match() to insist on a leaf node."""
535 if not isinstance(node, Leaf):
536 return False
537 return BasePattern.match(self, node, results)
539 def _submatch(self, node, results=None):
541 Match the pattern's content to the node's children.
543 This assumes the node type matches and self.content is not None.
545 Returns True if it matches, False if not.
547 If results is not None, it must be a dict which will be
548 updated with the nodes matching named subpatterns.
550 When returning False, the results dict may still be updated.
552 return self.content == node.value
555 class NodePattern(BasePattern):
557 wildcards = False
559 def __init__(self, type=None, content=None, name=None):
561 Initializer. Takes optional type, content, and name.
563 The type, if given, must be a symbol type (>= 256). If the
564 type is None this matches *any* single node (leaf or not),
565 except if content is not None, in which it only matches
566 non-leaf nodes that also match the content pattern.
568 The content, if not None, must be a sequence of Patterns that
569 must match the node's children exactly. If the content is
570 given, the type must not be None.
572 If a name is given, the matching node is stored in the results
573 dict under that key.
575 if type is not None:
576 assert type >= 256, type
577 if content is not None:
578 assert not isinstance(content, basestring), repr(content)
579 content = list(content)
580 for i, item in enumerate(content):
581 assert isinstance(item, BasePattern), (i, item)
582 if isinstance(item, WildcardPattern):
583 self.wildcards = True
584 self.type = type
585 self.content = content
586 self.name = name
588 def _submatch(self, node, results=None):
590 Match the pattern's content to the node's children.
592 This assumes the node type matches and self.content is not None.
594 Returns True if it matches, False if not.
596 If results is not None, it must be a dict which will be
597 updated with the nodes matching named subpatterns.
599 When returning False, the results dict may still be updated.
601 if self.wildcards:
602 for c, r in generate_matches(self.content, node.children):
603 if c == len(node.children):
604 if results is not None:
605 results.update(r)
606 return True
607 return False
608 if len(self.content) != len(node.children):
609 return False
610 for subpattern, child in zip(self.content, node.children):
611 if not subpattern.match(child, results):
612 return False
613 return True
616 class WildcardPattern(BasePattern):
619 A wildcard pattern can match zero or more nodes.
621 This has all the flexibility needed to implement patterns like:
623 .* .+ .? .{m,n}
624 (a b c | d e | f)
625 (...)* (...)+ (...)? (...){m,n}
627 except it always uses non-greedy matching.
630 def __init__(self, content=None, min=0, max=HUGE, name=None):
632 Initializer.
634 Args:
635 content: optional sequence of subsequences of patterns;
636 if absent, matches one node;
637 if present, each subsequence is an alternative [*]
638 min: optinal minumum number of times to match, default 0
639 max: optional maximum number of times tro match, default HUGE
640 name: optional name assigned to this match
642 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
643 equivalent to (a b c | d e | f g h); if content is None,
644 this is equivalent to '.' in regular expression terms.
645 The min and max parameters work as follows:
646 min=0, max=maxint: .*
647 min=1, max=maxint: .+
648 min=0, max=1: .?
649 min=1, max=1: .
650 If content is not None, replace the dot with the parenthesized
651 list of alternatives, e.g. (a b c | d e | f g h)*
653 assert 0 <= min <= max <= HUGE, (min, max)
654 if content is not None:
655 content = tuple(map(tuple, content)) # Protect against alterations
656 # Check sanity of alternatives
657 assert len(content), repr(content) # Can't have zero alternatives
658 for alt in content:
659 assert len(alt), repr(alt) # Can have empty alternatives
660 self.content = content
661 self.min = min
662 self.max = max
663 self.name = name
665 def optimize(self):
666 """Optimize certain stacked wildcard patterns."""
667 subpattern = None
668 if (self.content is not None and
669 len(self.content) == 1 and len(self.content[0]) == 1):
670 subpattern = self.content[0][0]
671 if self.min == 1 and self.max == 1:
672 if self.content is None:
673 return NodePattern(name=self.name)
674 if subpattern is not None and self.name == subpattern.name:
675 return subpattern.optimize()
676 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
677 subpattern.min <= 1 and self.name == subpattern.name):
678 return WildcardPattern(subpattern.content,
679 self.min*subpattern.min,
680 self.max*subpattern.max,
681 subpattern.name)
682 return self
684 def match(self, node, results=None):
685 """Does this pattern exactly match a node?"""
686 return self.match_seq([node], results)
688 def match_seq(self, nodes, results=None):
689 """Does this pattern exactly match a sequence of nodes?"""
690 for c, r in self.generate_matches(nodes):
691 if c == len(nodes):
692 if results is not None:
693 results.update(r)
694 if self.name:
695 results[self.name] = list(nodes)
696 return True
697 return False
699 def generate_matches(self, nodes):
701 Generator yielding matches for a sequence of nodes.
703 Args:
704 nodes: sequence of nodes
706 Yields:
707 (count, results) tuples where:
708 count: the match comprises nodes[:count];
709 results: dict containing named submatches.
711 if self.content is None:
712 # Shortcut for special case (see __init__.__doc__)
713 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
714 r = {}
715 if self.name:
716 r[self.name] = nodes[:count]
717 yield count, r
718 elif self.name == "bare_name":
719 yield self._bare_name_matches(nodes)
720 else:
721 # The reason for this is that hitting the recursion limit usually
722 # results in some ugly messages about how RuntimeErrors are being
723 # ignored.
724 save_stderr = sys.stderr
725 sys.stderr = StringIO()
726 try:
727 for count, r in self._recursive_matches(nodes, 0):
728 if self.name:
729 r[self.name] = nodes[:count]
730 yield count, r
731 except RuntimeError:
732 # We fall back to the iterative pattern matching scheme if the recursive
733 # scheme hits the recursion limit.
734 for count, r in self._iterative_matches(nodes):
735 if self.name:
736 r[self.name] = nodes[:count]
737 yield count, r
738 finally:
739 sys.stderr = save_stderr
741 def _iterative_matches(self, nodes):
742 """Helper to iteratively yield the matches."""
743 nodelen = len(nodes)
744 if 0 >= self.min:
745 yield 0, {}
747 results = []
748 # generate matches that use just one alt from self.content
749 for alt in self.content:
750 for c, r in generate_matches(alt, nodes):
751 yield c, r
752 results.append((c, r))
754 # for each match, iterate down the nodes
755 while results:
756 new_results = []
757 for c0, r0 in results:
758 # stop if the entire set of nodes has been matched
759 if c0 < nodelen and c0 <= self.max:
760 for alt in self.content:
761 for c1, r1 in generate_matches(alt, nodes[c0:]):
762 if c1 > 0:
763 r = {}
764 r.update(r0)
765 r.update(r1)
766 yield c0 + c1, r
767 new_results.append((c0 + c1, r))
768 results = new_results
770 def _bare_name_matches(self, nodes):
771 """Special optimized matcher for bare_name."""
772 count = 0
773 r = {}
774 done = False
775 max = len(nodes)
776 while not done and count < max:
777 done = True
778 for leaf in self.content:
779 if leaf[0].match(nodes[count], r):
780 count += 1
781 done = False
782 break
783 r[self.name] = nodes[:count]
784 return count, r
786 def _recursive_matches(self, nodes, count):
787 """Helper to recursively yield the matches."""
788 assert self.content is not None
789 if count >= self.min:
790 yield 0, {}
791 if count < self.max:
792 for alt in self.content:
793 for c0, r0 in generate_matches(alt, nodes):
794 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
795 r = {}
796 r.update(r0)
797 r.update(r1)
798 yield c0 + c1, r
801 class NegatedPattern(BasePattern):
803 def __init__(self, content=None):
805 Initializer.
807 The argument is either a pattern or None. If it is None, this
808 only matches an empty sequence (effectively '$' in regex
809 lingo). If it is not None, this matches whenever the argument
810 pattern doesn't have any matches.
812 if content is not None:
813 assert isinstance(content, BasePattern), repr(content)
814 self.content = content
816 def match(self, node):
817 # We never match a node in its entirety
818 return False
820 def match_seq(self, nodes):
821 # We only match an empty sequence of nodes in its entirety
822 return len(nodes) == 0
824 def generate_matches(self, nodes):
825 if self.content is None:
826 # Return a match if there is an empty sequence
827 if len(nodes) == 0:
828 yield 0, {}
829 else:
830 # Return a match if the argument pattern has no matches
831 for c, r in self.content.generate_matches(nodes):
832 return
833 yield 0, {}
836 def generate_matches(patterns, nodes):
838 Generator yielding matches for a sequence of patterns and nodes.
840 Args:
841 patterns: a sequence of patterns
842 nodes: a sequence of nodes
844 Yields:
845 (count, results) tuples where:
846 count: the entire sequence of patterns matches nodes[:count];
847 results: dict containing named submatches.
849 if not patterns:
850 yield 0, {}
851 else:
852 p, rest = patterns[0], patterns[1:]
853 for c0, r0 in p.generate_matches(nodes):
854 if not rest:
855 yield c0, r0
856 else:
857 for c1, r1 in generate_matches(rest, nodes[c0:]):
858 r = {}
859 r.update(r0)
860 r.update(r1)
861 yield c0 + c1, r