Enable doctest running for several other documents.
[python.git] / Lib / lib2to3 / pytree.py
blob8b6780afdb1bf2ef8af878b4c9c15cd1adf793f6
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
4 """Python parse tree definitions.
6 This is a very concrete parse tree; we need to keep every token and
7 even the comments and whitespace between tokens.
9 There's also a pattern matching implementation here.
10 """
12 __author__ = "Guido van Rossum <guido@python.org>"
15 HUGE = 0x7FFFFFFF # maximum repeat count, default max
17 _type_reprs = {}
18 def type_repr(type_num):
19 global _type_reprs
20 if not _type_reprs:
21 from .pygram import python_symbols
22 # printing tokens is possible but not as useful
23 # from .pgen2 import token // token.__dict__.items():
24 for name, val in python_symbols.__dict__.items():
25 if type(val) == int: _type_reprs[val] = name
26 return _type_reprs.setdefault(type_num, type_num)
29 class Base(object):
31 """Abstract base class for Node and Leaf.
33 This provides some default functionality and boilerplate using the
34 template pattern.
36 A node may be a subnode of at most one parent.
37 """
39 # Default values for instance variables
40 type = None # int: token number (< 256) or symbol number (>= 256)
41 parent = None # Parent node pointer, or None
42 children = () # Tuple of subnodes
43 was_changed = False
45 def __new__(cls, *args, **kwds):
46 """Constructor that prevents Base from being instantiated."""
47 assert cls is not Base, "Cannot instantiate Base"
48 return object.__new__(cls)
50 def __eq__(self, other):
51 """Compares two nodes for equality.
53 This calls the method _eq().
54 """
55 if self.__class__ is not other.__class__:
56 return NotImplemented
57 return self._eq(other)
59 def __ne__(self, other):
60 """Compares two nodes for inequality.
62 This calls the method _eq().
63 """
64 if self.__class__ is not other.__class__:
65 return NotImplemented
66 return not self._eq(other)
68 def _eq(self, other):
69 """Compares two nodes for equality.
71 This is called by __eq__ and __ne__. It is only called if the
72 two nodes have the same type. This must be implemented by the
73 concrete subclass. Nodes should be considered equal if they
74 have the same structure, ignoring the prefix string and other
75 context information.
76 """
77 raise NotImplementedError
79 def clone(self):
80 """Returns a cloned (deep) copy of self.
82 This must be implemented by the concrete subclass.
83 """
84 raise NotImplementedError
86 def post_order(self):
87 """Returns a post-order iterator for the tree.
89 This must be implemented by the concrete subclass.
90 """
91 raise NotImplementedError
93 def pre_order(self):
94 """Returns a pre-order iterator for the tree.
96 This must be implemented by the concrete subclass.
97 """
98 raise NotImplementedError
100 def set_prefix(self, prefix):
101 """Sets the prefix for the node (see Leaf class).
103 This must be implemented by the concrete subclass.
105 raise NotImplementedError
107 def get_prefix(self):
108 """Returns the prefix for the node (see Leaf class).
110 This must be implemented by the concrete subclass.
112 raise NotImplementedError
114 def replace(self, new):
115 """Replaces this node with a new one in the parent."""
116 assert self.parent is not None, str(self)
117 assert new is not None
118 if not isinstance(new, list):
119 new = [new]
120 l_children = []
121 found = False
122 for ch in self.parent.children:
123 if ch is self:
124 assert not found, (self.parent.children, self, new)
125 if new is not None:
126 l_children.extend(new)
127 found = True
128 else:
129 l_children.append(ch)
130 assert found, (self.children, self, new)
131 self.parent.changed()
132 self.parent.children = l_children
133 for x in new:
134 x.parent = self.parent
135 self.parent = None
137 def get_lineno(self):
138 """Returns the line number which generated the invocant node."""
139 node = self
140 while not isinstance(node, Leaf):
141 if not node.children:
142 return
143 node = node.children[0]
144 return node.lineno
146 def changed(self):
147 if self.parent:
148 self.parent.changed()
149 self.was_changed = True
151 def remove(self):
152 """Remove the node from the tree. Returns the position of the node
153 in its parent's children before it was removed."""
154 if self.parent:
155 for i, node in enumerate(self.parent.children):
156 if node is self:
157 self.parent.changed()
158 del self.parent.children[i]
159 self.parent = None
160 return i
162 def get_next_sibling(self):
163 """Return the node immediately following the invocant in their
164 parent's children list. If the invocant does not have a next
165 sibling, return None."""
166 if self.parent is None:
167 return None
169 # Can't use index(); we need to test by identity
170 for i, sibling in enumerate(self.parent.children):
171 if sibling is self:
172 try:
173 return self.parent.children[i+1]
174 except IndexError:
175 return None
177 def get_suffix(self):
178 """Return the string immediately following the invocant node. This
179 is effectively equivalent to node.get_next_sibling().get_prefix()"""
180 next_sib = self.get_next_sibling()
181 if next_sib is None:
182 return ""
183 return next_sib.get_prefix()
186 class Node(Base):
188 """Concrete implementation for interior nodes."""
190 def __init__(self, type, children, context=None, prefix=None):
191 """Initializer.
193 Takes a type constant (a symbol number >= 256), a sequence of
194 child nodes, and an optional context keyword argument.
196 As a side effect, the parent pointers of the children are updated.
198 assert type >= 256, type
199 self.type = type
200 self.children = list(children)
201 for ch in self.children:
202 assert ch.parent is None, repr(ch)
203 ch.parent = self
204 if prefix is not None:
205 self.set_prefix(prefix)
207 def __repr__(self):
208 """Returns a canonical string representation."""
209 return "%s(%s, %r)" % (self.__class__.__name__,
210 type_repr(self.type),
211 self.children)
213 def __str__(self):
214 """Returns a pretty string representation.
216 This reproduces the input source exactly.
218 return "".join(map(str, self.children))
220 def _eq(self, other):
221 """Compares two nodes for equality."""
222 return (self.type, self.children) == (other.type, other.children)
224 def clone(self):
225 """Returns a cloned (deep) copy of self."""
226 return Node(self.type, [ch.clone() for ch in self.children])
228 def post_order(self):
229 """Returns a post-order iterator for the tree."""
230 for child in self.children:
231 for node in child.post_order():
232 yield node
233 yield self
235 def pre_order(self):
236 """Returns a pre-order iterator for the tree."""
237 yield self
238 for child in self.children:
239 for node in child.post_order():
240 yield node
242 def set_prefix(self, prefix):
243 """Sets the prefix for the node.
245 This passes the responsibility on to the first child.
247 if self.children:
248 self.children[0].set_prefix(prefix)
250 def get_prefix(self):
251 """Returns the prefix for the node.
253 This passes the call on to the first child.
255 if not self.children:
256 return ""
257 return self.children[0].get_prefix()
259 def set_child(self, i, child):
260 """Equivalent to 'node.children[i] = child'. This method also sets the
261 child's parent attribute appropriately."""
262 child.parent = self
263 self.children[i].parent = None
264 self.children[i] = child
266 def insert_child(self, i, child):
267 """Equivalent to 'node.children.insert(i, child)'. This method also
268 sets the child's parent attribute appropriately."""
269 child.parent = self
270 self.children.insert(i, child)
272 def append_child(self, child):
273 """Equivalent to 'node.children.append(child)'. This method also
274 sets the child's parent attribute appropriately."""
275 child.parent = self
276 self.children.append(child)
279 class Leaf(Base):
281 """Concrete implementation for leaf nodes."""
283 # Default values for instance variables
284 prefix = "" # Whitespace and comments preceding this token in the input
285 lineno = 0 # Line where this token starts in the input
286 column = 0 # Column where this token tarts in the input
288 def __init__(self, type, value, context=None, prefix=None):
289 """Initializer.
291 Takes a type constant (a token number < 256), a string value,
292 and an optional context keyword argument.
294 assert 0 <= type < 256, type
295 if context is not None:
296 self.prefix, (self.lineno, self.column) = context
297 self.type = type
298 self.value = value
299 if prefix is not None:
300 self.prefix = prefix
302 def __repr__(self):
303 """Returns a canonical string representation."""
304 return "%s(%r, %r)" % (self.__class__.__name__,
305 self.type,
306 self.value)
308 def __str__(self):
309 """Returns a pretty string representation.
311 This reproduces the input source exactly.
313 return self.prefix + str(self.value)
315 def _eq(self, other):
316 """Compares two nodes for equality."""
317 return (self.type, self.value) == (other.type, other.value)
319 def clone(self):
320 """Returns a cloned (deep) copy of self."""
321 return Leaf(self.type, self.value,
322 (self.prefix, (self.lineno, self.column)))
324 def post_order(self):
325 """Returns a post-order iterator for the tree."""
326 yield self
328 def pre_order(self):
329 """Returns a pre-order iterator for the tree."""
330 yield self
332 def set_prefix(self, prefix):
333 """Sets the prefix for the node."""
334 self.changed()
335 self.prefix = prefix
337 def get_prefix(self):
338 """Returns the prefix for the node."""
339 return self.prefix
342 def convert(gr, raw_node):
343 """Converts raw node information to a Node or Leaf instance.
345 This is passed to the parser driver which calls it whenever a
346 reduction of a grammar rule produces a new complete node, so that
347 the tree is build strictly bottom-up.
349 type, value, context, children = raw_node
350 if children or type in gr.number2symbol:
351 # If there's exactly one child, return that child instead of
352 # creating a new node.
353 if len(children) == 1:
354 return children[0]
355 return Node(type, children, context=context)
356 else:
357 return Leaf(type, value, context=context)
360 class BasePattern(object):
362 """A pattern is a tree matching pattern.
364 It looks for a specific node type (token or symbol), and
365 optionally for a specific content.
367 This is an abstract base class. There are three concrete
368 subclasses:
370 - LeafPattern matches a single leaf node;
371 - NodePattern matches a single node (usually non-leaf);
372 - WildcardPattern matches a sequence of nodes of variable length.
375 # Defaults for instance variables
376 type = None # Node type (token if < 256, symbol if >= 256)
377 content = None # Optional content matching pattern
378 name = None # Optional name used to store match in results dict
380 def __new__(cls, *args, **kwds):
381 """Constructor that prevents BasePattern from being instantiated."""
382 assert cls is not BasePattern, "Cannot instantiate BasePattern"
383 return object.__new__(cls)
385 def __repr__(self):
386 args = [type_repr(self.type), self.content, self.name]
387 while args and args[-1] is None:
388 del args[-1]
389 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
391 def optimize(self):
392 """A subclass can define this as a hook for optimizations.
394 Returns either self or another node with the same effect.
396 return self
398 def match(self, node, results=None):
399 """Does this pattern exactly match a node?
401 Returns True if it matches, False if not.
403 If results is not None, it must be a dict which will be
404 updated with the nodes matching named subpatterns.
406 Default implementation for non-wildcard patterns.
408 if self.type is not None and node.type != self.type:
409 return False
410 if self.content is not None:
411 r = None
412 if results is not None:
413 r = {}
414 if not self._submatch(node, r):
415 return False
416 if r:
417 results.update(r)
418 if results is not None and self.name:
419 results[self.name] = node
420 return True
422 def match_seq(self, nodes, results=None):
423 """Does this pattern exactly match a sequence of nodes?
425 Default implementation for non-wildcard patterns.
427 if len(nodes) != 1:
428 return False
429 return self.match(nodes[0], results)
431 def generate_matches(self, nodes):
432 """Generator yielding all matches for this pattern.
434 Default implementation for non-wildcard patterns.
436 r = {}
437 if nodes and self.match(nodes[0], r):
438 yield 1, r
441 class LeafPattern(BasePattern):
443 def __init__(self, type=None, content=None, name=None):
444 """Initializer. Takes optional type, content, and name.
446 The type, if given must be a token type (< 256). If not given,
447 this matches any *leaf* node; the content may still be required.
449 The content, if given, must be a string.
451 If a name is given, the matching node is stored in the results
452 dict under that key.
454 if type is not None:
455 assert 0 <= type < 256, type
456 if content is not None:
457 assert isinstance(content, basestring), repr(content)
458 self.type = type
459 self.content = content
460 self.name = name
462 def match(self, node, results=None):
463 """Override match() to insist on a leaf node."""
464 if not isinstance(node, Leaf):
465 return False
466 return BasePattern.match(self, node, results)
468 def _submatch(self, node, results=None):
469 """Match the pattern's content to the node's children.
471 This assumes the node type matches and self.content is not None.
473 Returns True if it matches, False if not.
475 If results is not None, it must be a dict which will be
476 updated with the nodes matching named subpatterns.
478 When returning False, the results dict may still be updated.
480 return self.content == node.value
483 class NodePattern(BasePattern):
485 wildcards = False
487 def __init__(self, type=None, content=None, name=None):
488 """Initializer. Takes optional type, content, and name.
490 The type, if given, must be a symbol type (>= 256). If the
491 type is None this matches *any* single node (leaf or not),
492 except if content is not None, in which it only matches
493 non-leaf nodes that also match the content pattern.
495 The content, if not None, must be a sequence of Patterns that
496 must match the node's children exactly. If the content is
497 given, the type must not be None.
499 If a name is given, the matching node is stored in the results
500 dict under that key.
502 if type is not None:
503 assert type >= 256, type
504 if content is not None:
505 assert not isinstance(content, basestring), repr(content)
506 content = list(content)
507 for i, item in enumerate(content):
508 assert isinstance(item, BasePattern), (i, item)
509 if isinstance(item, WildcardPattern):
510 self.wildcards = True
511 self.type = type
512 self.content = content
513 self.name = name
515 def _submatch(self, node, results=None):
516 """Match the pattern's content to the node's children.
518 This assumes the node type matches and self.content is not None.
520 Returns True if it matches, False if not.
522 If results is not None, it must be a dict which will be
523 updated with the nodes matching named subpatterns.
525 When returning False, the results dict may still be updated.
527 if self.wildcards:
528 for c, r in generate_matches(self.content, node.children):
529 if c == len(node.children):
530 if results is not None:
531 results.update(r)
532 return True
533 return False
534 if len(self.content) != len(node.children):
535 return False
536 for subpattern, child in zip(self.content, node.children):
537 if not subpattern.match(child, results):
538 return False
539 return True
542 class WildcardPattern(BasePattern):
544 """A wildcard pattern can match zero or more nodes.
546 This has all the flexibility needed to implement patterns like:
548 .* .+ .? .{m,n}
549 (a b c | d e | f)
550 (...)* (...)+ (...)? (...){m,n}
552 except it always uses non-greedy matching.
555 def __init__(self, content=None, min=0, max=HUGE, name=None):
556 """Initializer.
558 Args:
559 content: optional sequence of subsequences of patterns;
560 if absent, matches one node;
561 if present, each subsequence is an alternative [*]
562 min: optinal minumum number of times to match, default 0
563 max: optional maximum number of times tro match, default HUGE
564 name: optional name assigned to this match
566 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
567 equivalent to (a b c | d e | f g h); if content is None,
568 this is equivalent to '.' in regular expression terms.
569 The min and max parameters work as follows:
570 min=0, max=maxint: .*
571 min=1, max=maxint: .+
572 min=0, max=1: .?
573 min=1, max=1: .
574 If content is not None, replace the dot with the parenthesized
575 list of alternatives, e.g. (a b c | d e | f g h)*
577 assert 0 <= min <= max <= HUGE, (min, max)
578 if content is not None:
579 content = tuple(map(tuple, content)) # Protect against alterations
580 # Check sanity of alternatives
581 assert len(content), repr(content) # Can't have zero alternatives
582 for alt in content:
583 assert len(alt), repr(alt) # Can have empty alternatives
584 self.content = content
585 self.min = min
586 self.max = max
587 self.name = name
589 def optimize(self):
590 """Optimize certain stacked wildcard patterns."""
591 subpattern = None
592 if (self.content is not None and
593 len(self.content) == 1 and len(self.content[0]) == 1):
594 subpattern = self.content[0][0]
595 if self.min == 1 and self.max == 1:
596 if self.content is None:
597 return NodePattern(name=self.name)
598 if subpattern is not None and self.name == subpattern.name:
599 return subpattern.optimize()
600 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
601 subpattern.min <= 1 and self.name == subpattern.name):
602 return WildcardPattern(subpattern.content,
603 self.min*subpattern.min,
604 self.max*subpattern.max,
605 subpattern.name)
606 return self
608 def match(self, node, results=None):
609 """Does this pattern exactly match a node?"""
610 return self.match_seq([node], results)
612 def match_seq(self, nodes, results=None):
613 """Does this pattern exactly match a sequence of nodes?"""
614 for c, r in self.generate_matches(nodes):
615 if c == len(nodes):
616 if results is not None:
617 results.update(r)
618 if self.name:
619 results[self.name] = list(nodes)
620 return True
621 return False
623 def generate_matches(self, nodes):
624 """Generator yielding matches for a sequence of nodes.
626 Args:
627 nodes: sequence of nodes
629 Yields:
630 (count, results) tuples where:
631 count: the match comprises nodes[:count];
632 results: dict containing named submatches.
634 if self.content is None:
635 # Shortcut for special case (see __init__.__doc__)
636 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
637 r = {}
638 if self.name:
639 r[self.name] = nodes[:count]
640 yield count, r
641 else:
642 for count, r in self._recursive_matches(nodes, 0):
643 if self.name:
644 r[self.name] = nodes[:count]
645 yield count, r
647 def _recursive_matches(self, nodes, count):
648 """Helper to recursively yield the matches."""
649 assert self.content is not None
650 if count >= self.min:
651 r = {}
652 if self.name:
653 r[self.name] = nodes[:0]
654 yield 0, r
655 if count < self.max:
656 for alt in self.content:
657 for c0, r0 in generate_matches(alt, nodes):
658 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
659 r = {}
660 r.update(r0)
661 r.update(r1)
662 yield c0 + c1, r
665 class NegatedPattern(BasePattern):
667 def __init__(self, content=None):
668 """Initializer.
670 The argument is either a pattern or None. If it is None, this
671 only matches an empty sequence (effectively '$' in regex
672 lingo). If it is not None, this matches whenever the argument
673 pattern doesn't have any matches.
675 if content is not None:
676 assert isinstance(content, BasePattern), repr(content)
677 self.content = content
679 def match(self, node):
680 # We never match a node in its entirety
681 return False
683 def match_seq(self, nodes):
684 # We only match an empty sequence of nodes in its entirety
685 return len(nodes) == 0
687 def generate_matches(self, nodes):
688 if self.content is None:
689 # Return a match if there is an empty sequence
690 if len(nodes) == 0:
691 yield 0, {}
692 else:
693 # Return a match if the argument pattern has no matches
694 for c, r in self.content.generate_matches(nodes):
695 return
696 yield 0, {}
699 def generate_matches(patterns, nodes):
700 """Generator yielding matches for a sequence of patterns and nodes.
702 Args:
703 patterns: a sequence of patterns
704 nodes: a sequence of nodes
706 Yields:
707 (count, results) tuples where:
708 count: the entire sequence of patterns matches nodes[:count];
709 results: dict containing named submatches.
711 if not patterns:
712 yield 0, {}
713 else:
714 p, rest = patterns[0], patterns[1:]
715 for c0, r0 in p.generate_matches(nodes):
716 if not rest:
717 yield c0, r0
718 else:
719 for c1, r1 in generate_matches(rest, nodes[c0:]):
720 r = {}
721 r.update(r0)
722 r.update(r1)
723 yield c0 + c1, r