Merged revisions 76518 via svnmerge from
[python/dscho.git] / Lib / lib2to3 / patcomp.py
blob38316222d45545cbb63831e60f4a3ce86a9e7128
1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
4 """Pattern compiler.
6 The grammer is taken from PatternGrammar.txt.
8 The compiler compiles a pattern to a pytree.*Pattern instance.
9 """
11 __author__ = "Guido van Rossum <guido@python.org>"
13 # Python imports
14 import os
16 # Fairly local imports
17 from .pgen2 import driver, literals, token, tokenize, parse, grammar
19 # Really local imports
20 from . import pytree
21 from . import pygram
23 # The pattern grammar file
24 _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
25 "PatternGrammar.txt")
28 class PatternSyntaxError(Exception):
29 pass
32 def tokenize_wrapper(input):
33 """Tokenizes a string suppressing significant whitespace."""
34 skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
35 tokens = tokenize.generate_tokens(driver.generate_lines(input).__next__)
36 for quintuple in tokens:
37 type, value, start, end, line_text = quintuple
38 if type not in skip:
39 yield quintuple
42 class PatternCompiler(object):
44 def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
45 """Initializer.
47 Takes an optional alternative filename for the pattern grammar.
48 """
49 self.grammar = driver.load_grammar(grammar_file)
50 self.syms = pygram.Symbols(self.grammar)
51 self.pygrammar = pygram.python_grammar
52 self.pysyms = pygram.python_symbols
53 self.driver = driver.Driver(self.grammar, convert=pattern_convert)
55 def compile_pattern(self, input, debug=False):
56 """Compiles a pattern string to a nested pytree.*Pattern object."""
57 tokens = tokenize_wrapper(input)
58 try:
59 root = self.driver.parse_tokens(tokens, debug=debug)
60 except parse.ParseError as e:
61 raise PatternSyntaxError(str(e))
62 return self.compile_node(root)
64 def compile_node(self, node):
65 """Compiles a node, recursively.
67 This is one big switch on the node type.
68 """
69 # XXX Optimize certain Wildcard-containing-Wildcard patterns
70 # that can be merged
71 if node.type == self.syms.Matcher:
72 node = node.children[0] # Avoid unneeded recursion
74 if node.type == self.syms.Alternatives:
75 # Skip the odd children since they are just '|' tokens
76 alts = [self.compile_node(ch) for ch in node.children[::2]]
77 if len(alts) == 1:
78 return alts[0]
79 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
80 return p.optimize()
82 if node.type == self.syms.Alternative:
83 units = [self.compile_node(ch) for ch in node.children]
84 if len(units) == 1:
85 return units[0]
86 p = pytree.WildcardPattern([units], min=1, max=1)
87 return p.optimize()
89 if node.type == self.syms.NegatedUnit:
90 pattern = self.compile_basic(node.children[1:])
91 p = pytree.NegatedPattern(pattern)
92 return p.optimize()
94 assert node.type == self.syms.Unit
96 name = None
97 nodes = node.children
98 if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
99 name = nodes[0].value
100 nodes = nodes[2:]
101 repeat = None
102 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
103 repeat = nodes[-1]
104 nodes = nodes[:-1]
106 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
107 pattern = self.compile_basic(nodes, repeat)
109 if repeat is not None:
110 assert repeat.type == self.syms.Repeater
111 children = repeat.children
112 child = children[0]
113 if child.type == token.STAR:
114 min = 0
115 max = pytree.HUGE
116 elif child.type == token.PLUS:
117 min = 1
118 max = pytree.HUGE
119 elif child.type == token.LBRACE:
120 assert children[-1].type == token.RBRACE
121 assert len(children) in (3, 5)
122 min = max = self.get_int(children[1])
123 if len(children) == 5:
124 max = self.get_int(children[3])
125 else:
126 assert False
127 if min != 1 or max != 1:
128 pattern = pattern.optimize()
129 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
131 if name is not None:
132 pattern.name = name
133 return pattern.optimize()
135 def compile_basic(self, nodes, repeat=None):
136 # Compile STRING | NAME [Details] | (...) | [...]
137 assert len(nodes) >= 1
138 node = nodes[0]
139 if node.type == token.STRING:
140 value = str(literals.evalString(node.value))
141 return pytree.LeafPattern(_type_of_literal(value), value)
142 elif node.type == token.NAME:
143 value = node.value
144 if value.isupper():
145 if value not in TOKEN_MAP:
146 raise PatternSyntaxError("Invalid token: %r" % value)
147 if nodes[1:]:
148 raise PatternSyntaxError("Can't have details for token")
149 return pytree.LeafPattern(TOKEN_MAP[value])
150 else:
151 if value == "any":
152 type = None
153 elif not value.startswith("_"):
154 type = getattr(self.pysyms, value, None)
155 if type is None:
156 raise PatternSyntaxError("Invalid symbol: %r" % value)
157 if nodes[1:]: # Details present
158 content = [self.compile_node(nodes[1].children[1])]
159 else:
160 content = None
161 return pytree.NodePattern(type, content)
162 elif node.value == "(":
163 return self.compile_node(nodes[1])
164 elif node.value == "[":
165 assert repeat is None
166 subpattern = self.compile_node(nodes[1])
167 return pytree.WildcardPattern([[subpattern]], min=0, max=1)
168 assert False, node
170 def get_int(self, node):
171 assert node.type == token.NUMBER
172 return int(node.value)
175 # Map named tokens to the type value for a LeafPattern
176 TOKEN_MAP = {"NAME": token.NAME,
177 "STRING": token.STRING,
178 "NUMBER": token.NUMBER,
179 "TOKEN": None}
182 def _type_of_literal(value):
183 if value[0].isalpha():
184 return token.NAME
185 elif value in grammar.opmap:
186 return grammar.opmap[value]
187 else:
188 return None
191 def pattern_convert(grammar, raw_node_info):
192 """Converts raw node information to a Node or Leaf instance."""
193 type, value, context, children = raw_node_info
194 if children or type in grammar.number2symbol:
195 return pytree.Node(type, children, context=context)
196 else:
197 return pytree.Leaf(type, value, context=context)
200 def compile_pattern(pattern):
201 return PatternCompiler().compile_pattern(pattern)