1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
6 The grammer is taken from PatternGrammar.txt.
8 The compiler compiles a pattern to a pytree.*Pattern instance.
11 __author__
= "Guido van Rossum <guido@python.org>"
16 # Fairly local imports
17 from .pgen2
import driver
, literals
, token
, tokenize
, parse
, grammar
19 # Really local imports
23 # The pattern grammar file
24 _PATTERN_GRAMMAR_FILE
= os
.path
.join(os
.path
.dirname(__file__
),
28 class PatternSyntaxError(Exception):
32 def tokenize_wrapper(input):
33 """Tokenizes a string suppressing significant whitespace."""
34 skip
= set((token
.NEWLINE
, token
.INDENT
, token
.DEDENT
))
35 tokens
= tokenize
.generate_tokens(driver
.generate_lines(input).next
)
36 for quintuple
in tokens
:
37 type, value
, start
, end
, line_text
= quintuple
42 class PatternCompiler(object):
44 def __init__(self
, grammar_file
=_PATTERN_GRAMMAR_FILE
):
47 Takes an optional alternative filename for the pattern grammar.
49 self
.grammar
= driver
.load_grammar(grammar_file
)
50 self
.syms
= pygram
.Symbols(self
.grammar
)
51 self
.pygrammar
= pygram
.python_grammar
52 self
.pysyms
= pygram
.python_symbols
53 self
.driver
= driver
.Driver(self
.grammar
, convert
=pattern_convert
)
55 def compile_pattern(self
, input, debug
=False):
56 """Compiles a pattern string to a nested pytree.*Pattern object."""
57 tokens
= tokenize_wrapper(input)
59 root
= self
.driver
.parse_tokens(tokens
, debug
=debug
)
60 except parse
.ParseError
as e
:
61 raise PatternSyntaxError(str(e
))
62 return self
.compile_node(root
)
64 def compile_node(self
, node
):
65 """Compiles a node, recursively.
67 This is one big switch on the node type.
69 # XXX Optimize certain Wildcard-containing-Wildcard patterns
71 if node
.type == self
.syms
.Matcher
:
72 node
= node
.children
[0] # Avoid unneeded recursion
74 if node
.type == self
.syms
.Alternatives
:
75 # Skip the odd children since they are just '|' tokens
76 alts
= [self
.compile_node(ch
) for ch
in node
.children
[::2]]
79 p
= pytree
.WildcardPattern([[a
] for a
in alts
], min=1, max=1)
82 if node
.type == self
.syms
.Alternative
:
83 units
= [self
.compile_node(ch
) for ch
in node
.children
]
86 p
= pytree
.WildcardPattern([units
], min=1, max=1)
89 if node
.type == self
.syms
.NegatedUnit
:
90 pattern
= self
.compile_basic(node
.children
[1:])
91 p
= pytree
.NegatedPattern(pattern
)
94 assert node
.type == self
.syms
.Unit
98 if len(nodes
) >= 3 and nodes
[1].type == token
.EQUAL
:
102 if len(nodes
) >= 2 and nodes
[-1].type == self
.syms
.Repeater
:
106 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
107 pattern
= self
.compile_basic(nodes
, repeat
)
109 if repeat
is not None:
110 assert repeat
.type == self
.syms
.Repeater
111 children
= repeat
.children
113 if child
.type == token
.STAR
:
116 elif child
.type == token
.PLUS
:
119 elif child
.type == token
.LBRACE
:
120 assert children
[-1].type == token
.RBRACE
121 assert len(children
) in (3, 5)
122 min = max = self
.get_int(children
[1])
123 if len(children
) == 5:
124 max = self
.get_int(children
[3])
127 if min != 1 or max != 1:
128 pattern
= pattern
.optimize()
129 pattern
= pytree
.WildcardPattern([[pattern
]], min=min, max=max)
133 return pattern
.optimize()
135 def compile_basic(self
, nodes
, repeat
=None):
136 # Compile STRING | NAME [Details] | (...) | [...]
137 assert len(nodes
) >= 1
139 if node
.type == token
.STRING
:
140 value
= unicode(literals
.evalString(node
.value
))
141 return pytree
.LeafPattern(_type_of_literal(value
), value
)
142 elif node
.type == token
.NAME
:
145 if value
not in TOKEN_MAP
:
146 raise PatternSyntaxError("Invalid token: %r" % value
)
148 raise PatternSyntaxError("Can't have details for token")
149 return pytree
.LeafPattern(TOKEN_MAP
[value
])
153 elif not value
.startswith("_"):
154 type = getattr(self
.pysyms
, value
, None)
156 raise PatternSyntaxError("Invalid symbol: %r" % value
)
157 if nodes
[1:]: # Details present
158 content
= [self
.compile_node(nodes
[1].children
[1])]
161 return pytree
.NodePattern(type, content
)
162 elif node
.value
== "(":
163 return self
.compile_node(nodes
[1])
164 elif node
.value
== "[":
165 assert repeat
is None
166 subpattern
= self
.compile_node(nodes
[1])
167 return pytree
.WildcardPattern([[subpattern
]], min=0, max=1)
170 def get_int(self
, node
):
171 assert node
.type == token
.NUMBER
172 return int(node
.value
)
175 # Map named tokens to the type value for a LeafPattern
176 TOKEN_MAP
= {"NAME": token
.NAME
,
177 "STRING": token
.STRING
,
178 "NUMBER": token
.NUMBER
,
182 def _type_of_literal(value
):
183 if value
[0].isalpha():
185 elif value
in grammar
.opmap
:
186 return grammar
.opmap
[value
]
191 def pattern_convert(grammar
, raw_node_info
):
192 """Converts raw node information to a Node or Leaf instance."""
193 type, value
, context
, children
= raw_node_info
194 if children
or type in grammar
.number2symbol
:
195 return pytree
.Node(type, children
, context
=context
)
197 return pytree
.Leaf(type, value
, context
=context
)
200 def compile_pattern(pattern
):
201 return PatternCompiler().compile_pattern(pattern
)