Put code examples at left margin instead of indenting them
[python.git] / Tools / compiler / astgen.py
blob59b98f2b0a186f562fbe729ea9b35d19dc3ac6d4
1 """Generate ast module from specification
3 This script generates the ast module from a simple specification,
4 which makes it easy to accommodate changes in the grammar. This
5 approach would be quite reasonable if the grammar changed often.
6 Instead, it is rather complex to generate the appropriate code. And
7 the Node interface has changed more often than the grammar.
8 """
10 import fileinput
11 import getopt
12 import re
13 import sys
14 from StringIO import StringIO
16 SPEC = "ast.txt"
17 COMMA = ", "
def load_boilerplate(file):
    """Return the (prologue, epilogue) boilerplate text found in *file*.

    The prologue is the text between the PROLOGUE and EPILOGUE marker
    lines; the epilogue is everything after the EPILOGUE marker.  Both
    are returned with surrounding whitespace stripped.

    The markers are not validated: str.find() returns -1 for a missing
    marker and the slices below are then taken relative to that value.
    """
    # Each marker is spelled as two adjacent string literals so that the
    # marker text never appears verbatim on these lines.  main() passes
    # sys.argv[-1] here, which is this script itself when no argument is
    # given, and find() must hit the real marker lines, not this code.
    pro_marker = '### ''PROLOGUE'
    epi_marker = '### ''EPILOGUE'

    f = open(file)
    try:
        # Close the file even if reading fails.
        buf = f.read()
    finally:
        f.close()

    i = buf.find(pro_marker)
    j = buf.find(epi_marker)
    # Skip over the marker text itself (both markers are 12 characters).
    pro = buf[i + len(pro_marker):j].strip()
    epi = buf[j + len(epi_marker):].strip()
    return pro, epi
def strip_default(arg):
    """Return the argname from an 'arg = default' string.

    Text without an '=' is handed back untouched; otherwise the part
    before the first '=' is returned with surrounding whitespace removed.
    """
    name, sep, _default = arg.partition('=')
    if not sep:
        # No default present: return the input exactly as given.
        return arg
    return name.strip()
# Argument property codes assigned by NodeInfo.get_argprops().  The values
# are ordered: NodeInfo keeps the largest one seen in hardest_arg and
# compares it against P_NESTED to choose the code it generates.
#   P_NODE   -- plain argument (no suffix in the spec)
#   P_OTHER  -- argument spelled with a trailing '*'
#   P_NESTED -- argument spelled with a trailing '!'
#   P_NONE   -- argument spelled with a trailing '&'
P_NODE, P_OTHER, P_NESTED, P_NONE = 1, 2, 3, 4
class NodeInfo:
    """Each instance describes a specific AST node.

    Attributes:
      name        -- class name used for the generated node class
      args        -- argument text for the generated __init__ signature
                     (property markers removed by get_argprops())
      argnames    -- list of bare argument names
      argprops    -- dict mapping argument name to a P_* property code
      nargs       -- number of arguments
      hardest_arg -- largest P_* code among the arguments
      init        -- extra source lines appended to the generated __init__
    """

    def __init__(self, name, args):
        self.name = name
        self.args = args.strip()
        # Order matters: get_argprops() rewrites both argnames and args.
        self.argnames = self.get_argnames()
        self.argprops = self.get_argprops()
        self.nargs = len(self.argnames)
        self.init = []

    def get_argnames(self):
        """Return the argument names from self.args, defaults removed.

        When the spec contains parentheses, only the text between the
        first '(' and the last ')' is considered.
        """
        if '(' in self.args:
            i = self.args.find('(')
            j = self.args.rfind(')')
            args = self.args[i+1:j]
        else:
            args = self.args
        return [strip_default(arg.strip())
                for arg in args.split(',') if arg]

    def get_argprops(self):
        """Each argument can have a property like '*' or '!'

        Returns a dict mapping each argument name to its P_* code and
        records the largest code in self.hardest_arg.

        XXX This method modifies the argnames in place!
        """
        # Looked up here rather than at class level so the P_* names are
        # only needed when the method actually runs.
        suffix_props = (('*', P_OTHER), ('!', P_NESTED), ('&', P_NONE))
        d = {}
        hardest_arg = P_NODE
        for i, arg in enumerate(self.argnames):
            prop = P_NODE
            for suffix, suffix_prop in suffix_props:
                if arg.endswith(suffix):
                    # Drop the marker from the stored name as well.
                    arg = self.argnames[i] = arg[:-1]
                    prop = suffix_prop
                    break
            d[arg] = prop
            hardest_arg = max(hardest_arg, prop)
        self.hardest_arg = hardest_arg

        if hardest_arg > P_NODE:
            # At least one marker was present: remove all marker
            # characters from the signature text too.
            for suffix, _prop in suffix_props:
                self.args = self.args.replace(suffix, '')

        return d

    def gen_source(self):
        """Return the source text of the generated node class."""
        buf = StringIO()
        buf.write("class %s(Node):\n" % self.name)
        self._gen_init(buf)
        buf.write("\n")
        self._gen_getChildren(buf)
        buf.write("\n")
        self._gen_getChildNodes(buf)
        buf.write("\n")
        self._gen_repr(buf)
        return buf.getvalue()

    def _gen_init(self, buf):
        """Write the generated __init__ method to buf."""
        if self.args:
            buf.write("    def __init__(self, %s, lineno=None):\n" % self.args)
        else:
            buf.write("    def __init__(self, lineno=None):\n")
        for name in self.argnames:
            buf.write("        self.%s = %s\n" % (name, name))
        buf.write("        self.lineno = lineno\n")
        # Copy the lines in self.init, indented four spaces.  The rstrip()
        # business is to get rid of the four spaces if line happens to be
        # empty, so that reindent.py is happy with the output.
        for line in self.init:
            buf.write(("    " + line).rstrip() + "\n")

    def _gen_getChildren(self, buf):
        """Write the generated getChildren method to buf."""
        buf.write("    def getChildren(self):\n")
        if not self.argnames:
            buf.write("        return ()\n")
        elif self.hardest_arg < P_NESTED:
            # Only plain / '*' arguments: return them directly as a tuple.
            clist = COMMA.join(["self.%s" % c for c in self.argnames])
            if self.nargs == 1:
                # Trailing comma makes the single value a 1-tuple.
                buf.write("        return %s,\n" % clist)
            else:
                buf.write("        return %s\n" % clist)
        elif len(self.argnames) == 1:
            buf.write("        return tuple(flatten(self.%s))\n"
                      % self.argnames[0])
        else:
            buf.write("        children = []\n")
            template = "        children.%s(%sself.%s%s)\n"
            for name in self.argnames:
                if self.argprops[name] == P_NESTED:
                    buf.write(template % ("extend", "flatten(", name, ")"))
                else:
                    buf.write(template % ("append", "", name, ""))
            buf.write("        return tuple(children)\n")

    def _gen_getChildNodes(self, buf):
        """Write the generated getChildNodes method to buf."""
        buf.write("    def getChildNodes(self):\n")
        if not self.argnames:
            buf.write("        return ()\n")
        elif self.hardest_arg < P_NESTED:
            # Only P_NODE arguments are reported as child nodes.
            clist = ["self.%s" % c
                     for c in self.argnames
                     if self.argprops[c] == P_NODE]
            if not clist:
                buf.write("        return ()\n")
            elif len(clist) == 1:
                buf.write("        return %s,\n" % clist[0])
            else:
                buf.write("        return %s\n" % COMMA.join(clist))
        else:
            buf.write("        nodelist = []\n")
            template = "        nodelist.%s(%sself.%s%s)\n"
            for name in self.argnames:
                prop = self.argprops[name]
                if prop == P_NONE:
                    # '&' arguments are appended only when not None.
                    tmp = ("        if self.%s is not None:\n"
                           "            nodelist.append(self.%s)\n")
                    buf.write(tmp % (name, name))
                elif prop == P_NESTED:
                    buf.write(template % ("extend", "flatten_nodes(",
                                          name, ")"))
                elif prop == P_NODE:
                    buf.write(template % ("append", "", name, ""))
                # P_OTHER arguments contribute nothing here.
            buf.write("        return tuple(nodelist)\n")

    def _gen_repr(self, buf):
        """Write the generated __repr__ method to buf."""
        buf.write("    def __repr__(self):\n")
        if self.argnames:
            fmt = COMMA.join(["%s"] * self.nargs)
            if '(' in self.args:
                fmt = '(%s)' % fmt
            vals = ["repr(self.%s)" % name for name in self.argnames]
            vals = COMMA.join(vals)
            if self.nargs == 1:
                # Keep the right-hand side of the generated % a tuple.
                vals = vals + ","
            buf.write('        return "%s(%s)" %% (%s)\n'
                      % (self.name, fmt, vals))
        else:
            buf.write('        return "%s()"\n' % self.name)
# Matches the header line that introduces extra __init__ code for a node,
# e.g. "init(Name):"; group 1 is the node name.
rx_init = re.compile(r'init\((.*)\):')

def parse_spec(file):
    """Parse the node specification *file* into a list of NodeInfo objects.

    Lines whose first non-blank character is '#' are skipped.  Until the
    first "init(Name):" header is seen, each line of the form "name: args"
    defines a node; lines that do not split into exactly two parts on ':'
    are ignored.  After an "init(Name):" header, every following
    non-comment line is appended to that node's init list until the next
    header.

    Returns the NodeInfo objects sorted by name.  Raises KeyError when an
    init header names a node that has not been defined.
    """
    classes = {}
    cur = None
    stream = fileinput.input(file)
    try:
        for line in stream:
            if line.strip().startswith('#'):
                continue
            mo = rx_init.search(line)
            if mo is not None:
                # some extra code for a Node's __init__ method
                cur = classes[mo.group(1)]
            elif cur is not None:
                # some code for the __init__ method
                cur.init.append(line)
            else:
                # a normal entry
                try:
                    name, args = line.split(':')
                except ValueError:
                    continue
                classes[name] = NodeInfo(name, args)
    finally:
        # Release the fileinput state even if parsing stops early, so a
        # later fileinput.input() call is not rejected as already active.
        stream.close()
    return sorted(classes.values(), key=lambda n: n.name)
def main():
    """Write the generated module to standard output.

    The boilerplate is read from the file named by the last command-line
    argument; the node classes come from the SPEC file.  Output order is:
    prologue, a blank line, one generated class per node, epilogue.
    """
    emit = sys.stdout.write
    prologue, epilogue = load_boilerplate(sys.argv[-1])
    emit(prologue + "\n")
    emit("\n")
    for info in parse_spec(SPEC):
        emit(info.gen_source() + "\n")
    emit(epilogue + "\n")

if __name__ == "__main__":
    main()
    # Stop here when run as a script: everything after this point in the
    # file is boilerplate text for the generated module.
    sys.exit(0)
233 ### PROLOGUE
234 """Python abstract syntax node definitions
236 This file is automatically generated by Tools/compiler/astgen.py
237 """
238 from consts import CO_VARARGS, CO_VARKEYWORDS
def flatten(seq):
    """Return a flat list of the elements of seq.

    Elements whose type is exactly tuple or list are expanded
    recursively; every other element is kept as it is.
    """
    flat = []
    for item in seq:
        kind = type(item)
        if kind is list or kind is tuple:
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
def flatten_nodes(seq):
    """Return the Node instances found in the flattened form of seq."""
    found = []
    for item in flatten(seq):
        if isinstance(item, Node):
            found.append(item)
    return found
254 nodes = {}
class Node:
    """Abstract base class for ast nodes."""

    def getChildren(self):
        # Placeholder: subclasses provide the real implementation.
        pass

    def __iter__(self):
        # Iterating a node walks whatever getChildren() returns.
        for child in self.getChildren():
            yield child

    def asList(self):
        # Alias of getChildren(), kept for backwards compatibility.
        return self.getChildren()

    def getChildNodes(self):
        # Placeholder: subclasses provide the real implementation.
        pass
class EmptyNode(Node):
    # A Node subclass that defines no attributes or methods of its own.
    pass
class Expression(Node):
    # Expression is an artificial node class to support "eval"
    # It is registered by name in the module-level nodes table.
    nodes["expression"] = "Expression"

    def __init__(self, node):
        # The single wrapped child.
        self.node = node

    def getChildren(self):
        return (self.node,)

    def getChildNodes(self):
        return (self.node,)

    def __repr__(self):
        return "Expression(%r)" % (self.node,)
286 ### EPILOGUE
# Record every module-level class derived from Node (Node included) in the
# nodes table under its lower-cased name.
# Iterate over a snapshot of the namespace: the loop variables are
# themselves module globals, so binding them adds keys to the very dict
# being walked wherever items() returns a live view.
# NOTE(review): on Python 2 a class declared without an explicit base is
# old-style and is not an instance of `type` -- confirm that the classes
# this loop is meant to register actually pass the isinstance() test.
for name, obj in list(globals().items()):
    if isinstance(obj, type) and issubclass(obj, Node):
        nodes[name.lower()] = obj