1 """Parse a Python module and describe its classes and methods.
3 Parse enough of a Python file to recognize imports and class and
4 method definitions, and to find out the superclasses of a class.
6 The interface consists of a single function:
7 readmodule_ex(module [, path])
8 where module is the name of a Python module, and path is an optional
9 list of directories where the module is to be searched. If present,
10 path is prepended to the system search path sys.path. The return
11 value is a dictionary. The keys of the dictionary are the names of
12 the classes and top-level functions defined in the module (including
13 classes that are defined via the from XXX import YYY construct). The values are
14 instances of the classes Class and Function defined here. One special key/value pair
15 is present for packages: the key '__path__' has a list as its value
16 which contains the package search path.
18 A class is described by the class Class in this module. Instances
19 of this class have the following instance variables:
20 module -- the module name
21 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
26 The dictionary of methods uses the method names as keys and the line
27 numbers on which the method was defined as values.
28 If the name of a super class is not recognized, the corresponding
29 entry in the list of super classes is not a class instance but a
30 string giving the name of the super class. Since import statements
31 are recognized and imported modules are scanned as well, this
32 shouldn't happen often.
34 A function is described by the class Function in this module.
35 Instances of this class have the following instance variables:
36 module -- the module name
37 name -- the name of the function
38 file -- the file in which the function was defined
39 lineno -- the line in the file on which the def statement occurred
44 import tokenize
# Python tokenizer
45 from token
import NAME
, DEDENT
, NEWLINE
, OP
46 from operator
import itemgetter
48 __all__
= ["readmodule", "readmodule_ex", "Class", "Function"]
50 _modules
= {} # cache of modules we've seen
52 # each Python class is represented by an instance of this class
54 '''Class to represent a Python class.'''
55 def __init__(self
, module
, name
, super, file, lineno
):
65 def _addmethod(self
, name
, lineno
):
66 self
.methods
[name
] = lineno
69 '''Class to represent a top-level Python function'''
70 def __init__(self
, module
, name
, file, lineno
):
76 def readmodule(module
, path
=[]):
77 '''Backwards compatible interface.
79 Call readmodule_ex() and then only keep Class objects from the
80 resulting dictionary.'''
82 dict = _readmodule(module
, path
)
84 for key
, value
in dict.items():
85 if isinstance(value
, Class
):
89 def readmodule_ex(module
, path
=[]):
90 '''Read a module file and return a dictionary of classes.
92 Search for MODULE in PATH and sys.path, read and parse the
93 module and return a dictionary with one entry for each class
96 This function always searches for a top-level module, and PATH is
97 combined with sys.path. (The internal helper _readmodule() also takes
98 INPACKAGE: if true, it must be the dotted name of the package in which
99 we are searching for a submodule, and then PATH must be the package search path.)
101 return _readmodule(module
, path
)
103 def _readmodule(module
, path
, inpackage
=None):
104 '''Do the hard work for readmodule[_ex].'''
105 # Compute the full module name (prepending inpackage if set)
107 fullmodule
= "%s.%s" % (inpackage
, module
)
112 if fullmodule
in _modules
:
113 return _modules
[fullmodule
]
115 # Initialize the dict for this module's contents
118 # Check if it is a built-in module; we don't do much for these
119 if module
in sys
.builtin_module_names
and not inpackage
:
120 _modules
[module
] = dict
123 # Check for a dotted module name
124 i
= module
.rfind('.')
127 submodule
= module
[i
+1:]
128 parent
= _readmodule(package
, path
, inpackage
)
130 package
= "%s.%s" % (inpackage
, package
)
131 return _readmodule(submodule
, parent
['__path__'], package
)
133 # Search the path for the module
136 f
, file, (suff
, mode
, type) = imp
.find_module(module
, path
)
138 f
, file, (suff
, mode
, type) = imp
.find_module(module
, path
+ sys
.path
)
139 if type == imp
.PKG_DIRECTORY
:
140 dict['__path__'] = [file]
142 f
, file, (suff
, mode
, type) = imp
.find_module('__init__', [file])
143 _modules
[fullmodule
] = dict
144 if type != imp
.PY_SOURCE
:
145 # not Python source, can't do anything with this module
149 stack
= [] # stack of (class, indent) pairs
151 g
= tokenize
.generate_tokens(f
.readline
)
153 for tokentype
, token
, start
, end
, line
in g
:
154 if tokentype
== DEDENT
:
155 lineno
, thisindent
= start
156 # close nested classes and defs
157 while stack
and stack
[-1][1] >= thisindent
:
160 lineno
, thisindent
= start
161 # close previous nested classes and defs
162 while stack
and stack
[-1][1] >= thisindent
:
164 tokentype
, meth_name
, start
, end
, line
= g
.next()
165 if tokentype
!= NAME
:
166 continue # Syntax error
168 cur_class
= stack
[-1][0]
169 if isinstance(cur_class
, Class
):
171 cur_class
._addmethod
(meth_name
, lineno
)
172 # else it's a nested def
175 dict[meth_name
] = Function(fullmodule
, meth_name
, file, lineno
)
176 stack
.append((None, thisindent
)) # Marker for nested fns
177 elif token
== 'class':
178 lineno
, thisindent
= start
179 # close previous nested classes and defs
180 while stack
and stack
[-1][1] >= thisindent
:
182 tokentype
, class_name
, start
, end
, line
= g
.next()
183 if tokentype
!= NAME
:
184 continue # Syntax error
185 # parse what follows the class name
186 tokentype
, token
, start
, end
, line
= g
.next()
189 names
= [] # List of superclasses
190 # there's a list of superclasses
192 super = [] # Tokens making up current superclass
194 tokentype
, token
, start
, end
, line
= g
.next()
195 if token
in (')', ',') and level
== 1:
198 # we know this super class
203 # super class is of the form
204 # module.class: look in module for
220 elif token
== ',' and level
== 1:
222 # only use NAME and OP (== dot) tokens for type name
223 elif tokentype
in (NAME
, OP
) and level
== 1:
225 # expressions in the base list are not supported
227 cur_class
= Class(fullmodule
, class_name
, inherit
, file, lineno
)
229 dict[class_name
] = cur_class
230 stack
.append((cur_class
, thisindent
))
231 elif token
== 'import' and start
[1] == 0:
232 modules
= _getnamelist(g
)
233 for mod
, mod2
in modules
:
235 # Recursively read the imported module
237 _readmodule(mod
, path
)
240 _readmodule(mod
, path
, inpackage
)
244 # If we can't find or parse the imported module,
245 # too bad -- don't die here.
247 elif token
== 'from' and start
[1] == 0:
248 mod
, token
= _getname(g
)
249 if not mod
or token
!= "import":
251 names
= _getnamelist(g
)
253 # Recursively read the imported module
254 d
= _readmodule(mod
, path
, inpackage
)
256 # If we can't find or parse the imported module,
257 # too bad -- don't die here.
259 # add any classes that were defined in the imported module
260 # to our name space if they were mentioned in the list
265 # don't add names that start with _
269 except StopIteration:
276 # Helper to get a comma-separated list of dotted names plus 'as'
277 # clauses. Return a list of pairs (name, name2) where name2 is
278 # the 'as' name, or None if there is no 'as' clause.
281 name
, token
= _getname(g
)
285 name2
, token
= _getname(g
)
288 names
.append((name
, name2
))
289 while token
!= "," and "\n" not in token
:
290 tokentype
, token
, start
, end
, line
= g
.next()
296 # Helper to get a dotted name, return a pair (name, token) where
297 # name is the dotted name, or None if there was no dotted name,
298 # and token is the next input token.
300 tokentype
, token
, start
, end
, line
= g
.next()
301 if tokentype
!= NAME
and token
!= '*':
305 tokentype
, token
, start
, end
, line
= g
.next()
308 tokentype
, token
, start
, end
, line
= g
.next()
309 if tokentype
!= NAME
:
312 return (".".join(parts
), token
)
315 # Main program for testing.
318 if os
.path
.exists(mod
):
319 path
= [os
.path
.dirname(mod
)]
320 mod
= os
.path
.basename(mod
)
321 if mod
.lower().endswith(".py"):
325 dict = readmodule_ex(mod
, path
)
327 objs
.sort(lambda a
, b
: cmp(getattr(a
, 'lineno', 0),
328 getattr(b
, 'lineno', 0)))
330 if isinstance(obj
, Class
):
331 print "class", obj
.name
, obj
.super, obj
.lineno
332 methods
= sorted(obj
.methods
.iteritems(), key
=itemgetter(1))
333 for name
, lineno
in methods
:
334 if name
!= "__path__":
335 print " def", name
, lineno
336 elif isinstance(obj
, Function
):
337 print "def", obj
.name
, obj
.lineno
339 if __name__
== "__main__":