Bump API version for new plugin entry points (oops)
[geany-mirror.git] / scripts / create_py_tags.py
blobaee03a3c0214d97803d91ac45218758e586fec62
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Author: Enrico Tröger
5 # License: GPL v2 or later
7 # (based on the script at http://svn.python.org/view/*checkout*/python/trunk/Tools/scripts/ptags.py)
9 # This script should be run in the top source directory.
# Parses all files given on the command line for Python classes or functions and writes
# them into data/python.tags (internal tagmanager format).
13 # If called without command line arguments, a preset of common Python libs is used.
15 # WARNING
# Be aware that running this script will actually *import* modules in the specified directory
# or in the standard library path of your Python installation. Depending on what Python modules
# you have installed, this might not be what you want and can have weird side effects.
19 # You have been warned.
# It should, however, be relatively safe to execute this script with a fresh Python installation
# installed into a dedicated prefix. Then nothing else is necessary other than changing the
# interpreter with which you start this script.
26 import datetime
27 import imp
28 import inspect
29 import os
30 import re
31 import sys
32 import types
# directory of the standard library of the interpreter running this script
PYTHON_LIB_DIRECTORY = os.path.dirname(os.__file__)
# packages are ignored if any of these strings occurs in their path relative to the library
PYTHON_LIB_IGNORE_PACKAGES = (u'test', u'dist-packages', u'site-packages', 'Tools')
# some modules execute funky code when they are imported which we really don't want here
# (though if you feel funny, try: 'import antigravity')
PYTHON_LIB_IGNORE_MODULES = (u'antigravity.py', u'idlelib/idle.py', u'multiprocessing/util.py')
# tags whose name starts with one of these words are not written to the tags file
PYTHON_KEYWORDS = ('and', 'as', 'assert', 'break', 'class', 'continue', 'def', 'del', 'elif',
                   'else', 'except', 'exec', 'finally', 'for', 'from', 'global', 'if', 'import',
                   'in', 'is', 'lambda', 'not', 'or', 'pass', 'print', 'raise', 'return', 'try',
                   'while', 'with', 'yield', 'False', 'None', 'True')

# (from tagmanager/tm_tag.c:32)
# field separators of the tagmanager format, each one is a single character
# (a stray trailing comma used to turn TA_NAME into a 1-tuple)
TA_NAME = '%c' % 200
TA_TYPE = '%c' % 204
TA_ARGLIST = '%c' % 205
TA_SCOPE = '%c' % 206

# TMTagType (tagmanager/tm_tag.h:47)
TYPE_CLASS = '%d' % 1
TYPE_FUNCTION = '%d' % 128

tag_filename = 'data/python.tags'
# groups: (1) 'def' or 'class', (2) the identifier, (3) the parenthesised argument/base list;
# a raw string keeps the backslashes for the regexp engine instead of the string parser
tag_regexp = r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]'
58 ########################################################################
try:
    # Python 2: matches both str and unicode
    _STRING_TYPES = basestring
except NameError:
    # there is no basestring on Python 3
    _STRING_TYPES = str

# prefer getfullargspec() where it exists because getargspec() is missing in newer Python
# versions; only the first four fields (args, varargs, varkw, defaults) of the result are used
_getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec


def _join_nested_args(seq):
    """Join already formatted nested argument names into a tuple-like string, e.g. '(a, b)'"""
    if len(seq) == 1:
        return '(' + seq[0] + ',)'
    return '(' + ', '.join(seq) + ')'


def _format_nested_arg(arg, convert, join):
    """Format one argument name, recursing into nested lists/tuples of names"""
    if isinstance(arg, (list, tuple)):
        return join([_format_nested_arg(item, convert, join) for item in arg])
    return convert(arg)


def _to_bytes(text):
    """
    Return text as a byte string for writing into a file opened in binary mode

    Byte strings are returned unchanged. Text strings are encoded as Latin-1 so that the
    separator characters built with '%c' (code points 200-206) end up as single bytes.
    """
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


class Parser:
    """
    Collect tags for Python classes, functions and methods and write them to a tags file

    The tags are kept in self.tags, a dictionary mapping the tag name to the complete tag line.
    The first tag stored for a name wins, later tags with the same name are ignored.
    """

    #----------------------------------------------------------------------
    def __init__(self):
        self.tags = {}
        self.re_matcher = re.compile(tag_regexp)

    #----------------------------------------------------------------------
    def _get_superclass(self, _object):
        """
        Python class base-finder
        (found on http://mail.python.org/pipermail/python-list/2002-November/173949.html)

        @param _object (object)
        @return name of the first superclass or an empty string if there is none (str)
        """
        try:
            # getmro() handles old-style and new-style classes alike, element 0 is the class
            # itself and element 1 its first base class
            return inspect.getmro(_object)[1].__name__
        except IndexError:
            return ''

    #----------------------------------------------------------------------
    def _formatargspec(self, args, varargs=None, varkw=None, defaults=None,
                       formatarg=str,
                       formatvarargs=lambda name: '*' + name,
                       formatvarkw=lambda name: '**' + name,
                       formatvalue=lambda value: '=' + repr(value),
                       join=_join_nested_args):
        """Format an argument spec from the 4 values returned by getargspec.

        The first four arguments are (args, varargs, varkw, defaults). The
        other four arguments are the corresponding optional formatting functions
        that are called to turn names and values into strings. The ninth
        argument is an optional function to format nested sequences of argument names.

        The result is the comma separated argument list without surrounding parentheses."""
        specs = []
        # index of the first argument which has a default value (none if there are no defaults)
        if defaults:
            firstdefault = len(args) - len(defaults)
        else:
            firstdefault = len(args)
        for i, arg in enumerate(args):
            spec = _format_nested_arg(arg, formatarg, join)
            if i >= firstdefault:
                d = defaults[i - firstdefault]
                # this is the difference from the original formatargspec() function
                # to use nicer names than the default repr() output
                if hasattr(d, '__name__'):
                    d = d.__name__
                spec = spec + formatvalue(d)
            specs.append(spec)
        if varargs is not None:
            specs.append(formatvarargs(varargs))
        if varkw is not None:
            specs.append(formatvarkw(varkw))
        return ', '.join(specs)

    #----------------------------------------------------------------------
    def _add_tag(self, obj, tag_type, parent=''):
        """
        Verify the found tag name and if it is valid, add it to the list

        @param obj (instance) the object to tag or simply its name as string
        @param tag_type (str)
        @param parent (str) superclass name for class tags, enclosing class name otherwise
        """
        args = ''
        scope = ''
        try:
            args = self._formatargspec(*_getargspec(obj)[:4])
        except (TypeError, KeyError):
            # obj is a plain name or an object whose arguments cannot be inspected
            pass
        if parent:
            if tag_type == TYPE_CLASS:
                args = '(%s)' % parent
            else:
                scope = '%s%s' % (TA_SCOPE, parent)
        if isinstance(obj, _STRING_TYPES):
            tagname = obj
        else:
            tagname = obj.__name__
        if len(tagname) < 4:
            # skip short tags
            return
        tag = '%s%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args, scope)

        # check for duplicates
        if tagname not in self.tags and not tagname_is_like_keyword(tagname):
            self.tags[tagname] = tag

    #----------------------------------------------------------------------
    def process_file(self, filename):
        """
        Read the file specified by filename and look for class and function definitions

        The file is imported as a module and inspected. If the import fails for any reason
        other than an IOError, the file is parsed line by line with a regular expression.

        @param filename (str)
        """
        try:
            module = imp.load_source('tags_file_module', filename)
        except IOError as e:
            # file not found
            print('%s: %s' % (filename, e))
            return
        except Exception:
            # importing arbitrary code can fail in many ways, use the regexp fallback then
            module = None

        if module:
            self._process_module(module)
        else:
            self._process_source(filename)

    #----------------------------------------------------------------------
    def _process_module(self, module):
        """
        Add tags for the public functions, classes and methods found in an imported module

        @param module (module)
        """
        for obj_name, obj in inspect.getmembers(module, callable):
            try:
                name = obj.__name__
            except AttributeError:
                name = obj_name
            if not name or not isinstance(name, _STRING_TYPES) or is_private_identifier(name):
                # skip non-public tags
                continue
            if inspect.isfunction(obj):
                self._add_tag(obj, TYPE_FUNCTION)
            elif inspect.isclass(obj):
                self._add_tag(obj, TYPE_CLASS, self._get_superclass(obj))
                # NOTE(review): inspect.ismethod() only matches method objects; on Python 3
                # functions looked up on a class are plain functions and are not found here
                try:
                    methods = inspect.getmembers(obj, inspect.ismethod)
                except (TypeError, AttributeError):
                    methods = []
                for m_name, m_obj in methods:
                    # skip non-public tags
                    if is_private_identifier(m_name):
                        continue
                    self._add_tag(m_obj, TYPE_FUNCTION, name)

    #----------------------------------------------------------------------
    def _process_source(self, filename):
        """
        Add tags for public 'def' and 'class' lines found by plain regular expression parsing

        @param filename (str)
        """
        # the with statement closes the file even if reading or matching raises
        with open(filename) as filep:
            for line in filep:
                m = self.re_matcher.match(line)
                if not m:
                    continue
                tag_type_str, tagname, args = m.groups()
                if not tagname or is_private_identifier(tagname):
                    # skip non-public tags
                    continue
                if tag_type_str == 'class':
                    tag_type = TYPE_CLASS
                else:
                    tag_type = TYPE_FUNCTION
                args = args.strip()
                tag = '%s%s%s%s%s\n' % (tagname, TA_TYPE, tag_type, TA_ARGLIST, args)
                if tagname not in self.tags and not tagname_is_like_keyword(tagname):
                    self.tags[tagname] = tag

    #----------------------------------------------------------------------
    def add_builtins(self):
        """
        Add the names of the builtins module as simple tags
        """
        # __builtins__ is only guaranteed to be the module in __main__, in imported modules
        # it may be a plain dictionary, so import the builtins module explicitly
        try:
            import __builtin__ as builtins_module  # Python 2
        except ImportError:
            import builtins as builtins_module  # Python 3

        for tag_name in dir(builtins_module):
            # check if the tag name starts with upper case, then we assume it is a class
            # note that this is a very very simple heuristic to determine the type and will give
            # false positives
            if tag_name[0].isupper():
                tag_type = TYPE_CLASS
            else:
                tag_type = TYPE_FUNCTION

            self._add_tag(tag_name, tag_type)

    #----------------------------------------------------------------------
    def write_to_file(self, filename):
        """
        Sort the found tags and write them into the file specified by filename

        @param filename (str)
        """
        # sort the tags
        result = sorted(self.tags.values())
        # write them, the with statement closes the file even if a write fails
        with open(filename, 'wb') as target_file:
            target_file.write(_to_bytes(
                '# format=tagmanager - Automatically generated file - do not edit (created on %s)\n' % \
                datetime.datetime.now().ctime()))
            for symbol in result:
                if not symbol == '\n':  # skip empty lines
                    target_file.write(_to_bytes(symbol))
245 #----------------------------------------------------------------------
def tagname_is_like_keyword(tagname):
    """
    Tell whether tagname starts with a Python keyword

    Such tags are ignored to avoid annoying completions of 'pass_' and similar ones.

    @param tagname (str)
    @return True if tagname begins with any entry of PYTHON_KEYWORDS (bool)
    """
    # a linear scan over all keywords, speed doesn't really matter in this script
    return any(tagname.startswith(keyword) for keyword in PYTHON_KEYWORDS)
255 #----------------------------------------------------------------------
def is_private_identifier(tagname):
    """
    Tell whether tagname is treated as non-public, i.e. it begins or ends with an underscore

    @param tagname (str)
    @return (bool)
    """
    if tagname.startswith('_'):
        return True
    return tagname.endswith('_')
260 #----------------------------------------------------------------------
def get_module_filenames(path):
    """
    Collect the filenames of all Python modules (*.py) found below path

    Directories whose path relative to path contains an entry of PYTHON_LIB_IGNORE_PACKAGES
    and files whose relative path is listed in PYTHON_LIB_IGNORE_MODULES are left out.

    @param path (str)
    @return filenames, each one joined with the directory it was found in (list)
    """
    def ignore_package(package):
        return any(ignore in package for ignore in PYTHON_LIB_IGNORE_PACKAGES)

    # the loop is quite slow but it doesn't matter for this script
    filenames = []
    for base, dirs, files in os.walk(path):
        # the package path must be relative to the directory being walked, not to the
        # standard library directory, otherwise the ignore lists are applied to wrong names
        package = os.path.relpath(base, path)
        if package == os.curdir:
            # the top directory itself has no package prefix
            package = ''
        if ignore_package(package):
            continue
        for filename in files:
            if not filename.endswith('.py'):
                continue
            if os.path.join(package, filename) in PYTHON_LIB_IGNORE_MODULES:
                continue
            filenames.append(os.path.join(base, filename))
    return filenames
285 #----------------------------------------------------------------------
def main():
    """
    Create the tags file from the files given on the command line

    Without command line arguments, the modules found in PYTHON_LIB_DIRECTORY are used.
    """
    # files given on command line take precedence over the preset
    filenames = sys.argv[1:] or get_module_filenames(PYTHON_LIB_DIRECTORY)

    tag_parser = Parser()
    # builtins are added first, so their tags win over equally named tags from the files
    tag_parser.add_builtins()

    for source_file in filenames:
        tag_parser.process_file(source_file)

    tag_parser.write_to_file(tag_filename)


if __name__ == '__main__':
    main()