Replace tabs by spaces.
[geany-mirror.git] / scripts / create_py_tags.py
blob04d96cd130f89c5dea0cd40c8243551fd4a8c4af
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Author: Enrico Tröger
5 # License: GPL v2 or later
7 # (based on the script at http://svn.python.org/view/*checkout*/python/trunk/Tools/scripts/ptags.py)
9 # This script should be run in the top source directory.
11 # Parses all files given on the command line for Python classes and functions and writes
12 # them into data/python.tags (internal tagmanager format).
13 # If called without command line arguments, a preset of common Python libs is used.
15 import datetime
16 import imp
17 import inspect
18 import re
19 import sys
20 import types
# Single-character markers that introduce the individual fields of a tag line
# (values taken from tagmanager/tm_tag.c:32)
TA_NAME = '%c' % 200  # plain string like its siblings (a trailing comma made this a tuple)
TA_TYPE = '%c' % 204
TA_ARGLIST = '%c' % 205
TA_SCOPE = '%c' % 206

# TMTagType (tagmanager/tm_tag.h:47), already formatted as strings for output
TYPE_CLASS = '%d' % 1
TYPE_FUNCTION = '%d' % 128

# output file, relative to the top source directory
tag_filename = 'data/python.tags'
# matches "def name(args):" / "class name(bases):" at the start of a line;
# groups: keyword, name, parenthesised argument list
# (raw string so that the regex escapes are not treated as string escapes)
tag_regexp = r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]'
class Parser:
    """
    Collect tags for Python classes, functions and methods and write them
    to a file in the tagmanager format.

    Tags are stored in self.tags, keyed by tag name; the first tag found for
    a name is kept, later ones with the same name are ignored.
    """

    #----------------------------------------------------------------------
    def __init__(self):
        # maps tag name -> complete tag line (including the trailing newline)
        self.tags = {}
        self.re_matcher = re.compile(tag_regexp)

    #----------------------------------------------------------------------
    def _get_superclass(self, _object):
        """
        Python class base-finder
        (found on http://mail.python.org/pipermail/python-list/2002-November/173949.html)

        @param _object (class)
        @return name of the first superclass (str), '' if there is none
        """
        try:
            # inspect.getmro() handles old-style and new-style classes alike;
            # index 0 is the class itself, index 1 its first base class
            return inspect.getmro(_object)[1].__name__
        except IndexError:
            return ''

    #----------------------------------------------------------------------
    def _formatargspec(self, args, varargs=None, varkw=None, defaults=None,
                       formatarg=str,
                       formatvarargs=lambda name: '*' + name,
                       formatvarkw=lambda name: '**' + name,
                       formatvalue=lambda value: '=' + repr(value),
                       join=inspect.joinseq):
        """Format an argument spec from the 4 values returned by getargspec.

        The first four arguments are (args, varargs, varkw, defaults).  The
        other four arguments are the corresponding optional formatting functions
        that are called to turn names and values into strings.  The ninth
        argument is an optional function to format the sequence of arguments.

        Unlike inspect.formatargspec(), default values which have a __name__
        attribute are represented by that name, and the result is returned
        without enclosing parentheses.

        @return comma separated argument list (str)
        """
        specs = []
        # index of the first argument which has a default value; if there are
        # no defaults this is len(args) and so never reached in the loop below
        firstdefault = len(args)
        if defaults:
            firstdefault -= len(defaults)
        for i, arg in enumerate(args):
            spec = inspect.strseq(arg, formatarg, join)
            if i >= firstdefault:
                d = defaults[i - firstdefault]
                # this is the difference from the original formatargspec() function
                # to use nicer names than the default repr() output
                if hasattr(d, '__name__'):
                    d = d.__name__
                spec = spec + formatvalue(d)
            specs.append(spec)
        if varargs is not None:
            specs.append(formatvarargs(varargs))
        if varkw is not None:
            specs.append(formatvarkw(varkw))
        return ', '.join(specs)

    #----------------------------------------------------------------------
    def _add_tag(self, obj, tag_type, parent=''):
        """
        Verify the found tag name and if it is valid, add it to the list

        @param obj (instance)
        @param tag_type (str)
        @param parent (str)
        """
        args = ''
        scope = ''
        try:
            args = self._formatargspec(*inspect.getargspec(obj))
        except (TypeError, KeyError):
            # best effort: keep an empty argument list if the argument spec
            # of obj can not be determined
            pass
        # NOTE(review): the argument list built here has no enclosing
        # parentheses while the regular expression based fallback in
        # process_file() stores them including parentheses - confirm which
        # form the tag file consumer expects
        if parent:
            if tag_type == TYPE_CLASS:
                # for classes the argument list field holds the superclass
                args = '(%s)' % parent
            else:
                scope = '%s%s' % (TA_SCOPE, parent)
        tagname = obj.__name__
        if len(tagname) < 4:
            # skip short tags
            return
        # check for duplicates, the first tag found for a name wins
        if tagname not in self.tags:
            self.tags[tagname] = '%s%s%s%s%s%s\n' % (
                tagname, TA_TYPE, tag_type, TA_ARGLIST, args, scope)

    #----------------------------------------------------------------------
    def process_file(self, filename):
        """
        Read the file specified by filename and look for class and function definitions

        The file is imported as a module and inspected; if the import fails for
        any reason other than an IOError, the file is scanned line by line with
        a regular expression instead.

        NOTE: importing executes the top-level code of the file, so this
        should only be used on trusted sources.

        @param filename (str)
        """
        try:
            module = imp.load_source('tags_file_module', filename)
        except IOError:
            # file not found
            # (sys.exc_info() avoids the version specific "except X, e" syntax)
            print('%s: %s' % (filename, sys.exc_info()[1]))
            return
        except Exception:
            # deliberate: anything else going wrong while importing only
            # triggers the regular expression based fallback below
            module = None

        if module:
            self._process_module(module)
        else:
            self._process_source(filename)

    #----------------------------------------------------------------------
    def _process_module(self, module):
        """
        Add tags for the public functions, classes and methods of an imported module

        @param module (module)
        """
        for obj_name, obj in inspect.getmembers(module, callable):
            try:
                name = obj.__name__
            except AttributeError:
                name = obj_name
            if not name or not isinstance(name, basestring) or name.startswith('_'):
                # skip non-public tags
                continue
            if inspect.isfunction(obj):
                self._add_tag(obj, TYPE_FUNCTION)
            elif inspect.isclass(obj):
                self._add_tag(obj, TYPE_CLASS, self._get_superclass(obj))
                try:
                    methods = inspect.getmembers(obj, inspect.ismethod)
                except AttributeError:
                    methods = []
                # getmembers() already returns only members for which
                # inspect.ismethod() is true, so only the name is checked here
                for m_name, m_obj in methods:
                    if m_name.startswith('_'):
                        # skip non-public tags
                        continue
                    self._add_tag(m_obj, TYPE_FUNCTION, name)

    #----------------------------------------------------------------------
    def _process_source(self, filename):
        """
        Add tags found by plain regular expression based parsing of the file

        @param filename (str)
        """
        filep = open(filename)
        try:
            for line in filep:
                m = self.re_matcher.match(line)
                if not m:
                    continue
                tag_type_str, tagname, args = m.groups()
                if not tagname or tagname.startswith('_'):
                    # skip non-public tags
                    continue
                if tag_type_str == 'class':
                    tag_type = TYPE_CLASS
                else:
                    tag_type = TYPE_FUNCTION
                args = args.strip()
                if tagname not in self.tags:
                    self.tags[tagname] = '%s%s%s%s%s\n' % (
                        tagname, TA_TYPE, tag_type, TA_ARGLIST, args)
        finally:
            # close the file even if reading or matching raised an exception
            filep.close()

    #----------------------------------------------------------------------
    def write_to_file(self, filename):
        """
        Sort the found tags and write them into the file specified by filename

        @param filename (str)
        """
        # sort the tags
        result = sorted(self.tags.values())
        # write them
        target_file = open(filename, 'wb')
        try:
            target_file.write(
                '# format=tagmanager - Automatically generated file - do not edit (created on %s)\n' %
                datetime.datetime.now().ctime())
            for symbol in result:
                if symbol != '\n':  # skip empty lines
                    target_file.write(symbol)
        finally:
            # close the file even if writing raised an exception
            target_file.close()
# Preset of standard library modules which are parsed when no files were
# given on the command line
# (this list was created manually and probably needs review for sensible input files)
default_files = list('/usr/lib/python2.5/' + module_file for module_file in (
    'anydbm.py', 'asynchat.py', 'asyncore.py', 'audiodev.py', 'base64.py', 'BaseHTTPServer.py',
    'Bastion.py', 'bdb.py', 'binhex.py', 'bisect.py', 'calendar.py', 'CGIHTTPServer.py',
    'cgi.py', 'cgitb.py', 'chunk.py', 'cmd.py', 'codecs.py', 'codeop.py', 'code.py', 'colorsys.py',
    'commands.py', 'compileall.py', 'ConfigParser.py', 'contextlib.py', 'cookielib.py', 'Cookie.py',
    'copy.py', 'copy_reg.py', 'cProfile.py', 'csv.py', 'dbhash.py', 'decimal.py', 'difflib.py',
    'dircache.py', 'dis.py', 'DocXMLRPCServer.py', 'filecmp.py', 'fileinput.py', 'fnmatch.py',
    'formatter.py', 'fpformat.py', 'ftplib.py', 'functools.py', 'getopt.py', 'getpass.py', 'gettext.py',
    'glob.py', 'gopherlib.py', 'gzip.py', 'hashlib.py', 'heapq.py', 'hmac.py', 'htmlentitydefs.py',
    'htmllib.py', 'HTMLParser.py', 'httplib.py', 'ihooks.py', 'imaplib.py', 'imghdr.py', 'imputil.py',
    'inspect.py', 'keyword.py', 'linecache.py', 'locale.py', 'mailbox.py', 'mailcap.py', 'markupbase.py',
    'md5.py', 'mhlib.py', 'mimetools.py', 'mimetypes.py', 'MimeWriter.py', 'mimify.py',
    'modulefinder.py', 'multifile.py', 'mutex.py', 'netrc.py', 'nntplib.py', 'ntpath.py',
    'nturl2path.py', 'opcode.py', 'optparse.py', 'os2emxpath.py', 'os.py', 'pdb.py', 'pickle.py',
    'pickletools.py', 'pipes.py', 'pkgutil.py', 'platform.py', 'plistlib.py', 'popen2.py',
    'poplib.py', 'posixfile.py', 'posixpath.py', 'pprint.py', 'pty.py', 'py_compile.py', 'pydoc.py',
    'Queue.py', 'quopri.py', 'random.py', 'repr.py', 're.py', 'rexec.py', 'rfc822.py', 'rlcompleter.py',
    'robotparser.py', 'runpy.py', 'sched.py', 'sets.py', 'sha.py', 'shelve.py', 'shlex.py', 'shutil.py',
    'SimpleHTTPServer.py', 'SimpleXMLRPCServer.py', 'site.py', 'smtpd.py', 'smtplib.py', 'sndhdr.py',
    'socket.py', 'SocketServer.py', 'stat.py', 'statvfs.py', 'StringIO.py', 'stringold.py',
    'stringprep.py', 'string.py', '_strptime.py', 'struct.py', 'subprocess.py', 'sunaudio.py',
    'sunau.py', 'symbol.py', 'symtable.py', 'tabnanny.py', 'tarfile.py', 'telnetlib.py', 'tempfile.py',
    'textwrap.py', 'this.py', 'threading.py', 'timeit.py', 'toaiff.py', 'tokenize.py', 'token.py',
    'traceback.py', 'trace.py', 'tty.py', 'types.py', 'unittest.py', 'urllib2.py', 'urllib.py',
    'urlparse.py', 'UserDict.py', 'UserList.py', 'user.py', 'UserString.py', 'uuid.py', 'uu.py',
    'warnings.py', 'wave.py', 'weakref.py', 'webbrowser.py', 'whichdb.py', 'xdrlib.py', 'zipfile.py',
))
def main():
    """
    Create the tags file from the files given on the command line, or from
    the preset in default_files if none were given
    """
    # an empty argument list is false, so the preset is used in that case
    filenames = sys.argv[1:] or default_files

    parser = Parser()
    for filename in filenames:
        parser.process_file(filename)

    parser.write_to_file(tag_filename)


if __name__ == '__main__':
    main()