Remove unused code
[geany-mirror.git] / scripts / create_py_tags.py
blobdb733ae07f59f6d05c1f87d2d4186681112c9f9f
1 #!/usr/bin/env python3
3 # Author: Enrico Tröger
4 # License: GPL v2 or later
6 # (based on the script at https://github.com/python/cpython/blob/83eb827247dd28b13fd816936c74c162e9f52a2d/Tools/scripts/ptags.py)
8 # This script should be run in the top source directory.
10 # Parses all files in the directories given on command line for Python classes or functions and
11 # writes them into data/tags/std.py.tags (ctags format).
12 # If called without command line arguments, a preset of common Python libs is used.
14 # WARNING
15 # Be aware that running this script will actually *import* all modules given on the command line
16 # or in the standard library path of your Python installation. Depending on which Python modules
17 # you have installed, this might not be what you want and can have weird side effects.
18 # You have been warned.
20 # It should be however relatively safe to execute this script from a fresh Python installation
21 # installed into a dedicated prefix or from an empty virtualenv or ideally in a Docker container
22 # in the Geany project directory:
23 # docker run --rm -it --user $(id -u):$(id -g) -v $(pwd):/data --workdir /data python:3.11-alpine python scripts/create_py_tags.py
26 import importlib.util
27 import inspect
28 import os
29 import re
30 import sys
31 import sysconfig
32 import warnings
33 from pathlib import Path
35 from create_tags_helper import format_tag, write_ctags_file
# Turn every DeprecationWarning into an exception, so importing a deprecated module raises
# and the module can be detected and skipped.
warnings.filterwarnings('error', category=DeprecationWarning)

# directory containing the standard library of the running interpreter
PYTHON_LIB_DIRECTORY = Path(os.__file__).parent

# packages to skip completely (this is a list on purpose: more entries are appended at runtime)
PYTHON_LIB_IGNORE_PACKAGES = [
    'dist-packages',
    'distutils',
    'encodings',
    'idlelib',
    'lib2to3',
    'site-packages',
    'test',
    'turtledemo',
    'Tools',
]

# some modules/classes are deprecated or execute funky code when they are imported
# which we really don't want here (though if you feel funny, try: 'import antigravity')
PYTHON_LIB_IGNORE_MODULES = (
    '__phello__.foo',
    'antigravity',
    'asyncio.windows_events',
    'asyncio.windows_utils',
    'ctypes.wintypes',
    'ensurepip._bundled',
    'lib2to3',
    'multiprocessing.popen_spawn_win32',
    'this',
    'turtle',
)
PYTHON_LIB_IGNORE_CLASSES = (
    'typing.io',
    'typing.re',
)

# Python kinds
KIND_CLASS = 'class'
KIND_FUNCTION = 'function'
KIND_MEMBER = 'member'

# output file, relative to the top source directory
TAG_FILENAME = 'data/tags/std.py.tags'
# matches "def name(...):" and "class name(...):" lines for the fallback parser
TAG_REGEXP = re.compile(r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]')
# matches object representations which contain a memory address, e.g. "<object object at 0x7f...>"
OBJECT_MEMORY_ADDRESS_REGEXP = re.compile(r'<(.+?) at 0x[0-9a-f]+(?:.+)>', flags=re.IGNORECASE)
59 # pylint: disable=no-else-return,no-self-use
class Parser:
    """
    Collect tags for Python classes, functions and members.

    Modules are imported and analysed with `inspect`; if a module cannot be imported,
    its source file is scanned with a regular expression instead.
    """

    def __init__(self):
        # maps a tag key to the formatted tag (as returned by `format_tag`)
        self.tags = {}

    def _add_tag(self, object_name, object_, kind, module_path=None, parent=''):
        """
        Verify the found tag name and if it is valid, add it to the list

        @param object_name (str)
        @param object_ (instance)
        @param kind (str)
        @param module_path (str)
        @param parent (str)
        """
        if len(object_name) < 4 or is_private_identifier(object_name):
            return  # skip short and private tags
        if object_ is not None and not is_relevant_identifier(object_):
            return

        tag_key = (module_path, parent, object_name)
        if tag_key not in self.tags:
            signature = self._create_signature(object_) if object_ is not None else None
            self.tags[tag_key] = format_tag(object_name, kind, signature, parent)

    def _get_safe_parameter_default_value(self, value):
        """
        Replace possibly sensitive or just too much information from the default value

        @param value (object)
        @return (str)
        """
        # prevent evaluating of `os.environ` in cgi.print_environ(environ=os.environ) which
        # would lead to include the current full environment variables to be included
        # in the tags file
        if isinstance(value, (dict, os._Environ)) and value:  # pylint: disable=protected-access
            return f'<default-value-stripped {type(value)}>'
        if isinstance(value, str):
            # remove interpreter paths
            # (sys.executable may be None or an empty string: None would raise a TypeError
            # in the `in` test and an empty string would match every value)
            if sys.executable and sys.executable in value:
                return '/nonexistent/bin/python3'
            # remove installation prefix paths
            if sys.prefix and sys.prefix in value:
                return '/nonexistent'

        # for all other default values, return the string representation,
        # assuming it is shorter than repr()
        value_str = str(value)

        # remove object hex addresses, e.g
        # subTest(self, msg='<object object at 0x7f14bdfcd5a0>', **params)
        # (`sub` returns the string unchanged if there is nothing to replace)
        return OBJECT_MEMORY_ADDRESS_REGEXP.sub(r'<\1>', value_str)

    def _stringify_parameter_default_if_necessary(self, parameter):
        """
        Replace default values of the parameters with their string variants if they are not
        basic types. This is to avoid signatures like (`ssl.SSLContext.load_default_certs`):
        create_default_contextÌ128Í(purpose=<Purpose.SERVER_AUTH: _ASN1Object(nid=129, shortname='serverAuth', longname='TLS Web Server Authentication', oid='1.3.6.1.5.5.7.3.1')>, *, cafile=None, capath=None, cadata=None)ÎSSLContext  # noqa pylint: disable=line-too-long
        and create instead:
        create_default_contextÌ128Í(purpose='Purpose.SERVER_AUTH', *, cafile=None, capath=None, cadata=None)

        This is not perfect as it might suggest that the `purpose` parameter accepts a string.
        But having the full `repr()` result is even worse.

        @param parameter (inspect.Parameter)
        @return (inspect.Parameter)
        """
        # no default at all or a falsy default (None, 0, '', empty containers): keep as is
        if parameter.default is parameter.empty or not parameter.default:
            return parameter
        if isinstance(parameter.default, (bool, int, float)):
            return parameter

        new_default = self._get_safe_parameter_default_value(parameter.default)
        return parameter.replace(default=new_default)

    def _create_signature(self, object_):
        """
        Create signature for the given `object_`.

        @param object_ (instance)
        @return (inspect.Signature), or an empty string if no signature can be determined
        """
        try:
            signature = inspect.signature(object_)
        except (ValueError, TypeError):
            # inspect.signature() throws ValueError and TypeError for unsupported callables,
            # so we need to ignore the signature for this callable
            return ''

        # `inspect.Parameter.empty` is a sentinel class and needs to be compared by identity,
        # the helper does so and returns parameters without a default value unchanged
        new_parameters = [
            self._stringify_parameter_default_if_necessary(parameter)
            for parameter in signature.parameters.values()
        ]
        return signature.replace(parameters=new_parameters)

    def process_module(self, module_path, module_filename):
        """
        Import the given module path and look for class and function definitions

        @param module_path (str)
        @param module_filename (str or Path)
        """
        if module_path.endswith('__main__'):
            return  # ignore any executable modules, importing them would execute the module

        symbols = None
        module_error = None
        try:
            module = importlib.import_module(module_path)
        except DeprecationWarning as exc:
            print(f'Ignoring deprecated module "{module_path}" (reason: {exc})')
            return
        except Exception as exc:  # pylint: disable=broad-except
            module_error = str(exc)
        else:
            symbols = inspect.getmembers(module)

        if symbols:
            self._process_module_with_inspect(symbols, module_path)
            return

        # If error is empty, there are probably just no symbols in the module, e.g. on empty
        # __init__.py files. Try to parse them anyway. But log module_errors.
        if module_error:
            print(f'Using fallback parser for: {module_path} ({module_filename}, reason: {module_error})')

        self._process_module_with_fallback_parser(module_filename)

    def _process_module_with_inspect(self, symbols, module_path):
        """
        Try to analyse all symbols in the module as found by `inspect.getmembers`.

        @param symbols (list of (name, object) tuples)
        @param module_path (str)
        """
        for obj_name, obj in symbols:
            if is_import(obj, module_path):
                continue

            # function and similar callables
            if inspect.isroutine(obj):
                self._add_tag(obj_name, obj, KIND_FUNCTION, module_path)
            # class
            elif inspect.isclass(obj):
                if _ignore_class(module_path, obj_name):
                    continue
                self._add_tag(obj_name, obj, KIND_CLASS, module_path)
                # methods
                for m_name, m_obj in inspect.getmembers(obj):
                    self._add_tag(m_name, m_obj, KIND_MEMBER, module_path, parent=obj_name)

    def _process_module_with_fallback_parser(self, module_filename):
        """
        Plain regular expression based parsing, used as fallback if `inspect`'ing the module is not possible

        @param module_filename (str or Path)
        """
        with open(module_filename, encoding='utf-8') as filep:
            for line in filep:
                match = TAG_REGEXP.match(line)
                if not match:
                    continue

                tag_type_str, tagname, args = match.groups()
                if not tagname or is_private_identifier(tagname):
                    continue
                # NOTE: tags found here are keyed by their plain name while `_add_tag` uses
                # (module_path, parent, name) tuples, so this only detects duplicates among
                # tags found by the fallback parser
                if tagname in self.tags:
                    continue

                kind = KIND_CLASS if tag_type_str == 'class' else KIND_FUNCTION
                signature = args.strip()
                self.tags[tagname] = format_tag(tagname, kind, signature, parent=None)

    def add_builtins(self):
        """
        Add the contents of the `builtins` module as simple tags
        """
        # `__builtins__` is only the `builtins` module in `__main__`, in imported modules it is
        # a dict (CPython implementation detail), so use the real module instead
        import builtins  # pylint: disable=import-outside-toplevel

        for b_name, b_obj in inspect.getmembers(builtins):
            if inspect.isclass(b_obj):
                self._add_tag(b_name, b_obj, KIND_CLASS)
            elif is_relevant_identifier(b_obj):
                self._add_tag(b_name, b_obj, KIND_FUNCTION)

    def write_to_file(self, filename):
        """
        Sort the found tags and write them into the file specified by filename

        @param filename (str)
        """
        write_ctags_file(filename, self.tags.values(), sys.argv[0])
def is_import(object_, module_path):
    """
    Check whether `object_` was defined in another module than `module_path`

    Objects without a (non-empty) `__module__` attribute are not considered as imported.

    @param object_ (instance)
    @param module_path (str)
    @return (bool)
    """
    origin = getattr(object_, '__module__', None)
    if not origin:
        return False
    return bool(origin != module_path)
def is_private_identifier(tagname):
    """
    Check whether the tag name starts or ends with an underscore

    @param tagname (str)
    @return (bool)
    """
    has_leading_underscore = tagname.startswith('_')
    has_trailing_underscore = tagname.endswith('_')
    return has_leading_underscore or has_trailing_underscore
def is_relevant_identifier(object_):
    """
    Check whether `object_` is something a tag should be created for, i.e. whether
    at least one of the `inspect` predicates below matches

    @param object_ (instance)
    @return (bool)
    """
    # TODO add "inspect.isdatadescriptor" for properties
    # TODO maybe also consider attributes, e.g. by checking against __dict__ or so
    predicates = (
        inspect.ismethod,
        inspect.isclass,
        inspect.isfunction,
        inspect.isgeneratorfunction,
        inspect.isgenerator,
        inspect.iscoroutinefunction,
        inspect.iscoroutine,
        inspect.isawaitable,
        inspect.isasyncgenfunction,
        inspect.isasyncgen,
        inspect.isroutine,
        inspect.isabstract,
    )
    # the predicates are evaluated in order, stopping at the first match
    return any(predicate(object_) for predicate in predicates)
def _setup_global_package_ignore_list():
    """
    Read the python-config path from LIBPL and strip the prefix part
    (e.g. /usr/lib/python3.8/config-3.8-x86_64-linux-gnu gets config-3.8-x86_64-linux-gnu)
    and add the result to the list of ignored packages.
    """
    libpl = sysconfig.get_config_var('LIBPL')
    if not libpl:
        # `get_config_var` returns None for unknown variables and LIBPL is not necessarily
        # defined on every platform; there is nothing to ignore then
        return

    python_config_dir = Path(libpl)
    try:
        python_config_package = python_config_dir.relative_to(PYTHON_LIB_DIRECTORY)
    except ValueError:
        # the config directory is not located below the standard library directory
        python_config_package = python_config_dir

    ignore = python_config_package.as_posix()
    # do not add the same entry again if this function is called more than once
    if ignore not in PYTHON_LIB_IGNORE_PACKAGES:
        PYTHON_LIB_IGNORE_PACKAGES.append(ignore)
def _ignore_package(package):
    """
    Check whether the dotted package name contains any of the ignored package names

    @param package (str)
    @return (bool)
    """
    # NOTE(review): this is a substring test, so 'test' matches 'distutils.tests' but
    # also 'unittest' - confirm this is intended
    return any(ignore in package for ignore in PYTHON_LIB_IGNORE_PACKAGES)
def _ignore_module(module):
    """
    Check whether the full module path is listed in the ignored modules

    @param module (str)
    @return (bool)
    """
    is_ignored = module in PYTHON_LIB_IGNORE_MODULES
    return is_ignored
def _ignore_class(module, class_):
    """
    Check whether the class (qualified with its module path) is listed in the ignored classes

    @param module (str)
    @param class_ (str)
    @return (bool)
    """
    qualified_name = f'{module}.{class_}'
    return qualified_name in PYTHON_LIB_IGNORE_CLASSES
def _get_module_list(*paths):
    """
    Find all Python modules below the given directories

    @param paths (str or Path) directories to search recursively for *.py files
    @return (list) sorted list of (module path, module filename) tuples
    """
    # the loop is quite slow but it doesn't matter for this script
    modules = []
    for path in paths:
        # paths given on the command line are plain strings, `rglob` requires a Path
        base_path = Path(path)
        for module_filename in base_path.rglob('*.py'):
            module_name = module_filename.stem
            package_path = module_filename.relative_to(base_path)
            package = '.'.join(package_path.parent.parts)
            # construct full module path (e.g. xml.sax.xmlreader)
            if module_name == '__init__':
                module_path = package
            elif package:
                module_path = f'{package}.{module_name}'
            else:
                module_path = module_name

            # ignore unwanted modules and packages
            if _ignore_package(package):
                continue
            if _ignore_module(module_path):
                continue

            modules.append((module_path, module_filename))

    # sort module list for nicer output
    return sorted(modules)
def main():
    """
    Collect the tags of the builtins and of all modules found in the directories given on
    command line (or in the standard library if none are given) and write them to the tag file
    """
    _setup_global_package_ignore_list()

    # directories given on command line take precedence over the standard library
    search_paths = sys.argv[1:] or [PYTHON_LIB_DIRECTORY]
    modules = _get_module_list(*search_paths)

    parser = Parser()
    parser.add_builtins()

    for module_path, module_filename in modules:
        try:
            parser.process_module(module_path, module_filename)
        except Exception as exc:
            # name the failing module before the error is passed on
            print(f'{exc.__class__.__name__} in {module_path}: {exc}')
            raise

    parser.write_to_file(TAG_FILENAME)


if __name__ == '__main__':
    main()