3 # Author: Enrico Tröger
4 # License: GPL v2 or later
6 # (based on the script at https://github.com/python/cpython/blob/83eb827247dd28b13fd816936c74c162e9f52a2d/Tools/scripts/ptags.py)
8 # This script should be run in the top source directory.
# Parses all files in the directories given on the command line for Python classes or functions and
# writes them into data/tags/std.py.tags (ctags format).
12 # If called without command line arguments, a preset of common Python libs is used.
# Be aware that running this script will actually *import* all modules given on the command line
# or in the standard library path of your Python installation. Depending on which Python modules
# you have installed, this might not be what you want and can have weird side effects.
18 # You have been warned.
# It should, however, be relatively safe to execute this script from a fresh Python installation
# installed into a dedicated prefix, from an empty virtualenv or, ideally, in a Docker container
# in the Geany project directory:
23 # docker run --rm -it --user $(id -u):$(id -g) -v $(pwd):/data --workdir /data python:3.11-alpine python scripts/create_py_tags.py
33 from pathlib
import Path
35 from create_tags_helper
import format_tag
, write_ctags_file
# Escalate every DeprecationWarning to an exception, so deprecated modules can be detected
# (and ignored) at the moment they are imported.
warnings.filterwarnings('error', category=DeprecationWarning)

# Directory which contains the `os` module of the running interpreter
PYTHON_LIB_DIRECTORY = Path(os.__file__).parent
# Package names to skip; kept as a list because it is extended at runtime
PYTHON_LIB_IGNORE_PACKAGES = [
    'dist-packages',
    'distutils',
    'encodings',
    'idlelib',
    'lib2to3',
    'site-packages',
    'test',
    'turtledemo',
    'Tools',
]
# some modules/classes are deprecated or execute funky code when they are imported
# which we really don't want here (though if you feel funny, try: 'import antigravity')
PYTHON_LIB_IGNORE_MODULES = (
    '__phello__.foo',
    'antigravity',
    'asyncio.windows_events',
    'asyncio.windows_utils',
    'ctypes.wintypes',
    'ensurepip._bundled',
    'lib2to3',
    'multiprocessing.popen_spawn_win32',
    'this',
    'turtle',
)
PYTHON_LIB_IGNORE_CLASSES = (
    'typing.io',
    'typing.re',
)

# Tag kinds
# NOTE(review): KIND_CLASS is referenced by the parser code but is not defined alongside
# these constants - confirm where it is declared.
KIND_FUNCTION = 'function'
KIND_MEMBER = 'member'

# Output file, relative to the top source directory
TAG_FILENAME = 'data/tags/std.py.tags'
# Matches a `def`/`class` line; groups: keyword, name, parenthesised argument list
TAG_REGEXP = re.compile(
    r'^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*(\(.*\))[:]')
# Matches object representations carrying a memory address, e.g. `<object object at 0x7f...>`
OBJECT_MEMORY_ADDRESS_REGEXP = re.compile(
    r'<(.+?) at 0x[0-9a-f]+(?:.+)>',
    flags=re.IGNORECASE)
59 # pylint: disable=no-else-return,no-self-use
67 def _add_tag(self
, object_name
, object_
, kind
, module_path
=None, parent
=''):
69 Verify the found tag name and if it is valid, add it to the list
71 @param object_ (instance)
75 if len(object_name
) < 4 or is_private_identifier(object_name
):
76 return # skip short and private tags
77 if object_
is not None and not is_relevant_identifier(object_
):
80 tag_key
= (module_path
, parent
, object_name
)
81 if tag_key
not in self
.tags
:
82 signature
= self
._create
_signature
(object_
) if object_
is not None else None
83 self
.tags
[tag_key
] = format_tag(object_name
, kind
, signature
, parent
)
85 def _get_safe_parameter_default_value(self
, value
):
87 Replace possibly sensitive or just much information from the default value
89 # prevent evaluating of `os.environ` in cgi.print_environ(environ=os.environ) which
90 # would lead to include the current full environment variables to be included
92 if isinstance(value
, (dict, os
._Environ
)) and value
: # pylint: disable=protected-access
93 return f
'<default-value-stripped {type(value)}>'
94 if isinstance(value
, str):
95 # remove interpreter paths
96 if sys
.executable
in value
:
97 return '/nonexistent/bin/python3'
98 # remove interpreter paths
99 if sys
.prefix
in value
:
100 return '/nonexistent'
102 # for all other default values, return the string representation,
103 # assuming it is shorter than repr()
104 value_str
= str(value
)
106 # remove object hex addresses, e.g
107 # subTest(self, msg='<object object at 0x7f14bdfcd5a0>', **params)
108 if OBJECT_MEMORY_ADDRESS_REGEXP
.search(value_str
):
109 return OBJECT_MEMORY_ADDRESS_REGEXP
.sub(r
'<\1>', value_str
)
113 def _stringify_parameter_default_if_necessary(self
, parameter
):
115 Replace default values of the parameters with their string variants if they are not
116 basic types. This is to avoid signatures like (`ssl.SSLContext.load_default_certs`):
117 create_default_contextÌ128Í(purpose=<Purpose.SERVER_AUTH: _ASN1Object(nid=129, shortname='serverAuth', longname='TLS Web Server Authentication', oid='1.3.6.1.5.5.7.3.1')>, *, cafile=None, capath=None, cadata=None)ÎSSLContext # noqa pylint: disable=line-too-long
119 create_default_contextÌ128Í(purpose='Purpose.SERVER_AUTH', *, cafile=None, capath=None, cadata=None)
121 This is not perfect as it might suggest that the `purpose` parameter accepts a string.
122 But having the full `repr()` result is even worse.
124 if not parameter
.default
or parameter
.default
is parameter
.empty
:
126 if isinstance(parameter
.default
, (bool, int, float)):
129 new_default
= self
._get
_safe
_parameter
_default
_value
(parameter
.default
)
130 return parameter
.replace(default
=new_default
)
132 def _create_signature(self
, object_
):
134 Create signature for the given `object_`.
137 signature
= inspect
.signature(object_
)
138 except (ValueError, TypeError):
139 # inspect.signature() throws ValueError and TypeError for unsupported callables,
140 # so we need to ignore the signature for this callable
144 for parameter_name
in signature
.parameters
:
145 parameter
= signature
.parameters
[parameter_name
]
146 if parameter
.default
and not isinstance(parameter
.default
, parameter
.empty
):
147 new_parameter
= self
._stringify
_parameter
_default
_if
_necessary
(parameter
)
148 new_parameters
.append(new_parameter
)
150 new_parameters
.append(parameter
)
152 return signature
.replace(parameters
=new_parameters
)
def process_module(self, module_path, module_filename):
    """
    Import the given module path and look for class and function definitions

    Modules raising a DeprecationWarning on import are ignored. If the import fails
    or yields no symbols, the module file is parsed by the fallback parser instead.

    @param module_path (str)
    @param module_filename (path of the module's source file)
    """
    if module_path.endswith('__main__'):
        return  # ignore any executable modules, importing them would execute the module

    symbols = None
    module_error = None
    try:
        module = importlib.import_module(module_path)
    except DeprecationWarning as exc:
        print(f'Ignoring deprecated module "{module_path}" (reason: {exc})')
        return
    except Exception as exc:
        module_error = str(exc)
    else:
        symbols = inspect.getmembers(module)

    if symbols:
        self._process_module_with_inspect(symbols, module_path)
        return

    # Without an error there are probably just no symbols in the module, e.g. on empty
    # __init__.py files. Try to parse them anyway, but log import errors.
    if module_error:
        print(f'Using fallback parser for: {module_path} ({module_filename}, reason: {module_error})')

    self._process_module_with_fallback_parser(module_filename)
185 def _process_module_with_inspect(self
, symbols
, module_path
):
187 Try to analyse all symbols in the module as found by `inspect.getmembers`.
189 for obj_name
, obj
in symbols
:
190 if is_import(obj
, module_path
):
193 # function and similar callables
194 if inspect
.isroutine(obj
):
195 self
._add
_tag
(obj_name
, obj
, KIND_FUNCTION
, module_path
)
197 elif inspect
.isclass(obj
):
198 if _ignore_class(module_path
, obj_name
):
200 self
._add
_tag
(obj_name
, obj
, KIND_CLASS
, module_path
)
201 methods
= inspect
.getmembers(obj
)
203 for m_name
, m_obj
in methods
:
204 self
._add
_tag
(m_name
, m_obj
, KIND_MEMBER
, module_path
, parent
=obj_name
)
def _process_module_with_fallback_parser(self, module_filename):
    """
    Plain regular expression based parsing, used as fallback if `inspect`'ing the module
    is not possible

    @param module_filename (path of the module's source file, read as UTF-8)
    """
    with open(module_filename, encoding='utf-8') as source_file:
        for source_line in source_file:
            found = TAG_REGEXP.match(source_line)
            if found is None:
                continue

            definition_keyword, name, arguments = found.groups()
            # NOTE(review): tags are stored under the bare name here, whereas `_add_tag`
            # uses (module_path, parent, object_name) tuples as keys - confirm this is intended.
            if not name or is_private_identifier(name) or name in self.tags:
                continue

            if definition_keyword == 'class':
                kind = KIND_CLASS
            else:
                kind = KIND_FUNCTION
            self.tags[name] = format_tag(name, kind, arguments.strip(), parent=None)
def add_builtins(self):
    """
    Add the contents of the `builtins` module as simple tags

    Classes are added as class tags, other relevant identifiers as function tags.
    """
    # `__builtins__` is an implementation detail of CPython: it is the module in
    # `__main__` but a plain dict in imported modules, so use the real module instead.
    import builtins  # pylint: disable=import-outside-toplevel

    for b_name, b_obj in inspect.getmembers(builtins):
        if inspect.isclass(b_obj):
            self._add_tag(b_name, b_obj, KIND_CLASS)
        elif is_relevant_identifier(b_obj):
            self._add_tag(b_name, b_obj, KIND_FUNCTION)
def write_to_file(self, filename):
    """
    Write the found tags into the file specified by filename

    All collected tag values and the name of this script (`sys.argv[0]`) are handed over
    to `write_ctags_file`; sorting is presumably done there (not visible from here).

    @param filename (str)
    """
    collected_tags = self.tags.values()
    script_name = sys.argv[0]
    write_ctags_file(filename, collected_tags, script_name)
def is_import(object_, module_path):
    """
    Tell whether `object_` belongs to another module than `module_path`.

    The decision is based on the `__module__` attribute of the object; objects without
    (or with an empty) `__module__` are never treated as imports.
    """
    defining_module = getattr(object_, '__module__', None)
    return bool(defining_module and defining_module != module_path)
def is_private_identifier(tagname):
    """Tell whether `tagname` begins or ends with an underscore."""
    if tagname.startswith('_'):
        return True
    return tagname.endswith('_')
def is_relevant_identifier(object_):
    """Tell whether at least one of the `inspect` checks below applies to `object_`."""
    # TODO add "inspect.isdatadescriptor" for properties
    # TODO maybe also consider attributes, e.g. by checking against __dict__ or so
    checks = (
        inspect.ismethod,
        inspect.isclass,
        inspect.isfunction,
        inspect.isgeneratorfunction,
        inspect.isgenerator,
        inspect.iscoroutinefunction,
        inspect.iscoroutine,
        inspect.isawaitable,
        inspect.isasyncgenfunction,
        inspect.isasyncgen,
        inspect.isroutine,
        inspect.isabstract,
    )
    # the checks run in the listed order and stop at the first match
    return any(check(object_) for check in checks)
def _setup_global_package_ignore_list():
    """
    Read the python-config path from LIBPL and strip the prefix part
    (e.g. /usr/lib/python3.8/config-3.8-x86_64-linux-gnu gets config-3.8-x86_64-linux-gnu)

    The result is appended to PYTHON_LIB_IGNORE_PACKAGES. Nothing is added if LIBPL
    is not available.
    """
    libpl = sysconfig.get_config_var('LIBPL')
    if not libpl:
        # get_config_var() returns None if the variable is not defined for this build
        # and Path(None) would raise a TypeError
        return

    python_config_dir = Path(libpl)
    try:
        python_config_package = python_config_dir.relative_to(PYTHON_LIB_DIRECTORY)
    except ValueError:
        # the config directory is not below the library directory, keep the full path
        python_config_package = python_config_dir

    PYTHON_LIB_IGNORE_PACKAGES.append(python_config_package.as_posix())
def _ignore_package(package):
    """Tell whether any entry of PYTHON_LIB_IGNORE_PACKAGES is contained in `package`."""
    return any(ignore in package for ignore in PYTHON_LIB_IGNORE_PACKAGES)
def _ignore_module(module):
    """Tell whether `module` is listed in PYTHON_LIB_IGNORE_MODULES."""
    is_ignored = module in PYTHON_LIB_IGNORE_MODULES
    return is_ignored
def _ignore_class(module, class_):
    """Tell whether the dotted name `module.class_` is listed in PYTHON_LIB_IGNORE_CLASSES."""
    qualified_name = f'{module}.{class_}'
    return qualified_name in PYTHON_LIB_IGNORE_CLASSES
302 def _get_module_list(*paths
):
303 # the loop is quite slow but it doesn't matter for this script
306 for module_filename
in path
.rglob('*.py'):
307 module_name
= module_filename
.stem
308 package_path
= module_filename
.relative_to(path
)
309 package
= '.'.join(package_path
.parent
.parts
)
310 # construct full module path (e.g. xml.sax.xmlreader)
311 if module_name
== '__init__':
312 module_path
= package
314 module_path
= f
'{package}.{module_name}'
316 module_path
= module_name
318 # ignore unwanted modules and packages
319 if _ignore_package(package
):
321 if _ignore_module(module_path
):
324 modules
.append((module_path
, module_filename
))
326 # sort module list for nicer output
327 return sorted(modules
)
def main():
    """
    Create the tags file: collect the modules, parse the builtins and every module
    and write the result into TAG_FILENAME.
    """
    _setup_global_package_ignore_list()
    # process modules given on command line, otherwise the standard library directory
    args = sys.argv[1:]
    modules = _get_module_list(*args) if args else _get_module_list(PYTHON_LIB_DIRECTORY)

    # NOTE(review): the class providing add_builtins()/process_module()/write_to_file() is
    # assumed to be named `Parser` - confirm against its definition.
    parser = Parser()
    parser.add_builtins()

    for module_path, module_filename in modules:
        try:
            parser.process_module(module_path, module_filename)
        except Exception as exc:
            # report which module failed, then abort with the original error
            print(f'{exc.__class__.__name__} in {module_path}: {exc}')
            raise

    parser.write_to_file(TAG_FILENAME)


if __name__ == '__main__':
    main()