One more Document Tree documentation review.
[docutils.git] / docutils / tools / buildhtml.py
blob a3e653777f2e010ef7b6bdbec47d6b41f768ce91
1 #!/usr/bin/env python3
3 # $Id$
4 # Author: David Goodger <goodger@python.org>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Generate .html from all reStructuredText files in a directory.
10 Source files are understood to be standalone reStructuredText documents.
11 Files with names starting ``pep-`` are interpreted as reStructuredText PEPs.
12 """
14 from __future__ import annotations
16 __docformat__ = 'reStructuredText'
18 from pathlib import Path
20 try:
21 import locale
22 locale.setlocale(locale.LC_ALL, '')
23 except Exception:
24 pass
26 import os
27 import os.path
28 import sys
29 import warnings
30 from fnmatch import fnmatch
31 from types import SimpleNamespace
32 from typing import TYPE_CHECKING
34 import docutils
35 import docutils.io
36 from docutils import core, frontend, ApplicationError
37 from docutils.parsers import rst
38 from docutils.utils import relative_path
39 from docutils.readers import standalone, pep
40 from docutils.writers import html4css1, html5_polyglot, pep_html
42 if TYPE_CHECKING:
43 from typing import Literal
45 from docutils.frontend import Values
# Synopsis and description handed to the option parser (see
# `Builder.setup_publishers()`).
usage = '%prog [options] [<directory> ...]'
description = ' '.join((
    'Generate .html from all reStructuredText files',
    'in each <directory> (default is the current directory).',
))
class SettingsSpec(docutils.SettingsSpec):

    """
    Settings and command-line options of the "buildhtml" front end.

    The option table is kept in this separate class: putting it into
    `OptionParser` (below) would override the base class' table.
    """

    # Default glob patterns for directories that are skipped ...
    prune_default = [
        '/*/.hg', '/*/.bzr', '/*/.git', '/*/.svn',
        '/*/.venv', '/*/__pycache__',
    ]
    # ... and for the files that are treated as sources.
    sources_default = ['*.rst', '*.txt']

    # Layout: (<section title>, <section description>, <option table>);
    # every option is a triple (<help text>, [<flags>], {<keyword args>}).
    settings_spec = (
        'Build-HTML Options',
        None,
        (
            (
                'Process all files matching any of the given '
                'glob-style patterns (separated by colons). '
                'This option overwrites the default or config-file values. '
                f'Default: "{":".join(sources_default)}".',
                ['--sources'],
                {
                    'metavar': '<patterns>',
                    'default': sources_default,
                    'validator':
                        frontend.validate_colon_separated_string_list,
                },
            ),
            (
                'Recursively ignore files matching any of the given '
                'glob-style patterns (separated by colons). '
                'This option may be used more than once to add more '
                'patterns.',
                ['--ignore'],
                {
                    'metavar': '<patterns>',
                    'action': 'append',
                    'default': [],
                    'validator':
                        frontend.validate_colon_separated_string_list,
                },
            ),
            (
                'Do not scan subdirectories for files to process.',
                ['--local'],
                {'dest': 'recurse', 'action': 'store_false'},
            ),
            (
                'Recursively scan subdirectories for files to process. '
                'This is the default.',
                ['--recurse'],
                {
                    'action': 'store_true',
                    'default': 1,
                    'validator': frontend.validate_boolean,
                },
            ),
            (
                'Do not process files in <directory> (glob-style patterns, '
                'separated by colons). This option may be used '
                'more than once to add more patterns. '
                f'Default: "{":".join(prune_default)}".',
                ['--prune'],
                {
                    'metavar': '<directory>',
                    'action': 'append',
                    'validator':
                        frontend.validate_colon_separated_string_list,
                    'default': prune_default,
                },
            ),
            (
                'Docutils writer, one of "html", "html4", "html5". '
                'Default: "html" (use Docutils\' default HTML writer).',
                ['--writer'],
                {
                    'metavar': '<writer>',
                    'choices': ['html', 'html4', 'html5'],
                    # no 'default' here: the fallback "html" is applied
                    # in `Builder.setup_publishers()`.
                },
            ),
            (
                frontend.SUPPRESS_HELP,  # Obsoleted by "--writer"
                ['--html-writer'],
                {
                    'metavar': '<writer>',
                    'choices': ['html', 'html4', 'html5'],
                },
            ),
            (
                'Work silently (no progress messages). '
                'Independent of "--quiet".',
                ['--silent'],
                {
                    'action': 'store_true',
                    'validator': frontend.validate_boolean,
                },
            ),
            (
                'Do not process files, show files that would be processed.',
                ['--dry-run'],
                {
                    'action': 'store_true',
                    'validator': frontend.validate_boolean,
                },
            ),
        ),
    )

    relative_path_settings = ('prune',)
    config_section = 'buildhtml application'
    config_section_dependencies = ('applications',)
class OptionParser(frontend.OptionParser):

    """
    Command-line option processing for the ``buildhtml.py`` front end.

    Positional arguments are stored as a list of directories
    (``values._directories``) instead of a source/destination pair.
    """

    def check_values(self, values: Values, args: list[str]) -> Values:
        """Apply the base class checks, then reset ``values._source``.

        Return the `values` object that was passed in.
        """
        super().check_values(values, args)
        values._source = None
        return values

    def check_args(self, args: list[str]) -> tuple[None, None]:
        """Store the positional arguments as directories to process.

        Without positional arguments, the current working directory is
        used.  The result is always ``(None, None)``.
        """
        directories = args if args else [os.getcwd()]
        self.values._directories = directories
        # The 2-tuple return value is kept for backwards compatibility.
        return None, None
class Struct(SimpleNamespace):
    """Attribute container for the configuration of one publisher.

    Only annotations are declared; the values are passed as keyword
    arguments to the constructor or assigned later
    (see `Builder.publishers` and `Builder.setup_publishers()`).
    """

    # Passed to the option parser as its ``components`` argument.
    components: tuple[docutils.SettingsSpec, ...]
    # Reader and writer names, passed on to `core.publish_file()`.
    reader: str
    writer: str
    # The following three are filled in by `Builder.setup_publishers()`.
    option_parser: OptionParser
    setting_defaults: Values
    config_settings: Values
class Builder:
    """
    Walk directories and convert the reStructuredText sources to HTML.

    Settings are assembled per publisher and per directory by
    `get_settings()`; `run()` walks the directories, `visit()` selects
    the files, and `process_rst_source_file()` publishes a single file.
    """

    publishers: dict[str, Struct] = {
        '': Struct(
            components=(
                pep.Reader, rst.Parser, pep_html.Writer, SettingsSpec,
            ),
        ),
        'html4': Struct(
            components=(
                rst.Parser, standalone.Reader, html4css1.Writer, SettingsSpec,
            ),
            reader='standalone',
            writer='html4',
        ),
        'html5': Struct(
            components=(
                rst.Parser, standalone.Reader, html5_polyglot.Writer,
                SettingsSpec,
            ),
            reader='standalone',
            writer='html5',
        ),
        'PEPs': Struct(
            components=(
                rst.Parser, pep.Reader, pep_html.Writer, SettingsSpec,
            ),
            reader='pep',
            writer='pep_html',
        ),
    }
    """Publisher-specific settings. Key '' is for the front-end script
    itself. ``self.publishers[''].components`` must contain a superset of
    all components used by individual publishers."""

    def __init__(self) -> None:
        """Set up the publishers and read command line and config files."""
        # Shallow copy: the instance gets its own mapping (so that the
        # "html" alias below does not end up in the class attribute),
        # the `Struct` values are still shared with the class.
        self.publishers = self.publishers.copy()
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            self.settings_spec = frontend.Values()
            self.initial_settings = frontend.Values()
        self.directories = []

        self.setup_publishers()
        # default html writer (may change to html5 some time):
        self.publishers['html'] = self.publishers['html4']

    def setup_publishers(self) -> None:
        """
        Manage configurations for individual publishers.

        Each publisher (combination of parser, reader, and writer) may have
        its own configuration defaults, which must be kept separate from those
        of the other publishers. Setting defaults are combined with the
        config file settings and command-line options by
        `self.get_settings()`.

        Sets `self.settings_spec` (the parsed command line) and
        `self.initial_settings` (the settings of the front end itself).
        """
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            for publisher in self.publishers.values():
                option_parser = OptionParser(
                    components=publisher.components, read_config_files=True,
                    usage=usage, description=description)
                publisher.option_parser = option_parser
                publisher.setting_defaults = option_parser.get_default_values()
                frontend.make_paths_absolute(
                    publisher.setting_defaults.__dict__,
                    list(option_parser.relative_path_settings))
                publisher.config_settings = (
                    option_parser.get_standard_config_settings())
            self.settings_spec = self.publishers[''].option_parser.parse_args(
                values=frontend.Values())  # no defaults; just the cmdline opts
            self.initial_settings = self.get_settings('')

        # "html_writer" is the obsolete spelling of "writer": warn if it is
        # set, but still honour it as long as "writer" itself is unset.
        if self.initial_settings.html_writer is not None:
            warnings.warn('The configuration setting "html_writer" '
                          'will be removed in Docutils 2.0. '
                          'Use setting "writer" instead.',
                          FutureWarning, stacklevel=5)
        if self.initial_settings.writer is None:
            self.initial_settings.writer = (self.initial_settings.html_writer
                                            or 'html')

    def get_settings(
        self,
        publisher_name: Literal['', 'html', 'html5', 'html4', 'PEPs'],
        directory: str | os.PathLike[str] | None = None,
    ) -> Values:
        """
        Return a settings object, from multiple sources.

        Copy the setting defaults, overlay the startup config file settings,
        then the local config file settings, then the command-line options.

        If `directory` is not None, it is searched for a file "docutils.conf"
        which is parsed after standard configuration files.
        Path settings in this configuration file are resolved relative
        to `directory`, not the current working directory.

        Duplicate entries are removed from the "ignore" and "prune"
        settings; the order of the remaining entries is preserved.
        """
        publisher = self.publishers[publisher_name]
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            settings = frontend.Values(publisher.setting_defaults.__dict__)
        settings.update(publisher.config_settings, publisher.option_parser)
        if directory:
            local_config = publisher.option_parser.get_config_file_settings(
                os.path.join(directory, 'docutils.conf'))
            frontend.make_paths_absolute(
                local_config,
                list(publisher.option_parser.relative_path_settings),
                directory)
            settings.update(local_config, publisher.option_parser)
        settings.update(self.settings_spec.__dict__, publisher.option_parser)
        # Remove duplicate entries from "appending" settings.
        # `dict.fromkeys()` keeps the first occurrence of every entry in
        # its original position, so the result is the same on every run
        # (the order of `list(set(...))` depends on string hashing).
        settings.ignore = list(dict.fromkeys(settings.ignore))
        settings.prune = list(dict.fromkeys(settings.prune))
        return settings

    def run(
        self,
        directory: str | os.PathLike[str] | None = None,
        recurse: bool = True,
    ) -> None:
        """
        Process `directory` or the directories given on the command line.

        If `directory` is empty or None, the directories from the command
        line are used, with the current working directory as last resort.
        Subdirectories are only scanned if both `recurse` and the
        "recurse" setting are true.
        """
        recurse = recurse and self.initial_settings.recurse
        if directory:
            self.directories = [directory]
        elif self.settings_spec._directories:
            self.directories = self.settings_spec._directories
        else:
            self.directories = [os.getcwd()]
        for root in self.directories:
            dir_abs = Path(root).resolve()
            for dirpath, dirnames, filenames in os.walk(dir_abs):
                # `os.walk()` by default recurses down the tree,
                # we modify `dirnames` in-place to control the behaviour.
                if recurse:
                    dirnames.sort()
                else:
                    del dirnames[:]
                self.visit(Path(dirpath), dirnames, filenames)

    def visit(
        self,
        dirpath: Path,
        dirnames: list[str],
        filenames: list[str],
    ) -> None:
        """
        Process the source files in `dirpath`.

        If `dirpath` matches a "prune" pattern, report this, empty
        `dirnames` in-place (so that `os.walk()` does not descend), and
        return.  Otherwise process, in sorted order, every file in
        `filenames` that matches a "sources" pattern and no "ignore"
        pattern.
        """
        settings = self.get_settings('', dirpath)
        errout = docutils.io.ErrorOutput(encoding=settings.error_encoding)
        if match_patterns(dirpath, settings.prune):
            # NOTE(review): this message is written even with "--silent";
            # left unchanged -- confirm whether that is intended.
            errout.write('/// ...Skipping directory (pruned): %s\n'
                         % relative_path(None, dirpath))
            sys.stderr.flush()
            dirnames.clear()  # modify in-place to control `os.walk()` run
            return
        if not self.initial_settings.silent:
            errout.write('/// Processing directory: %s\n'
                         % relative_path(None, dirpath))
            sys.stderr.flush()
        for name in sorted(filenames):
            if match_patterns(name, settings.ignore):
                continue
            if match_patterns(name, settings.sources):
                self.process_rst_source_file(dirpath, name)

    def process_rst_source_file(self, directory: Path, name: str) -> None:
        """
        Convert the file `name` in `directory` to HTML.

        Files with names starting with "pep-" use the "PEPs" publisher,
        all others the publisher selected by the "writer" setting.
        The output path is the source path with the extension replaced
        by ".html".  With the "dry_run" setting, nothing is written.
        An `ApplicationError` is reported and does not stop the run.
        """
        if name.startswith('pep-'):
            publisher = 'PEPs'
        else:
            publisher = self.initial_settings.writer
        settings = self.get_settings(publisher, directory)
        errout = docutils.io.ErrorOutput(encoding=settings.error_encoding)
        pub_struct = self.publishers[publisher]
        settings._source = str(directory / name)
        settings._destination = os.path.splitext(settings._source)[0] + '.html'
        if not self.initial_settings.silent:
            errout.write(' ::: Processing: %s\n' % name)
            sys.stderr.flush()
        if not settings.dry_run:
            try:
                core.publish_file(source_path=settings._source,
                                  destination_path=settings._destination,
                                  reader=pub_struct.reader,
                                  parser='restructuredtext',
                                  writer=pub_struct.writer,
                                  settings=settings)
            except ApplicationError as err:
                errout.write(f' {type(err).__name__}: {err}\n')
def match_patterns(
    name: str | os.PathLike[str],
    patterns: list[str] | tuple[str, ...],
) -> bool:
    """Return True, if `name` matches any item of the sequence `patterns`.

    Matching is done with `fnmatch.fnmatch`. It resembles shell-style
    globbing, but without special treatment of path separators and '.'
    (in contrast to the `glob module` and `pathlib.PurePath.match()`).
    For example, "``/*.py``" matches "/a/b/c.py".

    `name` may be a string or a path-like object; `patterns` is a
    sequence of pattern strings.  An empty sequence matches nothing.

    PROVISIONAL.
    TODO: use `pathlib.PurePath.match()` once this supports "**".
    """
    name = os.fspath(name)
    return any(fnmatch(name, pattern) for pattern in patterns)
if __name__ == "__main__":
    # Script entry point: `run()` without arguments processes the
    # directories given on the command line.
    builder = Builder()
    builder.run()