docstring fix
[docutils.git] / docutils / docutils / utils.py
blobf925164738af4246c30a4a0ac585123e63f48bb3
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Miscellaneous utilities for the documentation utilities.
7 """
9 __docformat__ = 'reStructuredText'
11 import sys
12 import os
13 import os.path
14 import warnings
15 import unicodedata
16 from docutils import ApplicationError, DataError
17 from docutils import nodes
class SystemMessage(ApplicationError):

    """
    Exception that carries the text of a system message and its level.
    """

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: Object whose ``astext()`` result becomes the
              exception message.
            - `level`: Stored unchanged as ``self.level``.
        """
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no behavior of its own."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding='ascii', error_handler='replace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The encoding for stderr output.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.encoding = encoding
        """The character encoding for the stderr output."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif isinstance(stream, (str, unicode)) and stream != '':
            # A non-empty string is a file name ('' is left untouched so
            # that output is discarded).  isinstance() also accepts string
            # subclasses, which an exact type() comparison would treat as
            # file-like objects.
            if isinstance(stream, unicode):
                stream = stream.encode()
            stream = open(stream, 'w')

        self.stream = stream
        """Where warning output is sent."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: overwrite the thresholds, stream and debug switch.

        Issues a `DeprecationWarning`.  `category` is accepted but not used.
        A `stream` of `None` selects `sys.stderr`.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer` from the list of registered observers."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every registered observer, in order, with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        :Parameters:
            - `level`: Message level; also used as an index into `levels`.
            - `message`: Passed on to `nodes.system_message`.
            - `children`: Extra positional arguments for
              `nodes.system_message`.
            - `kwargs`: Attributes for the new node.  A ``base_node`` entry
              is removed and used to look up default ``source`` and ``line``
              values via `get_source_line()`.

        :Exception: `SystemMessage` if `level` is at or above
            ``self.halt_level``.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        # Fall back to the reporter's own source description.
        attributes.setdefault('source', self.source)
        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL):
            msgtext = msg.astext().encode(self.encoding, self.error_handler)
            print >>self.stream, msgtext
        # The message is written out (above) before processing is halted.
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` without creating a message when the debug switch is
        off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):

    """Base class of the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):

    """Raised for invalid extension option fields."""


class BadOptionDataError(ExtensionOptionError):

    """Raised for invalid extension option data."""


class DuplicateOptionError(ExtensionOptionError):

    """Raised for duplicate extension options."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    The field list is first turned into (name, value) pairs by
    `extract_options()`, which are then converted by
    `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lower-cased; a field with an empty body yields a value of
    `None`.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        field_name = field[0].astext()
        if len(field_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field_name.lower())
        body = field[1]
        if len(body) == 0:
            pairs.append((name, None))
            continue
        # The only acceptable non-empty body is one paragraph holding one
        # text node.
        single_text_paragraph = (
            len(body) == 1 and isinstance(body[0], nodes.paragraph)
            and len(body[0]) == 1 and isinstance(body[0][0], nodes.Text))
        if not single_text_paragraph:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, body[0][0].astext()))
    return pairs
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    converted = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in converted:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            result = convertor(value)
        except (ValueError, TypeError):
            # Re-raise the same exception class with the option name and
            # raw value prepended to the original message.
            detail = sys.exc_info()[1]
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, ' '.join(detail.args)))
        converted[name] = result
    return converted
class NameValueError(DataError):

    """Raised for invalid "name=value" input."""
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value may be wrapped in single or double
    quotes; an unquoted value ends at the next space.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    while line:
        if '=' not in line:
            raise NameValueError('missing "="')
        before, after = line.split('=', 1)
        attname = before.strip()
        if not attname:
            raise NameValueError(
                'missing attribute name before "="')
        line = after.lstrip()
        if not line:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = line[0]
        if quote in '\'"':
            closing = line.find(quote, 1)
            if closing == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # The character after the closing quote, or '' at end of line.
            following = line[closing + 1:closing + 2]
            if following.strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = line[1:closing]
            line = line[closing + 1:].lstrip()
        else:
            pieces = line.split(' ', 1)
            data = pieces[0]
            if len(pieces) == 2:
                line = pieces[1].lstrip()
            else:
                line = ''
        pairs.append((attname.lower(), data))
    return pairs
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes read are ``report_level``,
            ``halt_level``, ``warning_stream``, ``debug``,
            ``error_encoding`` and ``error_encoding_error_handler``.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    The document is given a fresh reporter built by `new_reporter()`, and
    `source_path` is registered with it through ``note_source()``.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none provided, a default set will be used.
    """
    from docutils import frontend
    if settings is None:
        default_parser = frontend.OptionParser()
        settings = default_parser.get_default_values()
    document = nodes.document(settings, new_reporter(source_path, settings),
                              source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` holds exactly one child and that
    child is a `nodes.Text`.  Otherwise the (pattern, substitution) pairs of
    `keyword_substitutions` are tried in order; the first pattern found in
    the text replaces the child with a new `nodes.Text` and ends the search.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            cleaned = pattern.sub(substitution, textnode)
            paragraph[0] = nodes.Text(cleaned)
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.  A false `source` is replaced
    by 'dummy_file'.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components shared by both paths:
    shared = 0
    limit = min(len(source_parts), len(target_parts))
    while shared < limit and source_parts[shared] == target_parts[shared]:
        shared += 1
    # One '..' for every remaining source directory; the final source
    # component is not counted:
    climbs = len(source_parts) - shared - 1
    return '/'.join(['..'] * climbs + target_parts[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated.  Use `get_stylesheet_list()` instead to enable
    specification of multiple stylesheets as a comma-separated list.

    :Parameters:
        - `settings`: Object with ``stylesheet`` and ``stylesheet_path``
          attributes (and ``_destination`` when `relative_to` is omitted).
        - `relative_to`: File the returned path is made relative to;
          defaults to ``settings._destination``.

    Returns ``settings.stylesheet_path`` rewritten by `relative_path()` when
    it is set, otherwise ``settings.stylesheet`` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        # Identity test: only a missing argument selects the default.
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
# Return 'stylesheet' or 'stylesheet_path' arguments as list.
#
# The original settings arguments are kept unchanged: you can test
# with e.g. ``if settings.stylesheet_path:``
#
# Differences from ``get_stylesheet_reference``:
# * return value is a list
# * no re-writing of the path (and therefore no optional argument)
#   (if required, use ``utils.relative_path(source, target)``
#   in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    ``settings.stylesheet_path`` takes precedence over
    ``settings.stylesheet``; the chosen value is split at commas.  An empty
    list is returned when neither is set.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        references = settings.stylesheet_path
    elif settings.stylesheet:
        references = settings.stylesheet
    else:
        return []
    return references.split(",")
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    # `settings` may lack a ``footnote_references`` attribute altogether.
    return (hasattr(settings, 'footnote_references')
            and settings.footnote_references == 'superscript')
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The ``parent`` chain is followed until a node with a true ``source`` or
    ``line`` is found; ``(None, None)`` is returned if there is none.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
def escape2null(text):
    """
    Return a string with escape-backslashes converted to nulls.

    The character following each escape-backslash is copied through
    unchanged and is not itself examined as an escape.
    """
    pieces = []
    position = 0
    backslash = text.find('\\', position)
    while backslash != -1:
        pieces.append(text[position:backslash])
        pieces.append('\x00' + text[backslash + 1:backslash + 2])
        position = backslash + 2        # skip character after escape
        backslash = text.find('\\', position)
    pieces.append(text[position:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    With a true `restore_backslashes`, every null becomes a backslash and
    nothing else is changed.
    """
    if not restore_backslashes:
        # Longer markers first: a null followed by a space or a newline is
        # dropped together with that character; remaining nulls go last.
        for marker in ('\x00 ', '\x00\n', '\x00'):
            fragments = text.split(marker)
            text = ''.join(fragments)
        return text
    return text.replace('\x00', '\\')
# Column width assigned to each code that `unicodedata.east_asian_width()`
# can return.
east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (should be wide in an East Asian
                               # context, narrow otherwise, but that doesn't
                               # work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
def east_asian_column_width(text):
    """
    Return the number of columns `text` occupies.

    For a `unicode` object, the widths that `east_asian_widths` assigns to
    each character's East Asian width code are summed.  Anything else is
    measured with `len()`.
    """
    if not isinstance(text, unicode):
        return len(text)
    columns = 0
    for char in text:
        code = unicodedata.east_asian_width(char)
        columns += east_asian_widths[code]
    return columns
# Select the implementation behind `column_width`: the East Asian
# width-aware measure when this Python's `unicodedata` module provides
# `east_asian_width()`, plain `len` otherwise.
if hasattr(unicodedata, 'east_asian_width'):
    column_width = east_asian_column_width
else:
    column_width = len
def uniq(L):
    """
    Return a new list with the items of `L`, duplicates dropped.

    The first occurrence of each item is kept, in original order.
    Membership is tested against a list, so unhashable items are accepted.
    """
    unique = []
    for candidate in L:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=None):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        `dependencies` may be any iterable of file names; `None` (the
        default) adds nothing.
        """
        self.set_output(output_file)
        # `None` instead of a shared mutable default list.
        if dependencies is not None:
            for i in dependencies:
                self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if not filename in self.list:
                self.list.append(filename)
                if self.file is not None:
                    print >>self.file, filename

    def close(self):
        """
        Close the output file.

        Does nothing beyond resetting ``self.file`` when no output file is
        set.  `sys.stdout` (selected with '-') is released but never closed.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        """Return ``ClassName(output file name, list of dependencies)``."""
        if self.file:
            output_file = self.file.name
        else:
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)