Add custom role with two classes.
[docutils.git] / docutils / utils.py
blob24c4986f085a254305fbc535fdf6f03aa748fcd6
1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 Miscellaneous utilities for the documentation utilities.
9 """
11 __docformat__ = 'reStructuredText'
13 import sys
14 import os
15 import os.path
16 import types
17 import warnings
18 import unicodedata
19 from types import StringType, UnicodeType
20 from docutils import ApplicationError, DataError
21 from docutils import frontend, nodes
class SystemMessage(ApplicationError):

    """
    Exception carrying the text and severity level of a system message.

    `Reporter.system_message()` raises it when a message's level reaches
    the reporter's halt level.
    """

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: Object whose ``astext()`` result becomes the
              exception message.
            - `level`: Severity level; stored as `self.level`.
        """
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no behavior of its own."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding='ascii', error_handler='replace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The encoding for stderr output.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.encoding = encoding
        """The character encoding for the stderr output."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif isinstance(stream, (StringType, UnicodeType)):
            # isinstance (rather than an exact type() comparison) so that
            # subclasses of str/unicode are also treated as file names.
            # Leave stream untouched if it's ''.
            if stream != '':
                if isinstance(stream, UnicodeType):
                    # open() is given a byte-string file name.
                    stream = stream.encode()
                stream = open(stream, 'w')

        self.stream = stream
        """Where warning output is sent."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset the thresholds, warning stream and debug switch.

        Emits a `DeprecationWarning`.  The `category` argument is accepted
        but not used.  A `stream` of `None` selects `sys.stderr`.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer` from the list of attached observers."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`, in attachment order."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        A ``base_node`` keyword argument, if given, is not passed on to the
        message node; it is only used to look up default "source" and "line"
        attributes.  All other keyword arguments become node attributes.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        # Fall back to the reporter's own source description.
        attributes.setdefault('source', self.source)
        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == 0):
            msgtext = msg.astext().encode(self.encoding, self.error_handler)
            print >>self.stream, msgtext
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > 0 or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` without creating a message when the debug switch is
        off.
        """
        if self.debug_flag:
            return self.system_message(0, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(1, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(2, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(3, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(4, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class for the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):
    """Raised for invalid extension option fields."""


class BadOptionDataError(ExtensionOptionError):
    """Raised for invalid extension option data."""


class DuplicateOptionError(ExtensionOptionError):
    """Raised when an extension option is given more than once."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    The field list is first flattened into (name, value) pairs with
    `extract_options()`, then converted with `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lower-cased; the value is `None` for an empty field body,
    otherwise the text of the body's single paragraph.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(name_text.lower())
        body = field[1]
        if len(body) == 0:
            data = None
        elif (len(body) == 1 and isinstance(body[0], nodes.paragraph)
              and len(body[0]) == 1 and isinstance(body[0][0], nodes.Text)):
            # Exactly one paragraph holding exactly one text node.
            data = body[0][0].astext()
        else:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, data))
    return pairs
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError):
            # sys.exc_info() fetches the active exception without relying on
            # version-specific "except ... as/," syntax.
            detail = sys.exc_info()[1]
            # Format each argument individually: exception args are not
            # guaranteed to be strings, and a plain ' '.join() on them would
            # raise a new TypeError that hides the original problem.
            arg_text = ' '.join(['%s' % (arg,) for arg in detail.args])
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, arg_text))
    return options
class NameValueError(DataError):

    """Raised by `extract_name_value()` for invalid "name=value" input."""
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either quoted with ' or " (the quotes
    are removed) or runs up to the next space.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    while line:
        eq_pos = line.find('=')
        if eq_pos == -1:
            raise NameValueError('missing "="')
        attname = line[:eq_pos].strip()
        if not attname:
            # Covers both a leading "=" and a whitespace-only name.
            raise NameValueError(
                'missing attribute name before "="')
        line = line[eq_pos + 1:].lstrip()
        if not line:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = line[0]
        if quote in '\'"':
            end = line.find(quote, 1)
            if end == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # The character right after the closing quote (if any) must be
            # whitespace; the slice is empty at end of line.
            if line[end + 1:end + 2].strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = line[1:end]
            line = line[end + 1:].lstrip()
        else:
            space = line.find(' ')
            if space == -1:
                data, line = line, ''
            else:
                data, line = line[:space], line[space + 1:].lstrip()
        pairs.append((attname.lower(), data))
    return pairs
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes read are `report_level`,
            `halt_level`, `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler`.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none provided, a default set will be used.
    """
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    document = nodes.document(settings,
                              new_reporter(source_path, settings),
                              source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of exactly one `nodes.Text`
    child.  `keyword_substitutions` is a sequence of (compiled pattern,
    substitution) pairs; only the first pattern found in the text is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode.data):
            textnode.data = pattern.sub(substitution, textnode.data)
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.
    """
    src = os.path.abspath(source or 'dummy_file').split(os.sep)
    tgt = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if src[:2] != tgt[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(tgt)
    # Count the leading path components the two paths share.
    shared = 0
    limit = min(len(src), len(tgt))
    while shared < limit and src[shared] == tgt[shared]:
        shared += 1
    # One '..' per remaining source directory (the last part is the file).
    climbs = ['..'] * (len(src) - shared - 1)
    return '/'.join(climbs + tgt[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    If `settings.stylesheet_path` is set, return it as a path relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, \
               'stylesheet and stylesheet_path are mutually exclusive.'
        # Identity test: an argument with a custom __eq__ must not be
        # mistaken for "not given".
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    return (hasattr(settings, 'footnote_references')
            and settings.footnote_references == 'superscript')
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    Walks up the ``parent`` chain until a node with a true `source` or `line`
    is found; returns ``(None, None)`` if there is none.
    """
    current = node
    while current:
        if current.source or current.line:
            return current.source, current.line
        current = current.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls."""
    pieces = []
    pos = 0
    backslash = text.find('\\', pos)
    while backslash != -1:
        pieces.append(text[pos:backslash])
        # Replace the backslash with a null and keep the escaped character
        # (if any) right behind it.
        pieces.append('\x00' + text[backslash + 1:backslash + 2])
        pos = backslash + 2             # skip character after escape
        backslash = text.find('\\', pos)
    pieces.append(text[pos:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: escaped spaces and newlines go first, then any
    # remaining bare nulls.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
east_asian_widths = {
    'W': 2,     # Wide
    'F': 2,     # Full-width (wide)
    'Na': 1,    # Narrow
    'H': 1,     # Half-width (narrow)
    'N': 1,     # Neutral (not East Asian, treated as narrow)
    'A': 1,     # Ambiguous (s/b wide in East Asian context,
                # narrow otherwise, but that doesn't work)
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
def east_asian_column_width(text):
    """
    Return the number of columns `text` occupies.

    For Unicode strings each character counts as its entry in
    `east_asian_widths`; for anything else the result is ``len(text)``.
    """
    if not isinstance(text, types.UnicodeType):
        return len(text)
    return sum([east_asian_widths[unicodedata.east_asian_width(char)]
                for char in text])
# Default to plain character counts; use East Asian widths only when this
# Python's `unicodedata` module provides `east_asian_width()`.
column_width = len
if hasattr(unicodedata, 'east_asian_width'):
    column_width = east_asian_column_width
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        # The default for `dependencies` is an immutable empty tuple so that
        # no mutable object is shared between calls.
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, filename):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        if filename not in self.list:
            self.list.append(filename)
            if self.file is not None:
                print >>self.file, filename

    def close(self):
        """
        Close the output file.

        Does nothing if there is no output file, and never closes
        `sys.stdout`; afterwards `self.file` is None in every case.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        if self.file:
            output_file = self.file.name
        else:
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)