removed CVS reference
[docutils.git] / docutils / utils.py
bloba29b3754fbd0f02f8bddfd7671ab02fba8ae5701
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Miscellaneous utilities for the documentation utilities.
7 """
9 __docformat__ = 'reStructuredText'
11 import sys
12 import os
13 import os.path
14 import types
15 import warnings
16 import unicodedata
17 from types import StringType, UnicodeType
18 from docutils import ApplicationError, DataError
19 from docutils import frontend, nodes
class SystemMessage(ApplicationError):

    """
    Exception wrapping a reported system message.

    The exception text is the plain-text rendering (``astext()``) of the
    given `system_message` node; the numeric severity is stored in
    `self.level`.
    """

    def __init__(self, system_message, level):
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no state or behavior."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding='ascii', error_handler='replace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The encoding for stderr output.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.encoding = encoding
        """The character encoding for the stderr output."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif isinstance(stream, (StringType, UnicodeType)):
            # isinstance() rather than an exact type() match, so that
            # subclasses of the string types are also treated as file names.
            # Leave stream untouched if it's ''.
            if stream != '':
                if isinstance(stream, UnicodeType):
                    # Unicode file names are encoded (default encoding)
                    # before being handed to open().
                    stream = stream.encode()
                stream = open(stream, 'w')

        self.stream = stream
        """Where warning output is sent."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset the thresholds, stream and debug switch.

        Emits a `DeprecationWarning`.  `category` is accepted but not used.
        A `stream` of `None` selects `sys.stderr`; unlike the constructor,
        string values are stored as given and not opened.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer`; ``list.remove`` raises ValueError if absent."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer, in attachment order, with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        A ``base_node`` keyword argument, if given, is not passed on to the
        node; it is only used to look up default "source" and "line"
        attributes via `get_source_line()`.  Explicit ``source``/``line``
        keyword arguments take precedence, and `self.source` is the final
        fallback for "source".
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        attributes.setdefault('source', self.source)
        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        # Output goes to the stream when the level reaches the report
        # threshold, or for level-0 messages when the debug switch is on.
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == 0):
            msgtext = msg.astext().encode(self.encoding, self.error_handler)
            print >>self.stream, msgtext
        # Halting happens before observers are notified and before
        # `max_level` is updated.
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > 0 or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` (no message is created) when the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(0, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(1, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(2, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(3, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(4, *args, **kwargs)
class ExtensionOptionError(DataError):

    """Base class for the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):

    """Invalid option field (raised by `extract_options()`)."""


class BadOptionDataError(ExtensionOptionError):

    """Invalid option field body (raised by `extract_options()`)."""


class DuplicateOptionError(ExtensionOptionError):

    """Option given more than once (raised by `assemble_option_dict()`)."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    This is `extract_options()` followed by `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lowercased and converted with ``str()``.  The value is `None`
    for an empty field body, otherwise the text of the body's only paragraph.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        field_name = field[0].astext()
        if len(field_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field_name.lower())
        body = field[1]
        if len(body) == 0:
            data = None
        else:
            # Accept exactly one paragraph holding exactly one Text node.
            single_text_paragraph = (
                len(body) == 1
                and isinstance(body[0], nodes.paragraph)
                and len(body[0]) == 1
                and isinstance(body[0][0], nodes.Text))
            if not single_text_paragraph:
                raise BadOptionDataError(
                    'extension option field body may contain\n'
                    'a single paragraph only (option "%s")' % name)
            data = body[0][0].astext()
        pairs.append((name, data))
    return pairs
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).

    A `ValueError`/`TypeError` from the conversion function is re-raised with
    the option name and value prepended to its message.  The original
    exception class is kept when it can be built from a single message
    string; otherwise plain `ValueError` or `TypeError` is raised.
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Exception args are not guaranteed to be strings (UnicodeError
            # carries integers), so format each one before joining.
            original = ' '.join(['%s' % (arg,) for arg in detail.args])
            message = ('(option: "%s"; value: %r)\n%s'
                       % (name, value, original))
            try:
                error = detail.__class__(message)
            except TypeError:
                # Subclass whose constructor does not take a lone message
                # (e.g. UnicodeEncodeError): use the documented base class.
                if isinstance(detail, ValueError):
                    error = ValueError(message)
                else:
                    error = TypeError(message)
            raise error
    return options
class NameValueError(DataError):

    """Invalid "name=value" input (raised by `extract_name_value()`)."""
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lowercased.  A value is either quoted (single or double quotes;
    the closing quote must be followed by whitespace or the end of the line)
    or unquoted, in which case it ends at the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    rest = line
    while rest:
        eq_pos = rest.find('=')
        if eq_pos == -1:
            raise NameValueError('missing "="')
        attname = rest[:eq_pos].strip()
        if eq_pos == 0 or not attname:
            raise NameValueError(
                'missing attribute name before "="')
        rest = rest[eq_pos+1:].lstrip()
        if not rest:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = rest[0]
        if quote in '\'"':
            end = rest.find(quote, 1)
            if end == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # The slice is empty at end of line, so this only fires for a
            # non-whitespace character directly after the closing quote.
            if rest[end+1:end+2].strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = rest[1:end]
            rest = rest[end+1:].lstrip()
        else:
            space = rest.find(' ')
            if space == -1:
                data, rest = rest, ''
            else:
                data = rest[:space]
                rest = rest[space+1:].lstrip()
        pairs.append((attname.lower(), data))
    return pairs
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes read are `report_level`,
            `halt_level`, `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler`.
    """
    return Reporter(source_path,
                    settings.report_level,
                    settings.halt_level,
                    stream=settings.warning_stream,
                    debug=settings.debug,
                    encoding=settings.error_encoding,
                    error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    The document gets a fresh reporter from `new_reporter()`, and
    `source_path` is recorded on it via ``note_source(source_path, -1)``.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none provided, a default set will be used.
    """
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    document = nodes.document(settings,
                              new_reporter(source_path, settings),
                              source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` has exactly one child and that child
    is a `nodes.Text`.  `keyword_substitutions` is a sequence of
    (compiled pattern, substitution) pairs; only the first pattern that
    matches the text node's data is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode.data):
            textnode.data = pattern.sub(substitution, textnode.data)
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.  A false `source` is replaced
    by 'dummy_file' before being made absolute.
    """
    src = os.path.abspath(source or 'dummy_file').split(os.sep)
    dst = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if src[:2] != dst[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(dst)
    # Count the leading path components the two paths share.
    shared = 0
    limit = min(len(src), len(dst))
    while shared < limit and src[shared] == dst[shared]:
        shared += 1
    # One '..' per remaining source directory; the last remaining source
    # component is the file name itself and is not counted.
    climbs = ['..'] * (len(src) - shared - 1)
    return '/'.join(climbs + dst[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    If `settings.stylesheet_path` is set, return it as a path relative to
    `relative_to` (default: `settings._destination`), computed with
    `relative_path()`.  Otherwise return `settings.stylesheet` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, \
               'stylesheet and stylesheet_path are mutually exclusive.'
        # Identity test: only a missing argument selects the default, not
        # an object that merely compares equal to None.
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    if settings.trim_footnote_reference_space is not None:
        return settings.trim_footnote_reference_space
    # Settings objects without a `footnote_references` attribute are allowed.
    if not hasattr(settings, 'footnote_references'):
        return False
    return settings.footnote_references == 'superscript'
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The search follows ``.parent`` links and stops at the first node on which
    either attribute is true; ``(None, None)`` is returned if there is none.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
def escape2null(text):
    """
    Return a string with escape-backslashes converted to nulls.

    Each backslash is replaced by a null character, and the character after
    it is copied through untouched (so an escaped backslash stays a
    backslash).
    """
    pieces = []
    pos = 0
    while True:
        slash = text.find('\\', pos)
        if slash == -1:
            break
        pieces.append(text[pos:slash])
        # The slice is empty when the backslash is the last character.
        pieces.append('\x00' + text[slash+1:slash+2])
        pos = slash + 2         # skip character after escape
    pieces.append(text[pos:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    With `restore_backslashes` true, every null becomes a backslash.
    Otherwise a null followed by a space or newline is dropped together with
    that character, and any remaining null is dropped on its own.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Two-character sequences go first so the bare null does not pre-empt them.
    for sep in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(sep, '')
    return text
east_asian_widths = {
    'W': 2,     # Wide
    'F': 2,     # Full-width (wide)
    'Na': 1,    # Narrow
    'H': 1,     # Half-width (narrow)
    'N': 1,     # Neutral (not East Asian, treated as narrow)
    'A': 1,     # Ambiguous (s/b wide in East Asian context,
                # narrow otherwise, but that doesn't work)
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""

def east_asian_column_width(text):
    """
    Return the column width of `text`.

    For Unicode strings this is the sum of the per-character widths from
    `east_asian_widths`; for anything else it is ``len(text)``.
    """
    # type(u'') is the Unicode string type (`types.UnicodeType`).
    if not isinstance(text, type(u'')):
        return len(text)
    return sum([east_asian_widths[unicodedata.east_asian_width(char)]
                for char in text])

# Fall back to plain `len` when `unicodedata` has no `east_asian_width`.
column_width = len
if hasattr(unicodedata, 'east_asian_width'):
    column_width = east_asian_column_width
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        self.set_output(output_file)
        for dependency in dependencies:
            self.add(dependency)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, filename):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        if filename not in self.list:
            self.list.append(filename)
            if self.file is not None:
                print >>self.file, filename

    def close(self):
        """
        Close the output file.

        Safe to call when no output file is set.  `sys.stdout` (selected
        with '-') is released but never closed, since this object does not
        own it.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        # Not every stream has a `name` attribute; show None when absent.
        output_file = getattr(self.file, 'name', None)
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)