latex2e writer : Move usepackage hyperref after stylesheet inclusion.
[docutils.git] / docutils / utils.py
blob456f3eed9e80c45d278ddeab5fcaf045a3bf69cf
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Miscellaneous utilities for the documentation utilities.
7 """
9 __docformat__ = 'reStructuredText'
11 import sys
12 import os
13 import os.path
14 import types
15 import warnings
16 import unicodedata
17 from types import StringType, UnicodeType
18 from docutils import ApplicationError, DataError
19 from docutils import nodes
class SystemMessage(ApplicationError):

    """
    Exception carrying the text and level of a ``system_message`` node.

    `Reporter.system_message()` raises it when a message's level reaches
    the reporter's halt level.
    """

    def __init__(self, system_message, level):
        # The exception text is the plain-text rendering of the node.
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """
    Application error related to system messages.

    NOTE(review): never raised in this module; presumably used by callers
    to pass a system message up the call stack -- confirm against callers.
    """
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding='ascii', error_handler='replace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The encoding for stderr output.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.encoding = encoding
        """The character encoding for the stderr output."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif isinstance(stream, (StringType, UnicodeType)):
            # isinstance (rather than an exact type comparison) so that
            # subclasses of the string types are treated as file names too.
            # Leave stream untouched if it's ''.
            if stream != '':
                if isinstance(stream, UnicodeType):
                    # Unicode file names are encoded (default codec)
                    # before being handed to open().
                    stream = stream.encode()
                stream = open(stream, 'w')

        self.stream = stream
        """Where warning output is sent."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset the thresholds, stream and debug switch.

        Emits a `DeprecationWarning`.  The `category` argument is accepted
        but not used.  A `stream` of `None` means `sys.stderr`; unlike the
        constructor, a string is stored as-is and not opened as a file.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """
        Remove a previously attached `observer`.

        Raises `ValueError` (from ``list.remove``) if it was not attached.
        """
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`."""
        # Iterate over a snapshot: an observer may detach itself (or attach
        # another) while being notified, and mutating the list during
        # iteration would make the loop skip the following observer.
        for observer in self.observers[:]:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        A ``base_node`` keyword argument, if given, is not passed on to the
        node; it is only used to look up default "source" and "line"
        attributes.  All other keyword arguments become node attributes.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        # Fall back to the reporter's own source description.
        attributes.setdefault('source', self.source)
        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        # An empty stream ('') is false and suppresses all output.
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL):
            msgtext = msg.astext().encode(self.encoding, self.error_handler)
            print >>self.stream, msgtext
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` (no message is created) when the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):

    """Base class for the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):

    """Invalid option field (e.g. a field name of several words)."""


class BadOptionDataError(ExtensionOptionError):

    """Invalid option data (missing name, missing data, bad quotes, etc.)."""


class DuplicateOptionError(ExtensionOptionError):

    """The same option was given more than once."""
def extract_extension_options(field_list, options_spec):
    """
    Convert a field list into a dictionary of extension option values.

    The fields are first turned into (name, value) pairs by
    `extract_options()`, which `assemble_option_dict()` then converts
    according to `options_spec`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Collect (name, value) option pairs from the fields of `field_list`.

    Names are lower-cased and converted with `str()`.  A field with an
    empty body yields the value `None`.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        raw_name = field[0].astext()
        if len(raw_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(raw_name.lower())
        body = field[1]
        if len(body) == 0:
            value = None
        else:
            # Anything other than exactly one paragraph holding exactly one
            # text node is rejected.
            is_plain_paragraph = (
                len(body) == 1
                and isinstance(body[0], nodes.paragraph)
                and len(body[0]) == 1
                and isinstance(body[0][0], nodes.Text))
            if not is_plain_paragraph:
                raise BadOptionDataError(
                    'extension option field body may contain\n'
                    'a single paragraph only (option "%s")' % name)
            value = body[0][0].astext()
        pairs.append((name, value))
    return pairs
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).

    A `ValueError` or `TypeError` from the conversion function is re-raised
    as the same class, with the option name and value prepended to the
    original message.
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError):
            # sys.exc_info() instead of "except X, detail" / "except X as
            # detail": this form is valid on every Python version.
            detail = sys.exc_info()[1]
            # Exception args need not be strings; format each one so that
            # joining cannot raise an unrelated TypeError that would hide
            # the original error.
            reason = ' '.join(['%s' % (arg,) for arg in detail.args])
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, reason))
    return options
class NameValueError(DataError):

    """Invalid "name=value" input; raised by `extract_name_value()`."""
def extract_name_value(line):
    """
    Parse a line of the form "name=value ..." into (name, value) pairs.

    Names are lower-cased.  A value is either quoted with single or double
    quotes (and may then contain spaces), or runs up to the next space.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    rest = line
    while rest:
        eq_pos = rest.find('=')
        if eq_pos == -1:
            raise NameValueError('missing "="')
        name = rest[:eq_pos].strip()
        if not name:
            # Also covers a leading "=" (nothing at all before it).
            raise NameValueError(
                'missing attribute name before "="')
        rest = rest[eq_pos + 1:].lstrip()
        if not rest:
            raise NameValueError(
                'missing value after "%s="' % name)
        quote = rest[0]
        if quote in '\'"':
            closing = rest.find(quote, 1)
            if closing == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (name, quote))
            # The character right after the closing quote, if there is
            # one, must be whitespace.
            follower = rest[closing + 1:closing + 2]
            if follower.strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (name, quote))
            value = rest[1:closing]
            rest = rest[closing + 1:].lstrip()
        else:
            # Unquoted: the value ends at the first space.
            chunks = rest.split(' ', 1)
            value = chunks[0]
            if len(chunks) == 2:
                rest = chunks[1].lstrip()
            else:
                rest = ''
        pairs.append((name.lower(), value))
    return pairs
def new_reporter(source_path, settings):
    """
    Create a `Reporter` configured from runtime settings.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes read are ``report_level``,
            ``halt_level``, ``warning_stream``, ``debug``,
            ``error_encoding`` and ``error_encoding_error_handler``.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Create an empty document object with its own reporter.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none provided, a default set will be used.
    """
    # Imported here rather than at module level.
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    document = nodes.document(
        settings, new_reporter(source_path, settings), source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` holds exactly one `nodes.Text` child.
    `keyword_substitutions` is a sequence of (compiled pattern, substitution)
    pairs; only the first pattern found in the text is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode.data):
            textnode.data = pattern.sub(substitution, textnode.data)
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    The result always uses '/' as separator.  If there is no common prefix,
    return the absolute path to `target`.  A false `source` is replaced by
    a dummy file name, which is resolved like any relative path.
    """
    src = os.path.abspath(source or 'dummy_file').split(os.sep)
    dst = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if src[:2] != dst[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(dst)
    # Count the leading path components the two paths share.
    shared = 0
    limit = min(len(src), len(dst))
    while shared < limit and src[shared] == dst[shared]:
        shared += 1
    # One '..' per remaining source directory; the last remaining source
    # component is the file itself.  A negative count yields no '..'.
    climbs = len(src) - shared - 1
    return '/'.join(['..'] * climbs + dst[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    If ``settings.stylesheet_path`` is set, return that path made relative
    to `relative_to` (default: ``settings._destination``) via
    `relative_path()`.  Otherwise return ``settings.stylesheet`` unchanged.

    ``stylesheet`` and ``stylesheet_path`` are mutually exclusive; this is
    checked with an assertion.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, \
               'stylesheet and stylesheet_path are mutually exclusive.'
        # Identity test: "== None" would invoke a custom __eq__.
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    An explicit (non-None) ``trim_footnote_reference_space`` setting is
    returned as is.  When it is None, the answer is true only if the
    settings have a ``footnote_references`` attribute equal to
    'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    style = getattr(settings, 'footnote_references', None)
    return style == 'superscript'
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up through ``parent``) with a true ``source``
    or ``line`` supplies both values.  The walk stops at the first false
    node; (None, None) is returned if nothing was found.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
def escape2null(text):
    """
    Return `text` with every escaping backslash replaced by a null.

    The character right after an escaping backslash is copied unchanged and
    is never treated as an escape itself, so a double backslash becomes a
    null followed by one backslash.
    """
    pieces = []
    position = 0
    backslash = text.find('\\')
    while backslash != -1:
        pieces.append(text[position:backslash])
        pieces.append('\x00')
        # The escaped character (empty if the backslash ends the text).
        pieces.append(text[backslash + 1:backslash + 2])
        position = backslash + 2    # skip character after escape
        backslash = text.find('\\', position)
    pieces.append(text[position:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.

    With a true `restore_backslashes`, each null becomes a backslash.
    Otherwise nulls are dropped, together with the space or newline that
    directly follows them (backslash-escaped whitespace).
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: strip null+whitespace pairs before the bare nulls.
    for marker in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(marker, '')
    return text
east_asian_widths = {
    'W': 2,     # Wide
    'F': 2,     # Full-width (wide)
    'Na': 1,    # Narrow
    'H': 1,     # Half-width (narrow)
    'N': 1,     # Neutral (not East Asian, treated as narrow)
    # Ambiguous (s/b wide in East Asian context, narrow otherwise, but
    # that doesn't work):
    'A': 1,
    }
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
def east_asian_column_width(text):
    """
    Return the number of columns `text` occupies.

    For Unicode strings, each character counts with the width that
    `east_asian_widths` assigns to its East Asian width class; for any
    other input this is simply ``len(text)``.
    """
    if not isinstance(text, types.UnicodeType):
        return len(text)
    columns = 0
    for char in text:
        columns += east_asian_widths[unicodedata.east_asian_width(char)]
    return columns
# `column_width` is the width function used by the rest of the package:
# plain `len`, unless this Python's `unicodedata` provides
# `east_asian_width()`.
column_width = len
if hasattr(unicodedata, 'east_asian_width'):
    column_width = east_asian_column_width
def uniq(L):
    """
    Return a new list with the items of `L`, duplicates removed.

    The order of first occurrence is kept.  Items are compared by equality
    and need not be hashable.
    """
    kept = []
    for candidate in L:
        if candidate in kept:
            continue
        kept.append(candidate)
    return kept
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        # The default for `dependencies` is an immutable empty tuple, so
        # no mutable object is shared between calls.
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, filename):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        if filename not in self.list:
            self.list.append(filename)
            if self.file is not None:
                print >>self.file, filename

    def close(self):
        """
        Close the output file.

        Safe to call when there is no output file (nothing happens).
        When the output goes to stdout, the stream is released but not
        closed, because it belongs to the process, not to this object.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        if self.file:
            output_file = self.file.name
        else:
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)