Documentation for stylesheet usage.
[docutils.git] / docutils / utils.py
blob01c365cb98066a3e6a22587ee31c6cbd54cf91e9
1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 Miscellaneous utilities for the documentation utilities.
9 """
11 __docformat__ = 'reStructuredText'
13 import sys
14 import os
15 import os.path
16 from types import StringType, UnicodeType
17 from docutils import ApplicationError, DataError
18 from docutils import frontend, nodes
class SystemMessage(ApplicationError):

    """
    Exception carrying the text of a ``system_message`` node.

    `Reporter.system_message()` raises it when a message's level reaches the
    halt threshold of its category.
    """

    def __init__(self, system_message):
        """
        Store the text of `system_message` as the exception's argument.

        `system_message` must provide an ``astext()`` method.
        """
        text = system_message.astext()
        Exception.__init__(self, text)
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored in the default
    reporting category, '' (zero-length string).

    Multiple reporting categories [#]_ may be set, each with its own reporting
    and halting thresholds, debugging switch, and warning stream
    (collectively a `ConditionSet`).  Categories are hierarchical dotted-name
    strings that look like attribute references: 'spam', 'spam.eggs',
    'neeeow.wum.ping'.  The 'spam' category is the ancestor of
    'spam.bacon.eggs'.  Unset categories inherit stored conditions from their
    closest ancestor category that has been set.

    When a system message is generated, the stored conditions from its
    category (or ancestor if unset) are retrieved.  The system message level
    is compared to the thresholds stored in the category, and a warning or
    error is generated as appropriate.  Debug messages are produced iff the
    stored debug switch is on.  Message output is sent to the stored warning
    stream.

    The default category is '' (empty string).  By convention, Writers should
    retrieve reporting conditions from the 'writer' category (which, unless
    explicitly set, defaults to the conditions of the default category).

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [#] The concept of "categories" was inspired by the log4j project:
       http://jakarta.apache.org/log4j/.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0):
        """
        Initialize the `ConditionSet` for the `Reporter`'s default category.

        :Parameters:

            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing), or
              `None` (implies `sys.stderr`; default).
        """
        self.source = source
        """The path to or description of the source data."""

        self.categories = {'': ConditionSet(debug, report_level, halt_level,
                                            self._open_stream(stream))}
        """Mapping of category names to conditions. Default category is ''."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

    def _open_stream(self, stream):
        """
        Return the writable object to use for the `stream` argument.

        `None` selects `sys.stderr`, a string is taken as a file name and
        opened for writing, anything else is returned unchanged.
        """
        if stream is None:
            return sys.stderr
        # type('') / type(u'') name the byte and text string types without
        # relying on Python-2-only names from the `types` module.
        if isinstance(stream, (type(''), type(u''))):
            # The file stays open for as long as the category refers to it;
            # the Reporter never closes it explicitly.
            return open(stream, 'w')
        return stream

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Store a new `ConditionSet` for `category`, replacing any existing one.

        `stream` accepts the same values as in `__init__()`.
        """
        self.categories[category] = ConditionSet(debug, report_level,
                                                 halt_level,
                                                 self._open_stream(stream))

    def unset_conditions(self, category):
        """
        Forget the conditions stored for `category`, if any.

        The default category ('') is never removed, so that lookups always
        have a final ancestor to fall back on.
        """
        if category and category in self.categories:
            del self.categories[category]

    __delitem__ = unset_conditions

    def get_conditions(self, category):
        """
        Return the `ConditionSet` of `category` or of its closest set ancestor.

        Raise `KeyError` if neither the category, any ancestor, nor the
        default category ('') has conditions stored.
        """
        while category not in self.categories:
            if not category:
                # Even the root category is missing; without this check the
                # loop would spin forever on ''.
                raise KeyError(category)
            # Drop the last dotted component: 'spam.eggs' -> 'spam' -> ''.
            category = category.rpartition('.')[0]
        return self.categories[category]

    __getitem__ = get_conditions

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer` from the list of observers."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        The keyword arguments `category` (reporting category name) and
        `base_node` (node from which "source" and "line" are looked up) are
        consumed here; all others become attributes of the new node.
        """
        attributes = kwargs.copy()
        category = attributes.pop('category', '')
        if 'base_node' in attributes:
            source, line = get_source_line(attributes.pop('base_node'))
            # Explicit `source`/`line` keyword arguments take precedence.
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        attributes.setdefault('source', self.source)
        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        debug, report_level, halt_level, stream = self[category].astuple()
        if level >= report_level or (debug and level == 0):
            text = msg.astext()
            if category:
                text = '%s [%s]' % (text, category)
            stream.write(text + '\n')
        if level >= halt_level:
            raise SystemMessage(msg)
        # Debug messages reach the observers only when debugging is on.
        if level > 0 or debug:
            self.notify_observers(msg)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.
        """
        return self.system_message(0, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(1, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(2, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(3, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(4, *args, **kwargs)
class ConditionSet:

    """
    A set of two thresholds (`report_level` & `halt_level`), a switch
    (`debug`), and an I/O stream (`stream`), corresponding to one `Reporter`
    category.
    """

    def __init__(self, debug, report_level, halt_level, stream):
        """Store the four conditions as same-named attributes."""
        self.debug = debug
        self.report_level = report_level
        self.halt_level = halt_level
        self.stream = stream

    def __repr__(self):
        """Return a readable representation listing all four conditions."""
        return ('%s(debug=%r, report_level=%r, halt_level=%r, stream=%r)'
                % (self.__class__.__name__, self.debug, self.report_level,
                   self.halt_level, self.stream))

    def astuple(self):
        """Return ``(debug, report_level, halt_level, stream)``."""
        return (self.debug, self.report_level, self.halt_level,
                self.stream)
class ExtensionOptionError(DataError):
    """Base class for errors found while processing extension options."""


class BadOptionError(ExtensionOptionError):
    """Invalid extension option field (raised by `extract_options()`)."""


class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data (raised by `extract_options()`)."""


class DuplicateOptionError(ExtensionOptionError):
    """Option given more than once (raised by `assemble_option_dict()`)."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    This is `extract_options()` followed by `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lower-cased; the value is `None` for a field with an empty
    body, otherwise the text of the body's single paragraph.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = []
    for field in field_list:
        # field[0] is the field name node, field[1] the field body node.
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(name_text.lower())
        body = field[1]
        # Compare the length explicitly: the body is a doctree node, and its
        # truth value is not assumed to reflect whether it has children.
        if len(body) == 0:
            data = None
        elif (len(body) > 1
              or not isinstance(body[0], nodes.paragraph)
              or len(body[0]) != 1
              or not isinstance(body[0][0], nodes.Text)):
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        else:
            data = body[0][0].astext()
        option_list.append((name, data))
    return option_list
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` or `TypeError` for invalid option values (raised by
          the conversion function and re-raised here with the option name
          and value prepended to the message).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            message = ('(option: "%s"; value: %r)\n%s'
                       % (name, value, detail))
            try:
                error = detail.__class__(message)
            except TypeError:
                # Some subclasses (e.g. UnicodeDecodeError) cannot be built
                # from a single message; fall back to the documented base
                # class so callers still get the annotated error.
                if isinstance(detail, ValueError):
                    error = ValueError(message)
                else:
                    error = TypeError(message)
            raise error
    return options
class NameValueError(DataError):
    """Invalid "name=value" input (raised by `extract_name_value()`)."""
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either quoted with ``'`` or ``"``
    (and may then contain whitespace) or runs up to the next whitespace.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    attlist = []
    while line:
        equals = line.find('=')
        if equals == -1:
            raise NameValueError('missing "="')
        attname = line[:equals].strip()
        if not attname:
            raise NameValueError(
                  'missing attribute name before "="')
        line = line[equals+1:].lstrip()
        if not line:
            raise NameValueError(
                  'missing value after "%s="' % attname)
        if line[0] in '\'"':
            quote = line[0]
            endquote = line.find(quote, 1)
            if endquote == -1:
                raise NameValueError(
                      'attribute "%s" missing end quote (%s)'
                      % (attname, quote))
            if len(line) > endquote + 1 and line[endquote + 1].strip():
                raise NameValueError(
                      'attribute "%s" end quote (%s) not followed by '
                      'whitespace' % (attname, quote))
            data = line[1:endquote]
            line = line[endquote+1:].lstrip()
        else:
            # Split on any whitespace (not only ' ') so that unquoted values
            # end where quoted ones may: at a space, tab, etc.
            pieces = line.split(None, 1)
            data = pieces[0]
            if len(pieces) > 1:
                line = pieces[1]
            else:
                line = ''
        attlist.append((attname.lower(), data))
    return attlist
def new_document(source, settings=None):
    """
    Return a new empty document object.

    A `Reporter` is built from the settings' ``report_level``,
    ``halt_level``, ``warning_stream`` and ``debug`` values and handed to
    the document.

    :Parameters:
        `source` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none provided, a default set will be used.
    """
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    document = nodes.document(
        settings,
        Reporter(source, settings.report_level, settings.halt_level,
                 settings.warning_stream, settings.debug),
        source=source)
    document.note_source(source, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`'s text.

    Nothing happens unless `paragraph` consists of exactly one `nodes.Text`
    child.  `keyword_substitutions` is a sequence of (pattern, substitution)
    pairs; each pattern must provide ``match()`` and ``sub()`` (presumably a
    compiled regular expression).  Only the first pattern matching at the
    start of the text is applied; the text node is modified in place.
    """
    if len(paragraph) != 1:
        return
    textnode = paragraph[0]
    if not isinstance(textnode, nodes.Text):
        return
    for regex, replacement in keyword_substitutions:
        if regex.match(textnode.data):
            textnode.data = regex.sub(replacement, textnode.data)
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source`.

    If there is no common prefix, return the absolute path to `target`.

    Both arguments are made absolute first; an empty or `None` `source`
    stands for a file in the current directory.  The result always uses
    '/' as separator.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components the two paths share.
    common = 0
    limit = min(len(source_parts), len(target_parts))
    while common < limit and source_parts[common] == target_parts[common]:
        common += 1
    # The last remaining source component is the file itself; every other
    # one is a directory to climb out of.  (A negative count yields [].)
    climb = ['..'] * (len(source_parts) - common - 1)
    return '/'.join(climb + target_parts[common:])
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``parent``) with a true ``source`` or
    ``line`` supplies both values.  Return ``(None, None)`` if no such node
    exists or `node` is `None`.
    """
    # Test identity rather than truth so the walk does not depend on how
    # node objects define their boolean value.
    while node is not None:
        if node.source or node.line:
            return node.source, node.line
        node = node.parent
    return None, None