minor fixes
[pylit.git] / src / pylit.py
blob20819920cb732f97212766347565b8e44a674250
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
4 # pylit.py
5 # ********
6 # Literate programming with reStructuredText
7 # ++++++++++++++++++++++++++++++++++++++++++
9 # :Date: $Date$
10 # :Revision: $Revision$
11 # :URL: $URL$
12 # :Copyright: © 2005, 2007 Günter Milde.
13 # Released without warranty under the terms of the
14 # GNU General Public License (v. 2 or later)
16 # ::
18 """pylit: bidirectional text <-> code converter
20 Covert between a *text source* with embedded computer code and a *code source*
21 with embedded documentation.
22 """
24 # .. contents::
26 # Frontmatter
27 # ===========
29 # Changelog
30 # ---------
32 # .. class:: borderless
34 # ====== ========== ===========================================================
35 # 0.1 2005-06-29 Initial version.
36 # 0.1.1 2005-06-30 First literate version.
37 # 0.1.2 2005-07-01 Object orientated script using generators.
38 # 0.1.3 2005-07-10 Two state machine (later added 'header' state).
39 # 0.2b 2006-12-04 Start of work on version 0.2 (code restructuring).
40 # 0.2 2007-01-23 Published at http://pylit.berlios.de.
41 # 0.2.1 2007-01-25 Outsourced non-core documentation to the PyLit pages.
42 # 0.2.2 2007-01-26 New behaviour of `diff` function.
43 # 0.2.3 2007-01-29 New `header` methods after suggestion by Riccardo Murri.
44 # 0.2.4 2007-01-31 Raise Error if code indent is too small.
45 # 0.2.5 2007-02-05 New command line option --comment-string.
46 # 0.2.6 2007-02-09 Add section with open questions,
47 # Code2Text: let only blank lines (no comment str)
48 # separate text and code,
49 # fix `Code2Text.header`.
50 # 0.2.7 2007-02-19 Simplify `Code2Text.header`,
51 # new `iter_strip` method replacing a lot of ``if``-s.
52 # 0.2.8 2007-02-22 Set `mtime` of outfile to the one of infile.
53 # 0.3 2007-02-27 New `Code2Text` converter after an idea by Riccardo Murri,
54 # explicit `option_defaults` dict for easier customisation.
55 # 0.3.1 2007-03-02 Expand hard-tabs to prevent errors in indentation,
56 # `Text2Code` now also works on blocks,
57 # removed dependency on SimpleStates module.
58 # 0.3.2 2007-03-06 Bug fix: do not set `language` in `option_defaults`
59 # renamed `code_languages` to `languages`.
60 # 0.3.3 2007-03-16 New language css,
61 # option_defaults -> defaults = optparse.Values(),
62 # simpler PylitOptions: don't store parsed values,
63 # don't parse at initialisation,
64 # OptionValues: return `None` for non-existing attributes,
65 # removed -infile and -outfile, use positional arguments.
66 # 0.3.4 2007-03-19 Documentation update,
67 # separate `execute` function.
68 # 2007-03-21 Code cleanup in `Text2Code.__iter__`.
69 # 0.3.5 2007-03-23 Removed "css" from known languages after learning that
70 # there is no C++ style "// " comment string in CSS2.
71 # 0.3.6 2007-04-24 Documentation update.
72 # 0.4 2007-05-18 Implement Converter.__iter__ as stack of iterator
73 # generators. Iterating over a converter instance now
74 # yields lines instead of blocks.
75 # Provide "hooks" for pre- and postprocessing filters.
76 # Rename states to reduce confusion with formats:
77 # "text" -> "documentation", "code" -> "code_block".
78 # 0.4.1 2007-05-22 Converter.__iter__: cleanup and reorganisation,
79 # rename parent class Converter -> TextCodeConverter.
80 # 0.4.2 2007-05-23 Merged Text2Code.converter and Code2Text.converter into
81 # TextCodeConverter.converter.
82 # 0.4.3 2007-05-30 Replaced use of defaults.code_extensions with
83 # values.languages.keys().
84 # Removed spurious `print` statement in code_block_handler.
85 # Added basic support for 'c' and 'css' languages
86 # with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
87 # 0.5 2007-06-06 Moved `collect_blocks`_ out of `TextCodeConverter`_,
88 # bug fix: collect all trailing blank lines into a block.
89 # Expand tabs with `expandtabs_filter`_.
90 # 0.6 2007-06-20 Configurable code-block marker (default ``::``)
91 # 0.6.1 2007-06-28 Bug fix: reset self.code_block_marker_missing.
92 # 0.7 2007-12-12 prepending an empty string to sys.path in run_doctest()
93 # to allow imports from the current working dir.
94 # 0.7.1 2008-01-07 If outfile does not exist, do a round-trip conversion
95 # and report differences (as with outfile=='-').
96 # 0.7.2 2008-01-28 Do not add missing code-block separators with
97 # `doctest_run` on the code source. Keeps lines consistent.
98 # 0.7.3 2008-04-07 Use value of code_block_marker for insertion of missing
99 # transition marker in Code2Text.code_block_handler
100 # Add "shell" to defaults.languages
101 # 0.7.4 2008-06-23 Add "latex" to defaults.languages
102 # 0.7.5 2009-05-14 Bugfix: ignore blank lines in test for end of code block
103 # 0.7.6 2009-12-15 language-dependent code-block markers (after a
104 # `feature request and patch by jrioux`_),
105 # use DefaultDict for language-dependent defaults,
106 # new defaults setting `add_missing_marker`_.
107 # 0.7.7 2010-06-23 New command line option --codeindent.
108 # 0.7.8 2011-03-30 bugfix: do not overwrite custom `add_missing_marker` value,
109 # allow directive options following the 'code' directive.
110 # 0.7.9 2011-04-05 Decode doctest string if 'magic comment' gives encoding.
111 # ====== ========== ===========================================================
113 # ::
115 _version = "0.7.9"
117 __docformat__ = 'restructuredtext'
120 # Introduction
121 # ------------
123 # PyLit is a bidirectional converter between two formats of a computer
124 # program source:
126 # * a (reStructured) text document with program code embedded in
127 # *code blocks*, and
128 # * a compilable (or executable) code source with *documentation*
129 # embedded in comment blocks
132 # Requirements
133 # ------------
135 # ::
137 import __builtin__, os, sys
138 import re, optparse
141 # DefaultDict
142 # ~~~~~~~~~~~
143 # As `collections.defaultdict` is only introduced in Python 2.5, we
144 # define a simplified version of the dictionary with default from
145 # http://code.activestate.com/recipes/389639/
146 # ::
class DefaultDict(dict):
    """Dictionary whose subscript access falls back to a default value.

    Looking up a missing key with ``d[key]`` returns `self.default` instead
    of raising `KeyError`.  The missing key is not inserted.
    """

    def __init__(self, default=None, *args, **kwargs):
        """Store `default`; the remaining arguments fill the mapping
        exactly like ``dict(*args, **kwargs)``.
        """
        initial = dict(*args, **kwargs)
        self.update(initial)
        self.default = default

    def __getitem__(self, key):
        """Return the value stored under `key` or `self.default`."""
        if key in self:
            return dict.__getitem__(self, key)
        return self.default
158 # Defaults
159 # ========
161 # The `defaults` object provides a central repository for default
162 # values and their customisation. ::
164 defaults = optparse.Values()
166 # It is used for
168 # * the initialisation of data arguments in TextCodeConverter_ and
169 # PylitOptions_
171 # * completion of command line options in `PylitOptions.complete_values`_.
173 # This allows the easy creation of back-ends that customise the
174 # defaults and then call `main`_ e.g.:
176 # >>> import pylit
177 # >>> pylit.defaults.comment_string = "## "
178 # >>> pylit.defaults.codeindent = 4
179 # >>> pylit.main()
181 # The following default values are defined in pylit.py:
183 # languages
184 # ---------
186 # Mapping of code file extensions to code language::
defaults.languages = DefaultDict("python",  # fallback language
                                 # code file extension -> code language
                                 {".c": "c",
                                  ".cc": "c++",
                                  ".css": "css",
                                  ".py": "python",
                                  ".sh": "shell",
                                  ".sl": "slang",
                                  ".sty": "latex",
                                  ".tex": "latex",
                                  })
199 # Will be overridden by the ``--language`` command line option.
201 # The first argument is the fallback language, used if there is no
202 # matching extension (e.g. if pylit is used as filter) and no
203 # ``--language`` is specified. It can be changed programmatically by
204 # assignment to the ``.default`` attribute, e.g.
206 # >>> defaults.languages.default='c++'
209 # .. _text_extension:
211 # text_extensions
212 # ---------------
214 # List of known extensions of (reStructured) text files. The first
215 # extension in this list is used by the `_get_outfile_name`_ method to
216 # generate a text output filename::
218 defaults.text_extensions = [".txt", ".rst"]
221 # comment_strings
222 # ---------------
224 # Comment strings for known languages. Used in Code2Text_ to recognise
225 # text blocks and in Text2Code_ to format text blocks as comments.
226 # Defaults to ``'# '``.
228 # **Comment strings include trailing whitespace.** ::
defaults.comment_strings = DefaultDict('# ',  # fallback comment string
                                       # language -> comment string
                                       # (trailing whitespace included)
                                       {"css": '// ',
                                        "c": '// ',
                                        "c++": '// ',
                                        "latex": '% ',
                                        "python": '# ',
                                        "shell": '# ',
                                        "slang": '% ',
                                        })
241 # header_string
242 # -------------
244 # Marker string for a header code block in the text source. No trailing
245 # whitespace needed as indented code follows.
246 # Must be a valid rst directive that accepts code on the same line, e.g.
247 # ``'..admonition::'``.
249 # Default is a comment marker::
251 defaults.header_string = '..'
254 # .. _code_block_marker:
256 # code_block_markers
257 # ------------------
259 # Markup at the end of a documentation block.
260 # Default is Docutils' marker for a `literal block`_::
262 defaults.code_block_markers = DefaultDict('::')
264 # The `code_block_marker` string is `inserted into a regular expression`_.
265 # Language-specific markers can be defined programmatically, e.g. in a
266 # wrapper script.
268 # In a document where code examples are only one of several uses of
269 # literal blocks, it is more appropriate to single out the source code,
270 # e.g. with the double colon at a separate line ("expanded form")
272 # ``defaults.code_block_markers.default = ':: *'``
274 # or a dedicated ``.. code-block::`` directive [#]_
276 # ``defaults.code_block_markers['c++'] = '.. code-block:: *c++'``
278 # The latter form also allows code in different languages kept together
279 # in one literate source file.
281 # .. [#] The ``.. code-block::`` directive is not (yet) supported by
282 # standard Docutils. It is provided by several add-ons, including
283 # the `code-block directive`_ project in the Docutils Sandbox and
284 # Sphinx_.
287 # strip
288 # -----
290 # Export to the output format stripping documentation or code blocks::
292 defaults.strip = False
294 # strip_marker
295 # ------------
297 # Strip literal marker from the end of documentation blocks when
298 # converting to code format. Makes the code more concise but loses the
299 # synchronisation of line numbers in text and code formats. Can also be used
300 # (together with the auto-completion of the code-text conversion) to change
301 # the `code_block_marker`::
303 defaults.strip_marker = False
305 # add_missing_marker
306 # ------------------
308 # When converting from code format to text format, add a `code_block_marker`
309 # at the end of documentation blocks if it is missing::
311 defaults.add_missing_marker = True
313 # Keep this at ``True``, if you want to re-convert to code format later!
316 # .. _defaults.preprocessors:
318 # preprocessors
319 # -------------
321 # Preprocess the data with language-specific filters_
322 # Set below in Filters_::
324 defaults.preprocessors = {}
326 # .. _defaults.postprocessors:
328 # postprocessors
329 # --------------
331 # Postprocess the data with language-specific filters_::
333 defaults.postprocessors = {}
335 # .. _defaults.codeindent:
337 # codeindent
338 # ----------
340 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
342 defaults.codeindent = 2
344 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
345 # first recognised code line (header or first indented literal block
346 # of the text source).
348 # overwrite
349 # ---------
351 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
353 defaults.overwrite = 'update'
355 # Recognised values:
357 # :'yes': overwrite eventually existing `outfile`,
358 # :'update': fail if the `outfile` is newer than `infile`,
359 # :'no': fail if `outfile` exists.
362 # Extensions
363 # ==========
365 # Try to import optional extensions::
367 try:
368 import pylit_elisp
369 except ImportError:
370 pass
373 # Converter Classes
374 # =================
376 # The converter classes implement a simple state machine to separate and
377 # transform documentation and code blocks. For this task, only a very limited
378 # parsing is needed. PyLit's parser assumes:
380 # * `indented literal blocks`_ in a text source are code blocks.
382 # * comment blocks in a code source where every line starts with a matching
383 # comment string are documentation blocks.
385 # TextCodeConverter
386 # -----------------
387 # ::
389 class TextCodeConverter(object):
390 """Parent class for the converters `Text2Code` and `Code2Text`.
393 # The parent class defines data attributes and functions used in both
394 # `Text2Code`_ converting a text source to executable code source, and
395 # `Code2Text`_ converting commented code to a text source.
397 # Data attributes
398 # ~~~~~~~~~~~~~~~
400 # Class default values are fetched from the `defaults`_ object and can be
401 # overridden by matching keyword arguments during class instantiation. This
402 # also works with keyword arguments to `get_converter`_ and `main`_, as these
403 # functions pass on unused keyword args to the instantiation of a converter
404 # class. ::
406 language = defaults.languages.default
407 comment_strings = defaults.comment_strings
408 comment_string = "" # set in __init__ (if empty)
409 codeindent = defaults.codeindent
410 header_string = defaults.header_string
411 code_block_markers = defaults.code_block_markers
412 code_block_marker = "" # set in __init__ (if empty)
413 strip = defaults.strip
414 strip_marker = defaults.strip_marker
415 add_missing_marker = defaults.add_missing_marker
416 directive_option_regexp = re.compile(r' +:(\w|[-._+:])+:( |$)')
417 state = "" # type of current block, see `TextCodeConverter.convert`_
419 # Interface methods
420 # ~~~~~~~~~~~~~~~~~
422 # .. _TextCodeConverter.__init__:
424 # __init__
425 # """"""""
427 # Initialising sets the `data` attribute, an iterable object yielding lines of
428 # the source to convert. [#]_
430 # .. [#] The most common choice of data is a `file` object with the text
431 # or code source.
433 # To convert a string into a suitable object, use its splitlines method
434 # like ``"2 lines\nof source".splitlines(True)``.
437 # Additional keyword arguments are stored as instance variables,
438 # overwriting the class defaults::
def __init__(self, data, **keyw):
    """Initialise the converter for `data`.

    data   -- iterable data object
              (list, file, generator, string, ...)
    **keyw -- remaining keyword arguments are
              stored as data-attributes
    """
    self.data = data
    self.__dict__.update(keyw)

# If empty, `code_block_marker` and `comment_string` are set according
# to the `language`::

    if not self.code_block_marker:
        self.code_block_marker = self.code_block_markers[self.language]
    if not self.comment_string:
        self.comment_string = self.comment_strings[self.language]
    self.stripped_comment_string = self.comment_string.rstrip()

# Pre- and postprocessing filters are set (with
# `TextCodeConverter.get_filter`_)::

    self.preprocessor = self.get_filter("preprocessors", self.language)
    self.postprocessor = self.get_filter("postprocessors", self.language)

# .. _inserted into a regular expression:
#
# Finally, a regular_expression for the `code_block_marker` is compiled
# to find valid cases of `code_block_marker` in a given line and return
# the groups: ``\1 prefix, \2 code_block_marker, \3 remainder`` ::

    # Raw strings keep the regexp escapes (``\.``) out of Python's own
    # string-escape processing; the compiled patterns are unchanged.
    marker = self.code_block_marker
    if marker == '::':
        # the default marker may occur at the end of a text line
        self.marker_regexp = re.compile(r'^( *(?!\.\.).*)(::)([ \n]*)$')
    else:
        # marker must be on a separate line
        self.marker_regexp = re.compile(r'^( *)(%s)(.*\n?)$' % marker)
478 # .. _TextCodeConverter.__iter__:
480 # __iter__
481 # """"""""
483 # Return an iterator for the instance. Iteration yields lines of converted
484 # data.
486 # The iterator is a chain of iterators acting on `self.data` that does
488 # * preprocessing
489 # * text<->code format conversion
490 # * postprocessing
492 # Pre- and postprocessing are only performed, if filters for the current
493 # language are registered in `defaults.preprocessors`_ and|or
494 # `defaults.postprocessors`_. The filters must accept an iterable as first
495 # argument and yield the processed input data line-wise.
496 # ::
def __iter__(self):
    """Return an iterator yielding the converted lines of `self.data`.

    The data is passed through the preprocessor, the format conversion
    and the postprocessor, in this order.
    """
    preprocessed = self.preprocessor(self.data)
    converted = self.convert(preprocessed)
    return self.postprocessor(converted)
504 # .. _TextCodeConverter.__call__:
506 # __call__
507 # """"""""
508 # The special `__call__` method allows the use of class instances as callable
509 # objects. It returns the converted data as list of lines::
def __call__(self):
    """Run the conversion and return the result as a list of lines."""
    return list(self)
516 # .. _TextCodeConverter.__str__:
518 # __str__
519 # """""""
520 # Return converted data as string::
def __str__(self):
    """Return the converted data joined into one string."""
    converted_lines = self()
    return "".join(converted_lines)
526 # Helpers and convenience methods
527 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
529 # .. _TextCodeConverter.convert:
531 # convert
532 # """""""
534 # The `convert` method generates an iterator that does the actual code <-->
535 # text format conversion. The converted data is yielded line-wise and the
536 # instance's `state` attribute indicates whether the current line is "header",
537 # "documentation", or "code_block"::
def convert(self, lines):
    """Iterate over lines of a program document and convert
    between "text" and "code" format.

    lines -- iterable yielding the lines of the source

    Yields the converted lines.  While iterating, `self.state` names the
    type of the block that is currently processed.
    """

# Initialise internal data arguments. (Done here, so that every new iteration
# re-initialises them.)
#
# `state`
#   the "type" of the currently processed block of lines. One of
#
#   :"":              initial state: check for header,
#   :"header":        leading code block: strip `header_string`,
#   :"documentation": documentation part: comment out,
#   :"code_block":    literal blocks containing source code: unindent.
#
# ::

    self.state = ""

# `_codeindent`
#   * Do not confuse the internal attribute `_codeindent` with the configurable
#     `codeindent` (without the leading underscore).
#   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent of
#     first non-blank "code_block" line and stripped from all "code_block" lines
#     in the text-to-code conversion,
#   * `codeindent` is set in `__init__` to `defaults.codeindent`_ and added to
#     "code_block" lines in the code-to-text conversion.
#
# ::

    self._codeindent = 0

# `_textindent`
#   * set by `Text2Code.documentation_handler`_ to the minimal indent of a
#     documentation block,
#   * used in `Text2Code.set_state`_ to find the end of a code block.
#
# ::

    self._textindent = 0

# `_add_code_block_marker`
#   If the last paragraph of a documentation block does not end with a
#   code_block_marker_, it should be added (otherwise, the back-conversion
#   fails.).
#
#   `_add_code_block_marker` is set by `Code2Text.documentation_handler`_
#   and evaluated by `Code2Text.code_block_handler`_, because the
#   documentation_handler does not know whether the next block will be
#   documentation (with no need for a code_block_marker) or a code block.
#
# ::

    self._add_code_block_marker = False

# Determine the state of the block and convert with the matching "handler"::

    for block in collect_blocks(expandtabs_filter(lines)):
        # `Text2Code.set_state` signals "out of data" with StopIteration.
        # End the generator explicitly: a StopIteration escaping from a
        # generator body becomes a RuntimeError under PEP 479.
        try:
            self.set_state(block)
        except StopIteration:
            return
        for line in getattr(self, self.state+"_handler")(block):
            yield line
605 # .. _TextCodeConverter.get_filter:
607 # get_filter
608 # """"""""""
609 # ::
def get_filter(self, filter_set, language):
    """Return the filter registered for `language` in `filter_set`.

    filter_set -- name of an attribute of `defaults` holding a mapping of
                  filters ("preprocessors" or "postprocessors")
    language   -- name of the code language

    The lookup key depends on the converter class: "text2<language>" for
    `Text2Code`, "<language>2text" for `Code2Text` and "" otherwise.
    If no filter is found, `identity_filter` is returned.
    """
    converter_class = self.__class__
    if converter_class == Text2Code:
        key = "text2" + language
    elif converter_class == Code2Text:
        key = language + "2text"
    else:
        key = ""
    try:
        registry = getattr(defaults, filter_set)
        return registry[key]
    except (AttributeError, KeyError):
        # no matching filter set or no filter for this key
        return identity_filter
627 # get_indent
628 # """"""""""
629 # Return the number of leading spaces in `line`::
def get_indent(self, line):
    """Return the length of the leading whitespace of `line`.

    All characters removed by ``line.lstrip()`` are counted, so a
    whitespace-only line counts in full (including its newline).
    """
    unindented = line.lstrip()
    return len(line) - len(unindented)
637 # Text2Code
638 # ---------
640 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
641 # Code blocks are unindented, documentation is commented (or filtered, if the
642 # ``strip`` option is True).
644 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
645 # literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
646 # treated as part of the documentation. This allows the inclusion of
647 # examples:
649 # >>> 23 + 3
650 # 26
652 # Mark that there is no double colon before the doctest block in the
653 # text source.
655 # The class inherits the interface and helper functions from
656 # TextCodeConverter_ and adds functions specific to the text-to-code format
657 # conversion::
659 class Text2Code(TextCodeConverter):
660 """Convert a (reStructured) text source to code source
663 # .. _Text2Code.set_state:
665 # set_state
666 # ~~~~~~~~~
667 # ::
def set_state(self, block):
    """Determine the state of `block` and store it in `self.state`.

    block -- list of lines

    Raises StopIteration if `block` is empty.
    """

# `set_state` is used inside an iteration. Hence, if we are out of data, a
# StopIteration exception should be raised::

    if not block:
        raise StopIteration

# The new state depends on the active state (from the last block) and
# features of the current block. It is either "header", "documentation", or
# "code_block".
#
# If the current state is "" (first block), check for
# the `header_string` indicating a leading code block::

    previous = self.state
    if previous == "":
        if block[0].startswith(self.header_string):
            self.state = "header"
        else:
            self.state = "documentation"
        return

# If the current state is "documentation", the next block is also
# documentation. The end of a documentation part is detected in the
# `Text2Code.documentation_handler`_::

    if previous not in ("code_block", "header"):
        return

# A "code_block" ends with the first less indented, non-blank line.
# `_textindent` is set by the documentation handler to the indent of the
# preceding documentation block::

    indents = []
    for line in block:
        if line.rstrip():
            indents.append(self.get_indent(line))
    if indents and min(indents) <= self._textindent:
        self.state = 'documentation'
    else:
        self.state = 'code_block'
713 # TODO: (or not to do?) insert blank line before the first line with too-small
714 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
715 # split and push-back of the documentation part)?
717 # .. _Text2Code.header_handler:
719 # header_handler
720 # ~~~~~~~~~~~~~~
722 # Sometimes code needs to remain on the first line(s) of the document to be
723 # valid. The most common example is the "shebang" line that tells a POSIX
724 # shell how to process an executable file::
726 #!/usr/bin/env python
728 # In Python, the special comment to indicate the encoding, e.g.
729 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
730 # or code too.
732 # If we want to keep the line numbers in sync for text and code source, the
733 # reStructured Text markup for these header lines must start at the same line
734 # as the first header line. Therefore, header lines could not be marked as
735 # literal block (this would require the ``::`` and an empty line above the
736 # code_block).
738 # OTOH, a comment may start at the same line as the comment marker and it
739 # includes subsequent indented lines. Comments are visible in the reStructured
740 # Text source but hidden in the pretty-printed output.
742 # With a header converted to comment in the text source, everything before
743 # the first documentation block (i.e. before the first paragraph using the
744 # matching comment string) will be hidden away (in HTML or PDF output).
746 # This seems a good compromise, the advantages
748 # * line numbers are kept
749 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
750 # * greater flexibility: you can hide a repeating header in a project
751 # consisting of many source files.
753 # set off the disadvantages
755 # - it may come as surprise if a part of the file is not "printed",
756 # - one more syntax element to learn for rst newbies to start with pylit,
757 # (however, starting from the code source, this will be auto-generated)
759 # In the case that there is no matching comment at all, the complete code
760 # source will become a comment -- however, in this case it is not very likely
761 # the source is a literate document anyway.
763 # If needed for the documentation, it is possible to quote the header in (or
764 # after) the first documentation block, e.g. as `parsed literal`.
765 # ::
def header_handler(self, lines):
    """Format the leading code block (header) as source code.

    The first occurrence of `self.header_string` is removed from the
    first line (`lines` is modified in place); then the block is
    processed by `self.code_block_handler`.
    """
    first_line = lines[0]
    lines[0] = first_line.replace(self.header_string, "", 1)
    for code_line in self.code_block_handler(lines):
        yield code_line
776 # .. _Text2Code.documentation_handler:
778 # documentation_handler
779 # ~~~~~~~~~~~~~~~~~~~~~
781 # The 'documentation' handler processes everything that is not recognised as
782 # "code_block". Documentation is quoted with `self.comment_string`
783 # (or filtered with `--strip=True`).
785 # If end-of-documentation marker is detected,
787 # * set state to 'code_block'
788 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
789 # next "documentation" block)
791 # ::
def documentation_handler(self, lines):
    """Convert a documentation block from text to code format.

    Every line is prefixed with `self.comment_string`, except blank lines
    that follow a code-block marker.  Nothing is yielded if `self.strip`
    is true.  A line matching `self.marker_regexp` sets `self.state` to
    "code_block" and `self._textindent` to the indent of that line.
    """
    for line in lines:
        has_text = bool(line.rstrip())
        # a non-blank line after the marker that is no directive option
        # reveals the marker as false positive
        if (self.state == "code_block" and has_text
            and not self.directive_option_regexp.search(line)):
            self.state = "documentation"
        # end of documentation block?
        if self.marker_regexp.search(line):
            self.state = "code_block"
            self._textindent = self.get_indent(line)
        if self.strip:
            continue
        # blank lines preceding a code block are not commented
        if has_text or self.state != "code_block":
            yield self.comment_string + line
        else:
            yield line
817 # .. _Text2Code.code_block_handler:
819 # code_block_handler
820 # ~~~~~~~~~~~~~~~~~~
822 # The "code_block" handler is called with an indented literal block. It
823 # removes leading whitespace up to the indentation of the first code line in
824 # the file (this deviation from Docutils behaviour allows indented blocks of
825 # Python code). ::
def code_block_handler(self, block):
    """Convert an indented literal block to source code format.

    block -- list of lines

    Yields the lines with `self._codeindent` leading spaces removed.
    Raises ValueError if a non-blank line is indented less than
    `self._codeindent`.
    """

# If still unset, determine the indentation of code blocks from first non-blank
# code line::

    if self._codeindent == 0:
        self._codeindent = self.get_indent(block[0])

# Yield unindented lines after check whether we can safely unindent. If the
# line is less indented than `_codeindent`, something got wrong. ::

    for line in block:
        if line.lstrip() and self.get_indent(line) < self._codeindent:
            # call form of `raise`: valid in every Python version
            raise ValueError("code block contains line less indented "
                             "than %d spaces \n%r" % (self._codeindent, block))
        yield line.replace(" "*self._codeindent, "", 1)
847 # Code2Text
848 # ---------
850 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
851 # a source in "code format" (i.e. in a programming language), extracts
852 # documentation from comment blocks, and puts program code in literal blocks.
854 # The class inherits the interface and helper functions from
855 # TextCodeConverter_ and adds functions specific to the code-to-text format
856 # conversion::
858 class Code2Text(TextCodeConverter):
859 """Convert code source to text source
862 # set_state
863 # ~~~~~~~~~
865 # Check if block is "header", "documentation", or "code_block":
867 # A paragraph is "documentation", if every non-blank line starts with a
868 # matching comment string (including whitespace except for commented blank
869 # lines) ::
def set_state(self, block):
    """Determine the state of `block` and store it in `self.state`.

    A block is "documentation" unless it contains a line that is neither
    blank, nor starts with `self.comment_string`, nor is a bare comment
    string.  Otherwise it is "header" (if no state was set before) or
    "code_block".
    """
    bare_comment = self.comment_string.rstrip()
    has_code = False
    for line in block:
        content = line.rstrip()
        # documentation lines are commented, blank or a blank comment
        if not (line.startswith(self.comment_string)
                or not content
                or content == bare_comment):
            has_code = True
            break
    if not has_code:
        self.state = "documentation"
    elif self.state == "":
        self.state = "header"
    else:
        self.state = "code_block"
894 # header_handler
895 # ~~~~~~~~~~~~~~
897 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
898 # discussion of the "header" state.) ::
def header_handler(self, lines):
    """Format the leading code block (header) as text.

    The block is converted by `self.code_block_handler` and
    `self.header_string` is prepended to the first converted line.
    Nothing is yielded if `self.strip` is True or the converted block
    is empty.
    """
    if self.strip == True:
        return
    # get iterator over the lines that formats them as code-block
    lines = iter(self.code_block_handler(lines))
    # Prepend the header string to the first line.  The ``for ... break``
    # takes one item without calling the Python-2-only ``.next()`` method
    # and stays silent if there is no line at all.
    for first in lines:
        yield self.header_string + first
        break
    # yield remaining lines
    for line in lines:
        yield line
912 # .. _Code2Text.documentation_handler:
914 # documentation_handler
915 # ~~~~~~~~~~~~~~~~~~~~~
917 # The *documentation state* handler converts a comment to a documentation
918 # block by stripping the leading `comment string` from every line::
def documentation_handler(self, block):
    """Uncomment a documentation block of the code source.

    Yields the lines of `block` after processing with
    `self.uncomment_line`.  Depending on the `strip`, `strip_marker` and
    `add_missing_marker` settings, the code-block marker is stripped or
    `self._add_code_block_marker` is updated.
    """

# Strip comment strings::

    lines = [self.uncomment_line(line) for line in block]

# If the code block is stripped, the literal marker would lead to an
# error when the text is converted with Docutils. Strip it as well. ::

    if self.strip or self.strip_marker:
        self.strip_code_block_marker(lines)

# Otherwise, check for the `code_block_marker`_ at the end of the
# documentation block (skipping directive options that might follow it)::

    elif self.add_missing_marker:
        # the marker is missing unless it is found below
        marker_missing = True
        for line in reversed(lines):
            if self.marker_regexp.search(line):
                marker_missing = False
                break
            if (line.rstrip() and
                not self.directive_option_regexp.search(line)):
                break
        self._add_code_block_marker = marker_missing

# Yield lines::

    for line in lines:
        yield line
954 # uncomment_line
955 # ~~~~~~~~~~~~~~
957 # Return documentation line after stripping comment string. Consider the
958 # case that a blank line has a comment string without trailing whitespace::
def uncomment_line(self, line):
    """Return documentation `line` with the comment string removed.

    A line starting with `self.comment_string` loses this prefix.  A
    blank comment line (only `self.stripped_comment_string`, i.e. the
    comment string without its trailing whitespace) loses the stripped
    comment string.  Other lines (e.g. blank lines) are returned as-is.
    """
    if line.startswith(self.comment_string):
        # Return at once: the remainder must not be tested for a bare
        # comment string, or a documentation line like "# #" (text "#")
        # would be emptied.
        return line[len(self.comment_string):]
    if line.rstrip() == self.stripped_comment_string:
        return line.replace(self.stripped_comment_string, "", 1)
    return line
967 # .. _Code2Text.code_block_handler:
969 # code_block_handler
970 # ~~~~~~~~~~~~~~~~~~
972 # The `code_block` handler returns the code block as indented literal
973 # block (or filters it, if ``self.strip == True``). The amount of the code
974 # indentation is controlled by `self.codeindent` (default 2). ::
def code_block_handler(self, lines):
    """Convert a code block to text format (indent or strip).

    Nothing is yielded if `self.strip` is True.  Otherwise, a pending
    code-block marker (see `self._add_code_block_marker`) and a blank
    line are yielded first, followed by the lines indented by
    `self.codeindent` spaces.
    """
    if self.strip == True:
        return
    # eventually insert transition marker
    if self._add_code_block_marker:
        self.state = "documentation"
        for marker_line in (self.code_block_marker + "\n", "\n"):
            yield marker_line
        self._add_code_block_marker = False
    self.state = "code_block"
    for line in lines:
        yield "".join((" " * self.codeindent, line))
993 # strip_code_block_marker
994 # ~~~~~~~~~~~~~~~~~~~~~~~
996 # Replace the literal marker with the equivalent of Docutils replace rules
998 # * strip ``::``-line (and preceding blank line) if on a line on its own
999 # * strip ``::`` if it is preceded by whitespace.
1000 # * convert ``::`` to a single colon if preceded by text
1002 # `lines` is a list of documentation lines (with a trailing blank line).
1003 # It is modified in-place::
def strip_code_block_marker(self, lines):
    """Remove the code block marker from the end of a documentation block.

    `lines` is a list of documentation lines; the last-but-one entry is
    inspected (the last one is expected to be the trailing blank line).
    The list is modified in-place, nothing is returned.
    """
    if len(lines) < 2:
        return  # just one line (no trailing blank line)

    # `found` is None or has the groups
    # \1 leading text, \2 code_block_marker, \3 remainder
    found = self.marker_regexp.search(lines[-2])
    if found is None:  # no code_block_marker present
        return

    leading = found.group(1)
    if not leading:
        # `code_block_marker` on a line of its own: drop the line ...
        del lines[-2]
        # ... and the line before it, if that one is blank
        if len(lines) >= 2 and not lines[-2].lstrip():
            del lines[-2]
        return

    text = leading.rstrip()
    if text != leading:
        # '::' follows whitespace: remove marker and whitespace
        lines[-2] = text + found.group(3)
    else:
        # '::' follows text: leave a single colon
        lines[-2] = text + ':' + found.group(3)
1028 # Filters
1029 # =======
1031 # Filters allow pre- and post-processing of the data to bring it into a format
1032 # suitable for the "normal" text<->code conversion. An example is the
1033 # conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and back).
1037 # Filters are generator functions that return an iterator acting on a
1038 # `data` iterable and yielding processed `data` lines.
1040 # identity_filter
1041 # ---------------
1043 # The most basic filter is the identity filter, that returns its argument as
1044 # iterator::
def identity_filter(data):
    """Return an iterator over `data`, passing every item on unchanged."""
    return iter(data)
1050 # expandtabs_filter
1051 # -----------------
1053 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
1055 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
1056 # hard tabs can lead to errors when the indentation is changed. ::
def expandtabs_filter(data):
    """Yield the lines of `data` with hard tabs expanded to spaces.

    Every item is passed through `str.expandtabs` (default tab size).
    """
    for raw_line in data:
        expanded = raw_line.expandtabs()
        yield expanded
1064 # collect_blocks
1065 # --------------
1067 # A filter to aggregate "paragraphs" (blocks separated by blank
1068 # lines). Yields lists of lines::
def collect_blocks(lines):
    """Group `lines` into paragraphs.

    Yield one list per paragraph, i.e. per run of lines that is separated
    from the next run by one or more blank (whitespace only) lines.

    The blank lines trailing a paragraph are collected with it.  A final
    (possibly empty) list is always yielded at the end of the input.
    """
    paragraph = []
    seen_blank = False
    for text in lines:
        is_blank = not text.rstrip()
        if seen_blank and not is_blank:
            # first non-blank line after a gap: hand out the finished
            # paragraph and start the next one with this line
            yield paragraph
            paragraph = [text]
            seen_blank = False
        else:
            if is_blank:
                seen_blank = True
            paragraph.append(text)
    yield paragraph
1093 # dumb_c_preprocessor
1094 # -------------------
1096 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
1097 # only converts lines that
1099 # * start with "/\* " and end with " \*/" (followed by whitespace only)
1101 # A more sophisticated version would also
1103 # * convert multi-line comments
1105 # + Keep indentation or strip 3 leading spaces?
1107 # * account for nested comments
1109 # * only convert comments that are separated from code by a blank line
1111 # ::
def dumb_c_preprocessor(data):
    """change `C` ``/* `` `` */`` comments into C++ ``// `` comments

    Works line by line: only a line that starts with ``/* `` and ends
    (ignoring trailing whitespace) with `` */`` is converted, every other
    line is yielded unchanged.
    """
    cxx_marker = defaults.comment_strings["c++"]
    opener = "/* "
    closer = " */"
    for line in data:
        is_comment_line = (line.startswith(opener)
                           and line.rstrip().endswith(closer))
        if not is_comment_line:
            yield line
            continue
        # replace the opening delimiter by the C++ comment string ...
        converted = line.replace(opener, cxx_marker, 1)
        # ... and cut out the *last* closing delimiter (`str.replace` can
        # only count from the left, hence `rsplit` + `join`)
        yield "".join(converted.rsplit(closer, 1))
1126 # Unfortunately, the `replace` method of strings does not support negative
1127 # numbers for the `count` argument:
1129 # >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1130 # False
1132 # However, there is the `rsplit` method, that can be used together with `join`:
1134 # >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1135 # True
1137 # dumb_c_postprocessor
1138 # --------------------
1140 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1141 # delimiters ::
def dumb_c_postprocessor(data):
    """change C++ ``// `` comments into `C` ``/* `` `` */`` comments

    Works line by line:

    * a line that equals the stripped C++ comment string (ignoring trailing
      whitespace) gets the first occurrence of the comment string removed,
    * a line starting with the comment string is wrapped in ``/* `` and
      `` */`` (trailing whitespace is replaced by a single newline),
    * every other line is yielded unchanged.
    """
    cxx_marker = defaults.comment_strings["c++"]
    bare_marker = cxx_marker.rstrip()
    opener = "/* "
    closer = " */"
    for line in data:
        if line.rstrip() == bare_marker:
            yield line.replace(cxx_marker, "", 1)
        elif line.startswith(cxx_marker):
            body = line.replace(cxx_marker, opener, 1)
            yield body.rstrip() + closer + "\n"
        else:
            yield line
1157 # register filters
1158 # ----------------
1160 # ::
# C sources: C comments <-> C++ comments before/after the conversion
defaults.preprocessors['c2text'] = dumb_c_preprocessor
defaults.postprocessors['text2c'] = dumb_c_postprocessor
# CSS sources use the same comment delimiters, so the same filters apply
defaults.preprocessors['css2text'] = dumb_c_preprocessor
defaults.postprocessors['text2css'] = dumb_c_postprocessor
1168 # Command line use
1169 # ================
1171 # Using this script from the command line will convert a file according to its
1172 # extension. This default can be overridden by a couple of options.
1174 # Dual source handling
1175 # --------------------
1177 # How to determine which source is up-to-date?
1178 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1180 # - set modification date of `outfile` to the one of `infile`
1182 # Points out that the source files are 'synchronised'.
1184 # * Are there problems to expect from "backdating" a file? Which?
1186 # Looking at http://www.unix.com/showthread.php?t=20526, it seems
1187 # perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1188 # description of the "actuality" of the data in the file.
1190 # * Should this become a default or an option?
1192 # - alternatively move input file to a backup copy (with option: `--replace`)
1194 # - check modification date before overwriting
1195 # (with option: `--overwrite=update`)
1197 # - check modification date before editing (implemented as `Jed editor`_
1198 # function `pylit_check()` in `pylit.sl`_)
1200 # .. _Jed editor: http://www.jedsoft.org/jed/
1201 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1203 # Recognised Filename Extensions
1204 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1206 # Instead of defining a new extension for "pylit" literate programs,
1207 # by default ``.txt`` will be appended for the text source and stripped by
1208 # the conversion to the code source. I.e. for a Python program foo:
1210 # * the code source is called ``foo.py``
1211 # * the text source is called ``foo.py.txt``
1212 # * the html rendering is called ``foo.py.html``
1215 # OptionValues
1216 # ------------
1218 # The following class adds `as_dict`_, `complete`_ and `__getattr__`_
1219 # methods to `optparse.Values`::
class OptionValues(optparse.Values):
    """`optparse.Values` with dictionary export, completion and a default.

    Adds the methods `as_dict`, `complete` and `__getattr__`.
    """

# .. _OptionValues.as_dict:

# as_dict
# ~~~~~~~

# For use as keyword arguments, it is handy to have the options in a
# dictionary. `as_dict` returns a copy of the instances object dictionary::

    def as_dict(self):
        """Return options as dictionary object

        The result is a (shallow) copy of the instance dictionary, changing
        it does not change the option values.
        """
        return self.__dict__.copy()

# .. _OptionValues.complete:

# complete
# ~~~~~~~~

# ::

    def complete(self, **keyw):
        """
        Complete the option values with keyword arguments.

        Do not overwrite existing values. Only use arguments that do not
        have a corresponding attribute in `self`.
        """
        for key in keyw:
            # Test the instance dictionary directly: `hasattr` is useless
            # here, as `__getattr__` provides a value for every name.
            if key not in self.__dict__:
                setattr(self, key, keyw[key])

# .. _OptionValues.__getattr__:

# __getattr__
# ~~~~~~~~~~~

# To replace calls using ``options.ensure_value("OPTION", None)`` with the
# more concise ``options.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return default value for non existing options

        Every name that is not found by the normal attribute look-up
        evaluates to None.
        """
        return None
1266 # .. [#] The special method `__getattr__` is only called when an attribute
1267 # look-up has not found the attribute in the usual places (i.e. it is
1268 # not an instance attribute nor is it found in the class tree for
1269 # self).
1272 # PylitOptions
1273 # ------------
1275 # The `PylitOptions` class comprises an option parser and methods for parsing
1276 # and completion of command line options::
class PylitOptions(object):
    """Storage and handling of command line options for pylit"""

# Instantiation
# ~~~~~~~~~~~~~

# ::

    def __init__(self):
        """Set up an `OptionParser` instance for pylit command line options

        The parser is stored in `self.parser`.
        """
        p = optparse.OptionParser(usage=main.__doc__, version=_version)

        # Conversion settings

        p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                     help="convert code source to text source")
        p.add_option("-t", "--txt2code", action="store_true",
                     help="convert text source to code source")
        # optparse insists on a list or tuple of choices; `dict.values()`
        # is only a list under Python 2.
        p.add_option("--language",
                     choices = list(defaults.languages.values()),
                     help="use LANGUAGE native comment style")
        p.add_option("--comment-string", dest="comment_string",
                     help="documentation block marker in code source "
                     "(including trailing whitespace, "
                     "default: language dependent)")
        p.add_option("-m", "--code-block-marker", dest="code_block_marker",
                     help="syntax token starting a code block. (default '::')")
        p.add_option("--codeindent", type="int",
                     help="Number of spaces to indent code blocks with "
                     "text2code (default %d)" % defaults.codeindent)

        # Output file handling

        p.add_option("--overwrite", action="store",
                     choices = ["yes", "update", "no"],
                     help="overwrite output file (default 'update')")
        p.add_option("--replace", action="store_true",
                     help="move infile to a backup copy (appending '~')")
        p.add_option("-s", "--strip", action="store_true",
                     help='"export" by stripping documentation or code')

        # Special actions

        p.add_option("-d", "--diff", action="store_true",
                     help="test for differences to existing file")
        p.add_option("--doctest", action="store_true",
                     help="run doctest.testfile() on the text version")
        p.add_option("-e", "--execute", action="store_true",
                     help="execute code (Python only)")

        self.parser = p

# .. _PylitOptions.parse_args:

# parse_args
# ~~~~~~~~~~

# The `parse_args` method calls the `optparse.OptionParser` on command
# line or provided args and returns the result as `OptionValues`_
# instance. Defaults can be provided as keyword arguments::

    def parse_args(self, args=None, **keyw):
        """parse command line arguments using `optparse.OptionParser`

           parse_args(args, **keyw) -> OptionValues instance

            args --  list of command line arguments
                     (None: let optparse use the current ``sys.argv[1:]``).
            keyw --  keyword arguments or dictionary of option defaults
        """
        # parse arguments (`args` is passed on as-is: optparse itself falls
        # back to `sys.argv[1:]` at call time if it is None)
        (values, args) = self.parser.parse_args(args, OptionValues(keyw))
        # Convert FILE and OUTFILE positional args to option values
        # (other positional arguments are ignored)
        try:
            values.infile = args[0]
            values.outfile = args[1]
        except IndexError:
            # less than two positional arguments: keep what is there
            pass

        return values

# .. _PylitOptions.complete_values:

# complete_values
# ~~~~~~~~~~~~~~~

# Complete an OptionValues instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- OptionValues instance

        Raises KeyError if no language is set and the extension of the
        code file is not a key of `values.languages`.
        """

# Complete with module-level defaults_::

        values.complete(**defaults.__dict__)

# Ensure infile is a string::

        values.ensure_value("infile", "")

# Guess conversion direction from `infile` filename::

        if values.txt2code is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in values.text_extensions:
                values.txt2code = True
            elif in_extension in values.languages:
                values.txt2code = False

# Auto-determine the output file name::

        values.ensure_value("outfile", self._get_outfile_name(values))

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = not (out_extension in values.text_extensions)

# Set the language of the code (the extension is only looked up if the
# language is not set already, so that an explicit language also works with
# extensions missing in `values.languages`)::

        if getattr(values, "language", None) is None:
            if values.txt2code:
                code_file = values.outfile
            else:
                code_file = values.infile
            code_extension = os.path.splitext(code_file)[1]
            values.language = values.languages[code_extension]

        return values

# _get_outfile_name
# ~~~~~~~~~~~~~~~~~

# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:

# * '-' (stdin) results in '-' (stdout)
# * strip the `text_extension`_ (txt2code) or
# * add the `text_extension`_ (code2txt)
# * fallback: if no guess can be made, add ".out"

# .. TODO: use values.outfile_extension if it exists?

# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        values -- object with the attributes `infile`, `text_extensions`,
                  `languages` and `txt2code`
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in values.text_extensions:
            return base # strip
        if ext in values.languages or values.txt2code == False:
            return values.infile + values.text_extensions[0] # add
        # give up
        return values.infile + ".out"

# .. _PylitOptions.__call__:

# __call__
# ~~~~~~~~

# The special `__call__` method allows to use PylitOptions instances as
# *callables*: Calling an instance parses the argument list to extract option
# values and completes them based on "context-sensitive defaults".  Keyword
# arguments are passed to `PylitOptions.parse_args`_ as default values. ::

    def __call__(self, args=None, **keyw):
        """parse and complete command line args return option values

        args -- list of command line arguments
                (None: use the current ``sys.argv[1:]``)
        keyw -- option defaults
        """
        values = self.parse_args(args, **keyw)
        return self.complete_values(values)
1463 # Helper functions
1464 # ----------------
1466 # open_streams
1467 # ~~~~~~~~~~~~
1469 # Return file objects for in- and output. If the input path is missing,
1470 # write usage and abort. (An alternative would be to use stdin as default.
1471 # However, this leaves the uninitiated user with a non-responding application
1472 # if (s)he just tries the script without any arguments) ::
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open(infile) or sys.stdin
    out_stream  --  open(outfile, 'w') or sys.stdout
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Additional keyword arguments are accepted and ignored.

    Raises IOError if `infile` is empty, if a file cannot be opened, or if
    `overwrite` forbids writing to `outfile`.
    """
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(1, "Output file exists!", outfile)
        elif overwrite == 'update' and is_newer(outfile, infile):
            raise IOError(1, "Output file is newer than input file!", outfile)
        else:
            out_stream = open(outfile, 'w')
    except Exception:
        # no stream pair can be returned: do not leave the input file open
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1504 # is_newer
1505 # ~~~~~~~~
1507 # ::
def is_newer(path1, path2):
    """Check if `path1` is newer than `path2` (using mtime)

    The modification times of the files at `path1` and `path2` are
    compared.  A file that cannot be examined (`OSError`, e.g. because it
    does not exist) counts as older than any existing file.

    Returns True if `path1` is newer, False if `path2` is newer and None if
    both modification times are equal.  (None is false in a Boolean context
    but can still be told apart from False.)
    """
    stamps = []
    for path in (path1, path2):
        try:
            stamps.append(os.path.getmtime(path))
        except OSError:
            # treat inaccessible files as "infinitely old"
            stamps.append(-1)
    first, second = stamps
    if first == second:
        return None
    return first > second
1536 # get_converter
1537 # ~~~~~~~~~~~~~
1539 # Get an instance of the converter state machine::
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`.

    A `Text2Code` instance if `txt2code` is true, a `Code2Text` instance
    otherwise.  Keyword arguments are passed on to the converter class.
    """
    if not txt2code:
        return Code2Text(data, **keyw)
    return Text2Code(data, **keyw)
1548 # Use cases
1549 # ---------
1551 # run_doctest
1552 # ~~~~~~~~~~~
1553 # ::
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- path of the source ('-' for stdin)
    txt2code    -- False: `infile` is a code source and is converted to text
                   first; otherwise `infile` is read as it is
    globs       -- dictionary of globals for the tests (default: empty)
    verbose     -- passed to `doctest.DocTestRunner`
    optionflags -- passed to `doctest.DocTestRunner`
    keyw        -- passed to the `Code2Text` converter

    Returns the tuple (number of failures, number of tests tried).
    """

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see the documentation of `sys.path`)::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

    if globs is None:
        globs = {}

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            keyw.update({'add_missing_marker': False})
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # the input is no longer needed, do not leave the file open
        if data is not sys.stdin:
            data.close()

# decode doc string if there is a "magic comment" in the first or second line
# (http://docs.python.org/reference/lexical_analysis.html#encoding-declarations)
# ::

    firstlines = ' '.join(docstring.splitlines()[:2])
    match = re.search(r'coding[=:]\s*([-\w.]+)', firstlines)
    # only byte strings can be decoded (a Python 3 `str` has no `decode`)
    if match and hasattr(docstring, 'decode'):
        docencoding = match.group(1)
        docstring = docstring.decode(docencoding)

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    # Keyword arguments are required: the first positional parameter of
    # `DocTestRunner` is `checker`, not `verbose`.
    runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    runner.run(test)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests" % (runner.failures, runner.tries))
    return runner.failures, runner.tries
1603 # diff
1604 # ~~~~
1606 # ::
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile does not exist or is '-', do a round-trip conversion and
    report differences.

    infile   -- path of the source to convert ('-' for stdin)
    outfile  -- path of the file to compare the conversion with
    txt2code -- conversion direction, see `get_converter`
    keyw     -- passed to the converter

    The differences are printed as unified diff.
    Returns True if there are differences, False otherwise.
    """

    import difflib

    # for diffing, we need a copy of the data as list::
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        instream = open(infile)
        try:
            data = instream.readlines()
        finally:
            instream.close()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-' and os.path.exists(outfile):
        outstream = open(outfile)
        try:
            old = outstream.readlines()
        finally:
            outstream.close()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # the diff lines bring their own line ends
        sys.stdout.write(line)
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1654 # execute
1655 # ~~~~~~~
1657 # Works only for python code.
1659 # Does not work with `eval`, as code is not just one expression. ::
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    infile   -- path of the file to execute
    txt2code -- true: `infile` is a text source and is converted with
                `Text2Code` first; false: `infile` is executed as it is
    keyw     -- passed to the `Text2Code` converter
    """

    data = open(infile)
    try:
        if txt2code:
            code = str(Text2Code(data, **keyw))
        else:
            code = data.read()
    finally:
        # everything is read: do not keep the file open while the code runs
        data.close()
    # NOTE: this runs the content of `infile` with the privileges of the
    # caller -- only use it with trusted sources.
    exec(code)
1672 # main
1673 # ----
1675 # If this script is called from the command line, the `main` function will
1676 # convert the input (file or stdin) between text and code formats.
1678 # Option default values for the conversion can be given as keyword arguments
1679 # to `main`_. The option defaults will be updated by command line options and
1680 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1681 # helper functions and the converter instantiation.
1683 # This allows easy customisation for programmatic use -- just call `main`
1684 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1686 # ::
def main(args=None, **defaults):
    """%prog [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in and output.
    """
    # NOTE: the docstring above doubles as usage message of the option
    # parser (``usage=main.__doc__``), so the description of the arguments
    # is kept out of it:
    #
    # args     -- list of command line arguments
    #             (None: the current ``sys.argv[1:]`` is used)
    # defaults -- default values for the options

# Parse and complete the options::

    options = PylitOptions()(args, **defaults)
    # print "infile", repr(options.infile)

# Special actions with early return::

    if options.doctest:
        return run_doctest(**options.as_dict())

    if options.diff:
        return diff(**options.as_dict())

    if options.execute:
        return execute(**options.as_dict())

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**options.as_dict())
    except IOError:
        # `sys.exc_info` instead of ``except IOError, ex`` / ``as ex``:
        # this spelling is valid for both Python 2 and Python 3
        ex = sys.exc_info()[1]
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance::

    converter = get_converter(data, **options.as_dict())

# Convert and write to out_stream::

    try:
        out_stream.write(str(converter))
    finally:
        # The input is not needed any more. Close it now, so that the file
        # is not open while it is renamed below.
        if data is not sys.stdin:
            data.close()

    if out_stream is not sys.stdout:
        print("extract written to %s" % out_stream.name)
        out_stream.close()

# If input and output are from files, set the modification time (`mtime`) of
# the output file to the one of the input file to indicate that the contained
# information is equal. [#]_ ::

    try:
        os.utime(options.outfile, (os.path.getatime(options.outfile),
                                   os.path.getmtime(options.infile))
                )
    except OSError:
        # best effort only (e.g. in- or output is not a file)
        pass

    ## print "mtime", os.path.getmtime(options.infile),  options.infile
    ## print "mtime", os.path.getmtime(options.outfile), options.outfile


# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).


# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        os.rename(options.infile, options.infile + "~")
1759 # Run main, if called from the command line::
if __name__ == '__main__':
    # called as a script: convert according to the command line arguments
    main()
1765 # Open questions
1766 # ==============
1768 # Open questions and ideas for further development
1770 # Clean code
1771 # ----------
1773 # * can we gain from using "shutil" over "os.path" and "os"?
1774 # * use pylint or pyChecker to enforce a consistent style?
1776 # Options
1777 # -------
1779 # * Use templates for the "intelligent guesses" (with Python syntax for string
1780 # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1782 # * Is it sensible to offer the `header_string` option also as command line
1783 # option?
1785 # treatment of blank lines
1786 # ------------------------
1788 # Alternatives: Keep blank lines blank
1790 # - "never" (current setting) -> "visually merges" all documentation
1791 # if there is no interjacent code
1793 # - "always" -> disrupts documentation blocks,
1795 # - "if empty" (no whitespace). Comment if there is whitespace.
1797 # This would allow non-obstructing markup but unfortunately this is (in
1798 # most editors) also non-visible markup.
1800 # + "if double" (if there is more than one consecutive blank line)
1802 # With this handling, the "visual gap" remains in both, text and code
1803 # source.
1806 # Parsing Problems
1807 # ----------------
1809 # * Ignore "matching comments" in literal strings?
1811 # Too complicated: Would need a specific detection algorithm for every
1812 # language that supports multi-line literal strings (C++, PHP, Python)
1814 # * Warn if a comment in code will become documentation after round-trip?
1817 # docstrings in code blocks
1818 # -------------------------
1820 # * How to handle docstrings in code blocks? (it would be nice to convert them
1821 # to rst-text if ``__docformat__ == restructuredtext``)
1823 # TODO: Ask at Docutils users|developers
1825 # Plug-ins
1826 # --------
1828 # Specify a path for user additions and plug-ins. This would require
1829 # converting Pylit from a pure module to a package...
1831 # 6.4.3 Packages in Multiple Directories
1833 # Packages support one more special attribute, __path__. This is initialized
1834 # to be a list containing the name of the directory holding the package's
1835 # __init__.py before the code in that file is executed. This
1836 # variable can be modified; doing so affects future searches for modules and
1837 # subpackages contained in the package.
1839 # While this feature is not often needed, it can be used to extend the set
1840 # of modules found in a package.
1843 # .. References
1845 # .. _Docutils: http://docutils.sourceforge.net/
1846 # .. _Sphinx: http://sphinx.pocoo.org
1847 # .. _Pygments: http://pygments.org/
1848 # .. _code-block directive:
1849 # http://docutils.sourceforge.net/sandbox/code-block-directive/
1850 # .. _literal block:
1851 # .. _literal blocks:
1852 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#literal-blocks
1853 # .. _indented literal block:
1854 # .. _indented literal blocks:
1855 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1856 # .. _quoted literal block:
1857 # .. _quoted literal blocks:
1858 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1859 # .. _parsed-literal blocks:
1860 # http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block
1861 # .. _doctest block:
1862 # .. _doctest blocks:
1863 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks
1865 # .. _feature request and patch by jrioux:
1866 # http://developer.berlios.de/feature/?func=detailfeature&feature_id=4890&group_id=7974