2 # -*- coding: iso-8859-1 -*-
4 # ===============================================================
5 # pylit.py: Literate programming with reStructuredText
6 # ===============================================================
9 # :Version: SVN-Revision $Revision$
11 # :Copyright: 2005, 2007 Guenter Milde.
12 # Released under the terms of the GNU General Public License
24 # :2005-06-29: Initial version.
25 # :2005-06-30: First literate version.
26 # :2005-07-01: Object orientated script using generators.
27 # :2005-07-10: Two state machine (later added 'header' state).
28 # :2006-12-04: Start of work on version 0.2 (code restructuring).
29 # :2007-01-23: 0.2 Published at http://pylit.berlios.de.
30 # :2007-01-25: 0.2.1 Outsourced non-core documentation to the PyLit pages.
31 # :2007-01-26: 0.2.2 New behaviour of `diff` function.
32 # :2007-01-29: 0.2.3 New `header` methods after suggestion by Riccardo Murri.
33 # :2007-01-31: 0.2.4 Raise Error if code indent is too small.
34 # :2007-02-05: 0.2.5 New command line option --comment-string.
35 # :2007-02-09: 0.2.6 Add section with open questions,
36 # Code2Text: let only blank lines (no comment str)
37 # separate text and code,
38 # fix `Code2Text.header`.
39 # :2007-02-19: 0.2.7 Simplify `Code2Text.header`,
40 # new `iter_strip` method replacing a lot of ``if``-s.
41 # :2007-02-22: 0.2.8 Set `mtime` of outfile to the one of infile.
42 # :2007-02-27: 0.3 New `Code2Text` converter after an idea by Riccardo Murri,
43 # explicite `option_defaults` dict for easier customization.
44 # :2007-03-02: 0.3.1 Expand hard-tabs to prevent errors in indentation,
45 # `Text2Code` now also works on blocks,
46 # removed dependency on SimpleStates module.
47 # :2007-03-06: 0.3.2 Bugfix: do not set `language` in `option_defaults`
48 # renamed `code_languages` to `languages`.
49 # :2007-03-16: 0.3.3 New language css,
50 # option_defaults -> defaults = optparse.Values(),
51 # simpler PylitOptions: don't store parsed values,
52 # don't parse at initialization,
53 # OptionValues: return `None` for non-existing attributes,
54 # removed -infile and -outfile, use positional arguments.
55 # :2007-03-19: 0.3.4 Documentation update,
56 # separate `execute` function.
57 # :2007-03-21: Code cleanup in `Text2Code.__iter__`.
58 # :2007-03-23: 0.3.5 Removed "css" from known languages after learning that
59 # there is no C++ style "// " comment string in CSS2.
60 # :2007-04-24: 0.3.6 Documentation update.
61 # :2007-05-18: 0.4 Implement Converter.__iter__ as stack of iterator
62 # generators. Iterating over a converter instance now
63 # yields lines instead of blocks.
64 # Provide "hooks" for pre- and postprocessing filters.
65 # Rename states to avoid confusion with formats:
66 # "text" -> "documentation", "code" -> "code_block".
67 # :2007-05-22: 0.4.1 Converter.__iter__: cleanup and reorganization,
68 # rename parent class Converter -> TextCodeConverter.
69 # :2007-05-23: 0.4.2 Merged Text2Code.converter and Code2Text.converter into
70 # TextCodeConverter.converter.
71 # :2007-05-30: 0.4.3 Replaced use of defaults.code_extensions with
72 # values.languages.keys().
73 # Removed spurious `print` statement in code_block_handler.
74 # Added basic support for 'c' and 'css' languages
75 # with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
76 # :2007-06-06: 0.5 Moved `collect_blocks`_ out of `TextCodeConverter`_,
77 # bugfix: collect all trailing blank lines into a block.
78 # Expand tabs with `expandtabs_filter`_.
79 # :2007-06-20: 0.6 Configurable code-block marker (default ``::``)
80 # :2007-06-28: 0.6.1 Bugfix: reset self.code_block_marker_missing
81 # :2007-12-12: 0.7 prepending an empty string to sys.path in run_doctest() to
82 # allow imports from the current working dir
86 """pylit: bidirectional converter between a *text source* with embedded
87 computer code and a *code source* with embedded documentation.
# Markup format of the docstrings in this module.
__docformat__ = 'restructuredtext'
98 # PyLit is a bidirectional converter between two formats of a computer
101 # * a (reStructured) text document with program code embedded in
103 # * a compilable (or executable) code source with *documentation* embedded in
112 import __builtin__
, os
, sys
121 # The `defaults` object provides a central repository for default values
122 # and their customisation. ::
# Central repository for default values; customise by setting attributes.
defaults = optparse.Values()
128 # * the initialization of data arguments in TextCodeConverter_ and
131 # * completion of command line options in `PylitOptions.complete_values`_.
133 # This allows the easy creation of custom back-ends that customise the
134 # defaults and then call main_ e.g.:
137 # >>> pylit.defaults.comment_string = "## "
138 # >>> pylit.defaults.codeindent = 4
141 # The following default values are defined in pylit.py:
146 # Mapping of code file extension to code language.
147 # Used by `OptionValues.complete`_ to set the `defaults.language`.
148 # The ``--language`` command line option or setting ``defaults.language`` in
149 # programmatic use override this auto-setting feature. ::
151 defaults
.languages
= {".py": "python",
158 # defaults.fallback_language
159 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
161 # Language to use, if there is no matching extension (e.g. if pylit is used as
162 # filter) and no `language` is specified::
# Language used if no extension matches and no `language` is specified.
defaults.fallback_language = "python"
166 # defaults.text_extensions
167 # ~~~~~~~~~~~~~~~~~~~~~~~~
169 # List of known extensions of (reStructured) text files.
170 # Used by `OptionValues._get_outfile` to auto-determine the output filename.
# Known extensions of (reStructured) text files.
defaults.text_extensions = [".txt"]
176 # defaults.comment_strings
177 # ~~~~~~~~~~~~~~~~~~~~~~~~
179 # Dictionary of comment strings for known languages. Comment strings include
180 # trailing whitespace. ::
182 defaults
.comment_strings
= {"python": '# ',
188 # Used in Code2Text_ to recognise text blocks and in Text2Code_ to format
189 # text blocks as comments.
191 # defaults.header_string
192 # ~~~~~~~~~~~~~~~~~~~~~~
194 # Marker string for a header code block in the text source. No trailing
195 # whitespace needed as indented code follows.
196 # Must be a valid rst directive that accepts code on the same line, e.g.
197 # ``'..admonition::'``.
199 # Default is a comment marker::
# Marker for a header code block in the text source (rst comment marker).
defaults.header_string = '..'
203 # defaults.code_block_marker
204 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
206 # Marker string for a code block in the text source.
208 # Default is a literal-block marker::
# Marker for a code block in the text source (rst literal-block marker).
defaults.code_block_marker = '::'
212 # In a document where code examples are only one of several uses of literal
213 # blocks, it is more appropriate to single out the sourcecode with a dedicated
214 # "code-block" directive.
216 # Some highlight plug-ins require a special "sourcecode" or "code-block"
217 # directive instead of the ``::`` literal block marker. Actually,
218 # syntax-highlight is possible without changes to docutils with the Pygments_
219 # package using a "code-block" directive. See the `syntax highlight`_ section
220 # in the features documentation.
222 # The `code_block_marker` string is used in a regular expression. Examples for
223 # alternative forms are ``.. code-block::`` or ``.. code-block:: .* python``.
224 # The second example can differentiate between Python code blocks and
225 # code-blocks in other languages.
227 # Another use would be to mark some code-blocks inactive allowing a literate
228 # source to contain code-blocks that should become active only in some cases.
235 # Export to the output format stripping documentation or code blocks::
# By default, neither documentation nor code blocks are stripped on export.
defaults.strip = False
239 # defaults.strip_marker
240 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
242 # Strip literal marker from the end of documentation blocks when
243 # converting to code format. Makes the code more concise but loses the
244 # synchronization of line numbers in text and code formats. Can also be used
245 # (together with the auto-completion of the code-text conversion) to change
246 # the `code_block_marker`::
# By default, keep the literal marker at the end of documentation blocks.
defaults.strip_marker = False
250 # defaults.preprocessors
251 # ~~~~~~~~~~~~~~~~~~~~~~
253 # Preprocess the data with language-specific filters_
254 # Set below in Filters_::
# Language-specific preprocessing filters, keyed by filter name.
defaults.preprocessors = {}
258 # defaults.postprocessors
259 # ~~~~~~~~~~~~~~~~~~~~~~~
261 # Postprocess the data with language-specific filters_::
# Language-specific postprocessing filters, keyed by filter name.
defaults.postprocessors = {}
265 # defaults.codeindent
266 # ~~~~~~~~~~~~~~~~~~~
268 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
# Number of spaces used to indent code blocks in the code-to-text conversion.
defaults.codeindent = 2
272 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
273 # first recognized code line (header or first indented literal block
274 # of the text source).
279 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
# Policy for an already existing outfile: one of 'yes', 'update', 'no'.
defaults.overwrite = 'update'
285 # :'yes': overwrite `outfile` if it already exists,
286 # :'update': fail if the `outfile` is newer than `infile`,
287 # :'no': fail if `outfile` exists.
293 # Try to import optional extensions::
304 # The converter classes implement a simple state machine to separate and
305 # transform documentation and code blocks. For this task, only a very limited
306 # parsing is needed. PyLit's parser assumes:
308 # * `indented literal blocks`_ in a text source are code blocks.
310 # * comment blocks in a code source where every line starts with a matching
311 # comment string are documentation blocks.
317 class TextCodeConverter(object):
318 """Parent class for the converters `Text2Code` and `Code2Text`.
321 # The parent class defines data attributes and functions used in both
322 # `Text2Code`_ converting a text source to executable code source, and
323 # `Code2Text`_ converting commented code to a text source.
328 # Class default values are fetched from the `defaults`_ object and can be
329 # overridden by matching keyword arguments during class instantiation. This
330 # also works with keyword arguments to `get_converter`_ and `main`_, as these
331 # functions pass on unused keyword args to the instantiation of a converter
334 language
= defaults
.fallback_language
335 comment_strings
= defaults
.comment_strings
336 comment_string
= "" # set in __init__ (if empty)
337 codeindent
= defaults
.codeindent
338 header_string
= defaults
.header_string
339 code_block_marker
= defaults
.code_block_marker
340 strip
= defaults
.strip
341 strip_marker
= defaults
.strip_marker
342 state
= "" # type of current block, see `TextCodeConverter.convert`_
347 # TextCodeConverter.__init__
348 # """"""""""""""""""""""""""
350 # Initializing sets the `data` attribute, an iterable object yielding lines of
351 # the source to convert. [1]_
353 # Additional keyword arguments are stored as instance variables, overwriting
354 # the class defaults. If still empty, `comment_string` is set according to the
359 def __init__(self
, data
, **keyw
):
360 """data -- iterable data object
361 (list, file, generator, string, ...)
362 **keyw -- remaining keyword arguments are
363 stored as data-attributes
366 self
.__dict
__.update(keyw
)
367 if not self
.comment_string
:
368 self
.comment_string
= self
.comment_strings
[self
.language
]
370 # Pre- and postprocessing filters are set (with
371 # `TextCodeConverter.get_filter`_)::
373 self
.preprocessor
= self
.get_filter("preprocessors", self
.language
)
374 self
.postprocessor
= self
.get_filter("postprocessors", self
.language
)
376 # Finally, the regular_expression for the `code_block_marker` is compiled to
377 # find valid cases of code_block_marker in a given line and return the groups:
379 # \1 prefix, \2 code_block_marker, \3 remainder
382 marker
= self
.code_block_marker
384 self
.marker_regexp
= re
.compile('^( *(?!\.\.).*)(%s)([ \n]*)$'
387 # assume code_block_marker is a directive like '.. code-block::'
388 self
.marker_regexp
= re
.compile('^( *)(%s)(.*\n?)$' % marker
)
390 # .. [1] The most common choice of data is a `file` object with the text
393 # To convert a string into a suitable object, use its splitlines method
394 # like ``"2 lines\nof source".splitlines(True)``.
397 # TextCodeConverter.__iter__
398 # """"""""""""""""""""""""""
400 # Return an iterator for the instance. Iteration yields lines of converted
403 # The iterator is a chain of iterators acting on `self.data` that does
406 # * text<->code format conversion
409 # Pre- and postprocessing are only performed, if filters for the current
410 # language are registered in `defaults.preprocessors`_ and|or
411 # `defaults.postprocessors`_. The filters must accept an iterable as first
412 # argument and yield the processed input data linewise.
416 """Iterate over input data source and yield converted lines
418 return self
.postprocessor(self
.convert(self
.preprocessor(self
.data
)))
421 # TextCodeConverter.__call__
422 # """"""""""""""""""""""""""
423 # The special `__call__` method allows the use of class instances as callable
424 # objects. It returns the converted data as list of lines::
427 """Iterate over state-machine and return results as list of lines"""
428 return [line
for line
in self
]
431 # TextCodeConverter.__str__
432 # """""""""""""""""""""""""
433 # Return converted data as string::
436 return "".join(self())
439 # Helpers and convenience methods
440 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
442 # TextCodeConverter.convert
443 # """""""""""""""""""""""""
445 # The `convert` method generates an iterator that does the actual code <-->
446 # text format conversion. The converted data is yielded line-wise and the
447 # instance's `state` attribute indicates whether the current line is "header",
448 # "documentation", or "code_block"::
450 def convert(self
, lines
):
451 """Iterate over lines of a program document and convert
452 between "text" and "code" format
455 # Initialise internal data arguments. (Done here, so that every new iteration
456 # re-initialises them.)
459 # the "type" of the currently processed block of lines. One of
461 # :"": initial state: check for header,
462 # :"header": leading code block: strip `header_string`,
463 # :"documentation": documentation part: comment out,
464 # :"code_block": literal blocks containing source code: unindent.
471 # * Do not confuse the internal attribute `_codeindent` with the configurable
472 # `codeindent` (without the leading underscore).
473 # * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent of
474 # first non-blank "code_block" line and stripped from all "code_block" lines
475 # in the text-to-code conversion,
476 # * `codeindent` is set in `__init__` to `defaults.codeindent`_ and added to
477 # "code_block" lines in the code-to-text conversion.
484 # * set by `Text2Code.documentation_handler`_ to the minimal indent of a
485 # documentation block,
486 # * used in `Text2Code.set_state`_ to find the end of a code block.
492 # `code_block_marker_missing`
493 # If the last paragraph of a documentation block does not end with a
494 # "code_block_marker" (the literal-block marker ``::``), it must
495 # be added (otherwise, the back-conversion fails.).
497 # `code_block_marker_missing` is set by `Code2Text.documentation_handler`_
498 # and evaluated by `Code2Text.code_block_handler`_, because the
499 # documentation_handler does not know whether the next block will be
500 # documentation (with no need for a code_block_marker) or a code block.
504 self
.code_block_marker_missing
= False
506 # Determine the state of the block and convert with the matching "handler"::
508 for block
in collect_blocks(expandtabs_filter(lines
)):
509 self
.set_state(block
)
510 for line
in getattr(self
, self
.state
+"_handler")(block
):
514 # TextCodeConverter.get_filter
515 # """"""""""""""""""""""""""""
518 def get_filter(self
, filter_set
, language
):
519 """Return language specific filter"""
520 if self
.__class
__ == Text2Code
:
521 key
= "text2"+language
522 elif self
.__class
__ == Code2Text
:
523 key
= language
+"2text"
527 return getattr(defaults
, filter_set
)[key
]
528 except (AttributeError, KeyError):
529 # print "there is no %r filter in %r"%(key, filter_set)
531 return identity_filter
534 # TextCodeConverter.get_indent
535 # """"""""""""""""""""""""""""
536 # Return the number of leading spaces in `line`::
def get_indent(self, line):
    """Return the number of leading whitespace characters of `line`.

    line -- a single line of text (string)

    The count is the difference between the length of `line` and the
    length of `line.lstrip()`. As `lstrip()` removes every kind of leading
    whitespace, a whitespace-only line (e.g. ``"\\n"``) reports its full
    length, not 0.
    """
    return len(line) - len(line.lstrip())
547 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
548 # Code blocks are unindented, documentation is commented (or filtered, if the
549 # ``strip`` option is True).
551 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
552 # literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
553 # treated as part of the documentation. This allows the inclusion of
559 # Mark that there is no double colon before the doctest block in the
562 # The class inherits the interface and helper functions from
563 # TextCodeConverter_ and adds functions specific to the text-to-code format
566 class Text2Code(TextCodeConverter
):
567 """Convert a (reStructured) text source to code source
570 # Text2Code.set_state
571 # ~~~~~~~~~~~~~~~~~~~~~
574 def set_state(self
, block
):
575 """Determine state of `block`. Set `self.state`
578 # `set_state` is used inside an iteration. Hence, if we are out of data, a
579 # StopIteration exception should be raised::
584 # The new state depends on the active state (from the last block) and
585 # features of the current block. It is either "header", "documentation", or
588 # If the current state is "" (first block), check for
589 # the `header_string` indicating a leading code block::
592 # print "set state for %r"%block
593 if block
[0].startswith(self
.header_string
):
594 self
.state
= "header"
596 self
.state
= "documentation"
598 # If the current state is "documentation", the next block is also
599 # documentation. The end of a documentation part is detected in the
600 # `Text2Code.documentation_handler`_::
602 # elif self.state == "documentation":
603 # self.state = "documentation"
605 # A "code_block" ends with the first less indented, nonblank line.
606 # `_textindent` is set by the documentation handler to the indent of the
607 # preceding documentation block::
609 elif self
.state
in ["code_block", "header"]:
610 indents
= [self
.get_indent(line
) for line
in block
]
611 # print "set_state:", indents, self._textindent
612 if indents
and min(indents
) <= self
._textindent
:
613 self
.state
= 'documentation'
615 self
.state
= 'code_block'
617 # TODO: (or not to do?) insert blank line before the first line with too-small
618 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
619 # split and push-back of the documentation part)?
621 # Text2Code.header_handler
622 # ~~~~~~~~~~~~~~~~~~~~~~~~
624 # Sometimes code needs to remain on the first line(s) of the document to be
625 # valid. The most common example is the "shebang" line that tells a POSIX
626 # shell how to process an executable file::
628 #!/usr/bin/env python
630 # In Python, the special comment to indicate the encoding, e.g.
631 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
634 # If we want to keep the line numbers in sync for text and code source, the
635 # reStructured Text markup for these header lines must start at the same line
636 # as the first header line. Therefore, header lines could not be marked as
637 # literal block (this would require the ``::`` and an empty line above the
640 # OTOH, a comment may start at the same line as the comment marker and it
641 # includes subsequent indented lines. Comments are visible in the reStructured
642 # Text source but hidden in the pretty-printed output.
644 # With a header converted to comment in the text source, everything before
645 # the first documentation block (i.e. before the first paragraph using the
646 # matching comment string) will be hidden away (in HTML or PDF output).
648 # This seems a good compromise, the advantages
650 # * line numbers are kept
651 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
652 # * greater flexibility: you can hide a repeating header in a project
653 # consisting of many source files.
655 # set off the disadvantages
657 # - it may come as surprise if a part of the file is not "printed",
658 # - one more syntax element to learn for rst newbies to start with pylit,
659 # (however, starting from the code source, this will be auto-generated)
661 # In the case that there is no matching comment at all, the complete code
662 # source will become a comment -- however, in this case it is not very likely
663 # the source is a literate document anyway.
665 # If needed for the documentation, it is possible to quote the header in (or
666 # after) the first documentation block, e.g. as `parsed literal`.
def header_handler(self, lines):
    """Format leading code block.

    lines -- list of lines of the header block; modified in place
             (the header string is removed from the first line)

    Generator: yields the lines as converted by `self.code_block_handler`.
    """
    # strip header string from first line (first occurrence only)
    lines[0] = lines[0].replace(self.header_string, "", 1)
    # yield all lines formatted as code-block
    for line in self.code_block_handler(lines):
        yield line
678 # Text2Code.documentation_handler
679 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
681 # The 'documentation' handler processes everything that is not recognized as
682 # "code_block". Documentation is quoted with `self.comment_string`
683 # (or filtered with `--strip=True`). ::
685 def documentation_handler(self
, lines
):
686 """Convert documentation blocks from text to code format
689 # Test for the end of the documentation block: does the second last line end
690 # with `::` but is neither a comment nor a directive?
692 # If end-of-documentation marker is detected,
694 # * set state to 'code_block'
695 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
696 # next "documentation" block)
697 # * do not comment the last line (the blank line separating documentation
702 endnum
= len(lines
) - 2
703 for (num
, line
) in enumerate(lines
):
705 if self
.state
== "code_block":
708 yield self
.comment_string
+ line
709 if (num
== endnum
and self
.marker_regexp
.search(line
)):
710 self
.state
= "code_block"
711 self
._textindent
= self
.get_indent(line
)
713 # TODO: Ensure a trailing blank line? Would need to test all documentation
714 # lines for end-of-documentation marker and add a line by calling the
715 # `ensure_trailing_blank_line` method (which also issues a warning)
718 # Text2Code.code_block_handler
719 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
721 # The "code_block" handler is called with an indented literal block. It
722 # removes leading whitespace up to the indentation of the first code line in
723 # the file (this deviation from docutils behaviour allows indented blocks of
def code_block_handler(self, block):
    """Convert indented literal blocks to source code format.

    block -- list of lines of one code block

    Generator: yields every line of `block` with the first run of
    `self._codeindent` spaces removed.

    Raises ValueError (on iteration) if a non-blank line is indented
    less than `self._codeindent`.
    """
    # If still unset, determine the indentation of code blocks from the
    # first line of this block.
    if self._codeindent == 0:
        self._codeindent = self.get_indent(block[0])

    # Yield unindented lines after checking whether we can safely unindent.
    # If a line is less indented than `_codeindent`, something went wrong.
    for line in block:
        if line.lstrip() and self.get_indent(line) < self._codeindent:
            # Call form of `raise`: valid in both Python 2 and Python 3.
            raise ValueError("code block contains line less indented "
                             "than %d spaces \n%r" % (self._codeindent, block))
        yield line.replace(" "*self._codeindent, "", 1)
749 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
750 # a source in "code format" (i.e. in a programming language), extracts
751 # documentation from comment blocks, and puts program code in literal blocks.
753 # The class inherits the interface and helper functions from
754 # TextCodeConverter_ and adds functions specific to the code-to-text format
757 class Code2Text(TextCodeConverter
):
758 """Convert code source to text source
761 # Code2Text.set_state
762 # ~~~~~~~~~~~~~~~~~~~
764 # Check if block is "header", "documentation", or "code_block":
766 # A paragraph is "documentation", if every non-blank line starts with a
767 # matching comment string (including whitespace except for commented blank
770 def set_state(self
, block
):
771 """Determine state of `block`."""
773 # skip documentation lines (commented, blank or blank comment)
774 if (line
.startswith(self
.comment_string
)
776 or line
.rstrip() == self
.comment_string
.rstrip()
779 # non-commented line found:
781 self
.state
= "header"
783 self
.state
= "code_block"
787 # keep state if the block is just a blank line
788 # if len(block) == 1 and self._is_blank_codeline(line):
790 self
.state
= "documentation"
793 # Code2Text.header_handler
794 # ~~~~~~~~~~~~~~~~~~~~~~~~
796 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
797 # discussion of the "header" state.) ::
799 def header_handler(self
, lines
):
800 """Format leading code block"""
801 if self
.strip
== True:
803 # get iterator over the lines that formats them as code-block
804 lines
= iter(self
.code_block_handler(lines
))
805 # prepend header string to first line
806 yield self
.header_string
+ lines
.next()
807 # yield remaining lines
811 # Code2Text.documentation_handler
812 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
814 # The *documentation state* handler converts a comment to a documentation
815 # block by stripping the leading `comment string` from every line::
817 def documentation_handler(self
, block
):
818 """Uncomment documentation blocks in source code
821 # Strip comment strings::
823 lines
= [self
.uncomment_line(line
) for line
in block
]
825 # If the code block is stripped, the literal marker would lead to an error
826 # when the text is converted with docutils. Strip it as well. Otherwise, check
827 # for the `code_block_marker` (default ``::``) at the end of the documentation
830 if self
.strip
or self
.strip_marker
:
831 self
.strip_code_block_marker(lines
)
834 self
.code_block_marker_missing
= \
835 not self
.marker_regexp
.search(lines
[-2])
836 except IndexError: # len(lines < 2), e.g. last line of document
837 self
.code_block_marker_missing
= True
844 # Code2Text.uncomment_line
845 # ~~~~~~~~~~~~~~~~~~~~~~~~
847 # Strip comment string from a documentation line and return it. Consider the
848 # case that a blank line has a comment string without trailing whitespace::
def uncomment_line(self, line):
    """Return uncommented documentation line.

    line -- one line of a documentation block in the code source

    The first occurrence of `self.comment_string` is removed. If the
    remaining line consists only of the comment string without its
    trailing whitespace (a "blank comment" line), that is removed as well.
    """
    stripped_comment_string = self.comment_string.rstrip()
    line = line.replace(self.comment_string, "", 1)
    # A blank comment line may lack the trailing whitespace of the
    # comment string and is therefore not matched by the replacement above.
    if line.rstrip() == stripped_comment_string:
        line = line.replace(stripped_comment_string, "", 1)
    return line
859 # Code2Text.code_block_handler
860 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
862 # The `code_block` handler returns the code block as indented literal
863 # block (or filters it, if ``self.strip == True``). The amount of the code
864 # indentation is controlled by `self.codeindent` (default 2). ::
866 def code_block_handler(self
, lines
):
867 """Covert code blocks to text format (indent or strip)
869 if self
.strip
== True:
871 # eventually insert transition marker
872 if self
.code_block_marker_missing
:
873 self
.state
= "documentation"
876 self
.code_block_marker_missing
= False
877 self
.state
= "code_block"
879 yield " "*self
.codeindent
+ line
883 # Code2Text.strip_code_block_marker
884 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
886 # Replace the literal marker with the equivalent of docutils replace rules
888 # * strip `::`-line (and preceding blank line) if on a line on its own
889 # * strip `::` if it is preceded by whitespace.
890 # * convert `::` to a single colon if preceded by text
892 # `lines` should be a list of documentation lines (with a trailing blank line).
893 # It is modified in-place::
895 def strip_code_block_marker(self
, lines
):
899 return # just one line (no trailing blank line)
901 # match with regexp: `match` is None or has groups
902 # \1 leading text, \2 code_block_marker, \3 remainder
903 match
= self
.marker_regexp
.search(line
)
905 if not match
: # no code_block_marker present
907 if not match
.group(1): # `code_block_marker` on an extra line
909 # delete preceding line if it is blank
910 if len(lines
) >= 2 and not lines
[-2].lstrip():
912 elif match
.group(1).rstrip() < match
.group(1):
913 # '::' follows whitespace
914 lines
[-2] = match
.group(1).rstrip() + match
.group(3)
915 else: # '::' follows text
916 lines
[-2] = match
.group(1).rstrip() + ':' + match
.group(3)
921 # Filters allow pre- and post-processing of the data to bring it in a format
922 # suitable for the "normal" text<->code conversion. An example is conversion
923 # of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and back).
927 # Filters are generator functions that return an iterator acting on a
928 # `data` iterable and yielding processed `data` lines.
933 # The most basic filter is the identity filter, that returns its argument as
def identity_filter(data):
    """Return data iterator without any processing.

    data -- iterable yielding lines; returned unchanged (same object)
    """
    return data
943 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
945 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
946 # hard tabs can lead to errors when the indentation is changed. ::
def expandtabs_filter(data):
    """Yield data tokens with hard-tabs expanded.

    data -- iterable yielding lines (strings)

    Generator: every line is passed through `str.expandtabs()` with the
    default tab size.
    """
    for line in data:
        yield line.expandtabs()
957 # A filter to aggregate "paragraphs" (blocks separated by blank
958 # lines). Yields lists of lines::
960 def collect_blocks(lines
):
961 """collect lines in a list
963 yield list for each paragraph, i.e. block of lines separated by a
964 blank line (whitespace only).
966 Trailing blank lines are collected as well.
968 blank_line_reached
= False
971 if blank_line_reached
and line
.rstrip():
973 blank_line_reached
= False
976 if not line
.rstrip():
977 blank_line_reached
= True
983 # dumb_c_preprocessor
984 # -------------------
986 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
987 # only converts lines that
989 # * start with "/\* " and end with " \*/" (followed by whitespace only)
991 # A more sophisticated version would also
993 # * convert multi-line comments
995 # + Keep indentation or strip 3 leading spaces?
997 # * account for nested comments
999 # * only convert comments that are separated from code by a blank line
1003 def dumb_c_preprocessor(data
):
1004 """change `C` ``/* `` `` */`` comments into C++ ``// `` comments"""
1005 comment_string
= defaults
.comment_strings
["c++"]
1009 if (line
.startswith(boc_string
)
1010 and line
.rstrip().endswith(eoc_string
)
1012 line
= line
.replace(boc_string
, comment_string
, 1)
1013 line
= "".join(line
.rsplit(eoc_string
, 1))
1016 # Unfortunately, the `replace` method of strings does not support negative
1017 # numbers for the `count` argument:
1019 # >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1022 # However, there is the `rsplit` method, that can be used together with `join`:
1024 # >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1027 # dumb_c_postprocessor
1028 # --------------------
1030 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1033 def dumb_c_postprocessor(data
):
1034 """change C++ ``// `` comments into `C` ``/* `` `` */`` comments"""
1035 comment_string
= defaults
.comment_strings
["c++"]
1039 if line
.rstrip() == comment_string
.rstrip():
1040 line
= line
.replace(comment_string
, "", 1)
1041 elif line
.startswith(comment_string
):
1042 line
= line
.replace(comment_string
, boc_string
, 1)
1043 line
= line
.rstrip() + eoc_string
+ "\n"
# Register the dumb C comment filters for the 'c' and 'css' languages.
defaults.preprocessors['c2text'] = dumb_c_preprocessor
defaults.preprocessors['css2text'] = dumb_c_preprocessor
defaults.postprocessors['text2c'] = dumb_c_postprocessor
defaults.postprocessors['text2css'] = dumb_c_postprocessor
1061 # Using this script from the command line will convert a file according to its
1062 # extension. This default can be overridden by a couple of options.
1064 # Dual source handling
1065 # --------------------
1067 # How to determine which source is up-to-date?
1068 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1070 # - set modification date of `outfile` to the one of `infile`
1072 # Points out that the source files are 'synchronized'.
1074 # * Are there problems to expect from "backdating" a file? Which?
1076 # Looking at http://www.unix.com/showthread.php?t=20526, it seems
1077 # perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1078 # description of the "actuality" of the data in the file.
1080 # * Should this become a default or an option?
1082 # - alternatively move input file to a backup copy (with option: `--replace`)
1084 # - check modification date before overwriting
1085 # (with option: `--overwrite=update`)
1087 # - check modification date before editing (implemented as `Jed editor`_
1088 # function `pylit_check()` in `pylit.sl`_)
1090 # .. _Jed editor: http://www.jedsoft.org/jed/
1091 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1093 # Recognised Filename Extensions
1094 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1096 # Instead of defining a new extension for "pylit" literate programs,
1097 # by default ``.txt`` will be appended for the text source and stripped by
1098 # the conversion to the code source. I.e. for a Python program foo:
1100 # * the code source is called ``foo.py``
1101 # * the text source is called ``foo.py.txt``
1102 # * the html rendering is called ``foo.py.html``
1108 # The following class adds `as_dict` and `__getattr__` methods to
1109 # `optparse.Values`::
class OptionValues(optparse.Values):
    """`optparse.Values` with dictionary export, non-overwriting completion
    and `None` as the value of options that are not set.
    """

# OptionValues.as_dict
# ~~~~~~~~~~~~~~~~~~~~
#
# For use as keyword arguments, it is handy to have the options in a
# dictionary. `as_dict` returns a copy of the instance's object dictionary::

    def as_dict(self):
        """Return options as dictionary object"""
        return self.__dict__.copy()

# OptionValues.complete
# ~~~~~~~~~~~~~~~~~~~~~
#
# Add keyword arguments as option values without touching existing ones::

    def complete(self, **keyw):
        """
        Complete the option values with keyword arguments.

        Do not overwrite existing values. Only use arguments that do not
        have a corresponding attribute in `self`,
        """
        for key, value in keyw.items():
            # test the instance dictionary directly: `hasattr` would always
            # succeed because `__getattr__` supplies None for missing names
            if key not in self.__dict__:
                setattr(self, key, value)

# OptionValues.__getattr__
# ~~~~~~~~~~~~~~~~~~~~~~~~
#
# To replace calls using ``options.ensure_value("OPTION", None)`` with the
# more concise ``options.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return default value for non existing options"""
        return None
1150 # .. [#] The special method `__getattr__` is only called when an attribute
1151 # lookup has not found the attribute in the usual places (i.e. it is
1152 # not an instance attribute nor is it found in the class tree for
1159 # The `PylitOptions` class comprises an option parser and methods for parsing
1160 # and completion of command line options::
1162 class PylitOptions(object):
1163 """Storage and handling of command line options for pylit"""
1171 """Set up an `OptionParser` instance for pylit command line options
1174 p
= optparse
.OptionParser(usage
=main
.__doc
__, version
=_version
)
1176 p
.add_option("-c", "--code2txt", dest
="txt2code", action
="store_false",
1177 help="convert code source to text source")
1178 p
.add_option("-m", "--code-block-marker", dest
="code_block_marker",
1179 help="syntax token starting a code block. (default '::')")
1180 p
.add_option("--comment-string", dest
="comment_string",
1181 help="documentation block marker in code source "
1183 p
.add_option("-d", "--diff", action
="store_true",
1184 help="test for differences to existing file")
1185 p
.add_option("--doctest", action
="store_true",
1186 help="run doctest.testfile() on the text version")
1187 p
.add_option("-e", "--execute", action
="store_true",
1188 help="execute code (Python only)")
# the accepted values for --language are the values of `defaults.languages`
1189 p
.add_option("--language", action
="store",
1190 choices
= defaults
.languages
.values(),
1191 help="use LANGUAGE native comment style")
1192 p
.add_option("--overwrite", action
="store",
1193 choices
= ["yes", "update", "no"],
1194 help="overwrite output file (default 'update')")
1195 p
.add_option("--replace", action
="store_true",
1196 help="move infile to a backup copy (appending '~')")
1197 p
.add_option("-s", "--strip", action
="store_true",
1198 help="export by stripping documentation or code")
# no explicit `dest` here: optparse derives it from the long option name, so
# -t stores True in `txt2code`, the same attribute that -c sets to False
1199 p
.add_option("-t", "--txt2code", action
="store_true",
1200 help="convert text source to code source")
1204 # PylitOptions.parse_args
1205 # ~~~~~~~~~~~~~~~~~~~~~~~
1207 # The `parse_args` method calls the `optparse.OptionParser` on command
1208 # line or provided args and returns the result as `PylitOptions.Values`
1209 # instance. Defaults can be provided as keyword arguments::
1211 def parse_args(self
, args
=sys
.argv
[1:], **keyw
):
1212 """parse command line arguments using `optparse.OptionParser`
1214 parse_args(args, **keyw) -> OptionValues instance
1216 args -- list of command line arguments.
1217 keyw -- keyword arguments or dictionary of option defaults
1220 (values
, args
) = self
.parser
.parse_args(args
, OptionValues(keyw
))
1221 # Convert FILE and OUTFILE positional args to option values
1222 # (other positional arguments are ignored)
1224 values
.infile
= args
[0]
1225 values
.outfile
= args
[1]
1231 # PylitOptions.complete_values
1232 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1234 # Complete an OptionValues instance `values`. Use module-level defaults and
1235 # context information to set missing option values to sensible defaults (if
1238 def complete_values(self
, values
):
1239 """complete option values with module and context sensible defaults
1241 x.complete_values(values) -> values
1242 values -- OptionValues instance
1245 # Complete with module-level defaults_::
1247 values
.complete(**defaults
.__dict
__)
1249 # Ensure infile is a string::
1251 values
.ensure_value("infile", "")
1253 # Guess conversion direction from `infile` filename::
1255 if values
.txt2code
is None:
1256 in_extension
= os
.path
.splitext(values
.infile
)[1]
1257 if in_extension
in values
.text_extensions
:
1258 values
.txt2code
= True
1259 elif in_extension
in values
.languages
.keys():
1260 values
.txt2code
= False
1262 # Auto-determine the output file name::
1264 values
.ensure_value("outfile", self
._get
_outfile
_name
(values
))
1266 # Second try: Guess conversion direction from outfile filename::
1268 if values
.txt2code
is None:
1269 out_extension
= os
.path
.splitext(values
.outfile
)[1]
1270 values
.txt2code
= not (out_extension
in values
.text_extensions
)
1272 # Set the language of the code::
1274 if values
.txt2code
is True:
1275 code_extension
= os
.path
.splitext(values
.outfile
)[1]
1276 elif values
.txt2code
is False:
1277 code_extension
= os
.path
.splitext(values
.infile
)[1]
1278 values
.ensure_value("language",
1279 values
.languages
.get(code_extension
,
1280 values
.fallback_language
))
1284 # PylitOptions._get_outfile_name
1285 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1287 # Construct a matching filename for the output file. The output filename is
1288 # constructed from `infile` by the following rules:
1290 # * '-' (stdin) results in '-' (stdout)
1291 # * strip the `txt_extension` (txt2code) or
1292 # * add a `txt_extension` (code2txt)
1293 # * fallback: if no guess can be made, add ".out"
1295 # .. TODO: use values.outfile_extension if it exists?
1299 def _get_outfile_name(self
, values
):
1300 """Return a matching output filename for `infile`
1302 # if input is stdin, default output is stdout
1303 if values
.infile
== '-':
1306 # Derive from `infile` name: strip or add text extension
1307 (base
, ext
) = os
.path
.splitext(values
.infile
)
1308 if ext
in values
.text_extensions
:
1310 if ext
in values
.languages
.keys() or values
.txt2code
== False:
1311 return values
.infile
+ values
.text_extensions
[0] # add
1313 return values
.infile
+ ".out"
1315 # PylitOptions.__call__
1316 # ~~~~~~~~~~~~~~~~~~~~~
1318 # The special `__call__` method allows to use PylitOptions instances as
1319 # *callables*: Calling an instance parses the argument list to extract option
1320 # values and completes them based on "context-sensitive defaults". Keyword
1321 # arguments are passed to `PylitOptions.parse_args`_ as default values. ::
1323 def __call__(self
, args
=sys
.argv
[1:], **keyw
):
1324 """parse and complete command line args return option values
1326 values
= self
.parse_args(args
, **keyw
)
1327 return self
.complete_values(values
)
1337 # Return file objects for in- and output. If the input path is missing,
1338 # write usage and abort. (An alternative would be to use stdin as default.
1339 # However, this leaves the uninitiated user with a non-responding application
1340 # if (s)he just tries the script without any arguments) ::
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open(infile) or sys.stdin
    out_stream  --  open(outfile) or sys.stdout
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Raises IOError if `infile` is empty, if a file cannot be opened, or if
    the `overwrite` policy forbids writing to `outfile`.
    """
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(1, "Output file exists!", outfile)
        elif overwrite == 'update' and is_newer(outfile, infile):
            raise IOError(1, "Output file is newer than input file!", outfile)
        else:
            out_stream = open(outfile, 'w')
    except (IOError, OSError):
        # do not leave the input file open if no output stream is returned
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
def is_newer(path1, path2):
    """Check if `path1` is newer than `path2` (using mtime)

    Compare modification time of files at path1 and path2.

    Non-existing files are considered oldest: Return False if path1 does not
    exist and True if path2 does not exist.

    Return None for equal modification time. (This evaluates to False in a
    boolean context but allows a test for equality.)
    """
    stamps = []
    for path in (path1, path2):
        try:
            stamps.append(os.path.getmtime(path))
        except OSError:
            # a missing file sorts before every existing one
            stamps.append(-1)
    first, second = stamps
    # print "mtime1", first, path1, "\n", "mtime2", second, path2

    if first == second:
        return None
    return first > second
1407 # Get an instance of the converter state machine::
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`

    A `Text2Code` instance if `txt2code` is true, a `Code2Text` instance
    otherwise. Keyword arguments are passed on to the converter.
    """
    if not txt2code:
        return Code2Text(data, **keyw)
    return Text2Code(data, **keyw)
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- path of the source ('-' for stdin)
    txt2code    -- False: `infile` is a code source and is converted to text
                   first; otherwise it is read as text source
    globs       -- globals for the tests (default: a new, empty dictionary)
    verbose, optionflags -- passed on to `doctest.DocTestRunner`
    keyw        -- passed on to the `Code2Text` converter

    Return the tuple (failures, tries).
    """
    # A dictionary in the signature would be shared between all calls
    if globs is None:
        globs = {}

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see doc of sys.path())::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # the source is completely read; release the file
        if data is not sys.stdin:
            data.close()

# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    # Pass by keyword: the first positional parameter of `DocTestRunner`
    # is `checker`, not `verbose`
    runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    runner.run(test)
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests" % (runner.failures, runner.tries))
    return runner.failures, runner.tries
1466 def diff(infile
='-', outfile
='-', txt2code
=True, **keyw
):
1467 """Report differences between converted infile and existing outfile
1469 If outfile is '-', do a round-trip conversion and report differences
1474 instream
= file(infile
)
# NOTE(review): no `close` of `instream` is visible in this function -- confirm
1475 # for diffing, we need a copy of the data as list::
1476 data
= instream
.readlines()
1478 converter
= get_converter(data
, txt2code
, **keyw
)
1482 outstream
= file(outfile
)
1483 old
= outstream
.readlines()
1485 newname
= "<conversion of %s>"%infile
1489 # back-convert the output data
1490 converter
= get_converter(new
, not txt2code
)
1492 newname
= "<round-conversion of %s>"%infile
1494 # find and print the differences (unified diff of `old` against `new`,
# labelled with `oldname` and `newname`)
1495 is_different
= False
1496 # print type(old), old
1497 # print type(new), new
1498 delta
= difflib
.unified_diff(old
, new
,
1499 # delta = difflib.unified_diff(["heute\n", "schon\n"], ["heute\n", "noch\n"],
1500 fromfile
=oldname
, tofile
=newname
)
1504 if not is_different
:
1507 print "no differences found"
1514 # Works only for python code.
1516 # Does not work with `eval`, as code is not just one expression. ::
1518 def execute(infile
="-", txt2code
=True, **keyw
):
1519 """Execute the input file. Convert first, if it is a text source.
1524 data
= str(Text2Code(data
, **keyw
))
# after this conversion, `data` holds the code source as a single string
1525 # print "executing " + options.infile
# NOTE(review): `options` is not a parameter of this function; the debugging
# line above would need `infile` instead -- confirm before re-enabling it
1532 # If this script is called from the command line, the `main` function will
1533 # convert the input (file or stdin) between text and code formats.
1535 # Option default values for the conversion can be given as keyword arguments
1536 # to `main`_. The option defaults will be updated by command line options and
1537 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1538 # helper functions and the converter instantiation.
1540 # This allows easy customization for programmatic use -- just call `main`
1541 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1545 def main(args
=sys
.argv
[1:], **defaults
):
1546 """%prog [options] INFILE [OUTFILE]
1548 Convert between (reStructured) text source with embedded code,
1549 and code source with embedded documentation (comment blocks)
1551 The special filename '-' stands for standard in and output.
1554 # Parse and complete the options::
1556 options
= PylitOptions()(args
, **defaults
)
# NOTE(review): the default of `args` (``sys.argv[1:]``) is evaluated once,
# when the function is defined, and the ``**defaults`` parameter shadows the
# module-level `defaults` object inside this function.
1557 # print "infile", repr(options.infile)
1559 # Special actions with early return::
1562 return run_doctest(**options
.as_dict())
1565 return diff(**options
.as_dict())
1568 return execute(**options
.as_dict())
1570 # Open in- and output streams::
1573 (data
, out_stream
) = open_streams(**options
.as_dict())
1575 print "IOError: %s %s" % (ex
.filename
, ex
.strerror
)
1578 # Get a converter instance::
1580 converter
= get_converter(data
, **options
.as_dict())
1582 # Convert and write to out_stream::
1584 out_stream
.write(str(converter
))
1586 if out_stream
is not sys
.stdout
:
1587 print "extract written to", out_stream
.name
1590 # If input and output are from files, set the modification time (`mtime`) of
1591 # the output file to the one of the input file to indicate that the contained
1592 # information is equal. [#]_ ::
1595 os
.utime(options
.outfile
, (os
.path
.getatime(options
.outfile
),
1596 os
.path
.getmtime(options
.infile
))
1601 ## print "mtime", os.path.getmtime(options.infile), options.infile
1602 ## print "mtime", os.path.getmtime(options.outfile), options.outfile
1605 # .. [#] Make sure the corresponding file object (here `out_stream`) is
1606 # closed, as otherwise the change will be overwritten when `close` is
1607 # called afterwards (either explicitly or at program exit).
1610 # Rename the infile to a backup copy if ``--replace`` is set::
1613 os
.rename(options
.infile
, options
.infile
+ "~")
1616 # Run main, if called from the command line::
# True only when this file is executed as a script, not when it is imported
1618 if __name__
== '__main__':
1625 # Open questions and ideas for further development
1630 # * can we gain from using "shutil" over "os.path" and "os"?
1631 # * use pylint or pyChecker to enforce a consistent style?
1636 # * Use templates for the "intelligent guesses" (with Python syntax for string
1637 # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1639 # * Is it sensible to offer the `header_string` option also as command line
1642 # * treatment of blank lines:
1644 # * Alternatives: Keep blank lines blank
1648 # + "if empty" (no whitespace). Comment if there is whitespace.
1650 # This would allow non-obstructing markup but unfortunately this is (in
1651 # most editors) also non-visible markup -> bad.
1653 # + "if double" (if there is more than one consecutive blank line)
1655 # + "never" (current setting)
1657 # So the setting could be something like::
1659 # defaults.keep_blank_lines = { "python": "if double",
1660 # "elisp": "always"}
1664 # ----------------------
1666 # * Ignore "matching comments" in literal strings?
1668 # Too complicated: Would need a specific detection algorithm for every
1669 # language that supports multi-line literal strings (C++, PHP, Python)
1671 # * Warn if a comment in code will become documentation after round-trip?
1674 # docstrings in code blocks
1675 # --------------------------
1677 # * How to handle docstrings in code blocks? (it would be nice to convert them
1678 # to rst-text if ``__docformat__ == restructuredtext``)
1680 # TODO: Ask at docutils users|developers
1685 # http://docutils.sourceforge.net/
1686 # .. _indented literal block:
1687 # .. _indented literal blocks:
1688 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1689 # .. _quoted literal block:
1690 # .. _quoted literal blocks:
1691 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1692 # .. _doctest block:
1693 # .. _doctest blocks:
1694 # http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks
1695 # .. _pygments: http://pygments.org/
1696 # .. _syntax highlight: ../features/syntax-highlight.html
1697 # .. _parsed-literal blocks:
1698 # http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block