2 # -*- coding: iso-8859-1 -*-
4 # ===============================================================
5 # pylit.py: Literate programming with Python and reStructuredText
6 # ===============================================================
9 # :Copyright: 2005, 2007 Guenter Milde.
10 # Released under the terms of the GNU General Public License
22 # :2005-06-29: Initial version
23 # :2005-06-30: first literate version of the script
24 # :2005-07-01: object orientated script using generators
25 # :2005-07-10: Two state machine (later added 'header' state)
26 # :2006-12-04: Start of work on version 0.2 (code restructuring)
27 # :2007-01-23: 0.2 published at http://pylit.berlios.de
28 # :2007-01-25: 0.2.1 Outsourced non-core documentation to the PyLit pages.
29 # :2007-01-26: 0.2.2 new behaviour of `diff` function
30 # :2007-01-29: 0.2.3 new `header` methods after suggestion by Riccardo Murri
31 # :2007-01-31: 0.2.4 raise Error if code indent is too small
32 # :2007-02-05: 0.2.5 new command line option --comment-string
33 # :2007-02-09: 0.2.6 add section with open questions,
34 # Code2Text: let only blank lines (no comment str)
35 # separate text and code,
36 # fix `Code2Text.header`
37 # :2007-02-19: 0.2.7 simplify `Code2Text.header,`
38 # new `iter_strip` method replacing a lot of ``if``-s
39 # :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile
40 # :2007-02-27: 0.3 new `Code2Text` converter after an idea by Riccardo Murri
44 """pylit: Literate programming with Python and reStructuredText
46 PyLit is a bidirectional converter between
48 * a (reStructured) text source with embedded code, and
49 * a code source with embedded text blocks (comments)
52 __docformat__
= 'restructuredtext'
67 # * non-standard extensions
71 from simplestates
import SimpleStates
# generic state machine
80 # The PushIterator is a minimal implementation of an iterator with
81 # backtracking from the `Effective Python Programming`_ OSCON 2005 tutorial by
82 # Anthony Baxter. As the definition is small, it is inlined now. For the full
83 # reasoning and documentation see `iterqueue.py`_.
85 # .. _`Effective Python Programming`:
86 # http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf
88 # .. _iterqueue.py: iterqueue.py.html
class PushIterator(object):
    """Minimal iterator wrapper with backtracking.

    Values handed to `push` are returned by `next` (most recently
    pushed value first) before iteration of the wrapped iterable
    continues.
    """
    def __init__(self, iterable):
        """iterable -- any object accepted by the builtin `iter`"""
        self.it = iter(iterable)
        # stack of pushed-back values, served before `self.it`
        self.cache = []
    def __iter__(self):
        """Return `self`, as this is already an iterator"""
        return self
    def next(self):
        """Return the last pushed-back value, else the next wrapped value."""
        # Test the cache itself, not the popped value: a pushed-back
        # falsy value (e.g. an empty string) must not be skipped.
        if self.cache:
            return self.cache.pop()
        return self.it.next()
    def push(self, value):
        """Push back `value`; the following `next` call returns it."""
        self.cache.append(value)
107 # The converter classes implement a simple `state machine` to separate and
108 # transform text and code blocks. For this task, only a very limited parsing
109 # is needed. Using the full blown docutils_ rst parser would introduce a
110 # large overhead and slow down the conversion.
112 # PyLit's simple parser assumes:
114 # * indented literal blocks in a text source are code blocks.
116 # * comment lines that start with a matching comment string in a code source
119 # .. _docutils: http://docutils.sourceforge.net/
121 # The actual converter classes are derived from `PyLitConverter`:
122 # `Text2Code`_ converts a text source to executable code, while `Code2Text`_
123 # does the opposite: converting commented code to a text source.
125 # The `PyLitConverter` class inherits the state machine framework
126 # (initialisation, scheduler, iterator interface, ...) from `SimpleStates`,
127 # overrides the ``__init__`` method, and adds auxiliary methods and
128 # configuration attributes (options). ::
130 class PyLitConverter(SimpleStates
):
131 """parent class for `Text2Code` and `Code2Text`, the state machines
132 converting between text source and code source of a literal program.
138 # The data attributes are class default values. They will be overridden by
139 # matching keyword arguments during class instantiation.
141 # `get_converter`_ and `main`_ pass on unused keyword arguments to
142 # the instantiation of a converter class. This way, keyword arguments
143 # to these functions can be used to customize the converter.
145 # Default language and language specific defaults::
148 comment_strings
= {"python": '# ',
152 # Number of spaces to indent code blocks in the code -> text conversion.[#]_
154 # .. [#] For the text -> code conversion, the codeindent is determined by the
155 # first recognized code line (leading comment or first indented literal
156 # block of the text source).
162 # Marker string for the first code block. (Should be a valid rst directive
163 # that accepts code on the same line, e.g. ``'.. admonition::'``.) No
164 # trailing whitespace needed as indented code follows. Default is a comment
169 # Export to the output format stripping text or code blocks::
181 # Initializing sets up the `data` attribute, an iterable object yielding
182 # lines of the source to convert.[1]_ ::
184 def __init__(self
, data
, **keyw
):
185 """data -- iterable data object
186 (list, file, generator, string, ...)
187 **keyw -- all remaining keyword arguments are
188 stored as class attributes
191 # As the state handlers need backtracking, the data is wrapped in a
192 # `PushIterator`_ if it does not already have a `push` method::
194 if hasattr(data
, 'push'):
197 self
.data
= PushIterator(data
)
200 # Additional keyword arguments are stored as data attributes, overwriting the
203 self
.__dict
__.update(keyw
)
205 # The comment string is set to the languages comment string if not given in
206 # the keyword arguments::
208 if not hasattr(self
, "comment_string") or not self
.comment_string
:
209 self
.comment_string
= self
.comment_strings
[self
.language
]
211 # .. [1] The most common choice of data is a `file` object with the text
214 # To convert a string into a suitable object, use its splitlines method
215 # with the optional `keepends` argument set to True.
220 # Return converted data as string::
223 blocks
= ["".join(block
) for block
in self()]
224 return "".join(blocks
)
226 # Converter.get_indent
227 # ~~~~~~~~~~~~~~~~~~~~
229 # Return the number of leading spaces in `string` after expanding tabs ::
def get_indent(self, string):
    """Return the indentation of `string`.

    The indentation is the number of leading whitespace characters
    that remain after hard tabs were expanded with `expandtabs`.
    """
    expanded = string.expandtabs()
    stripped = expanded.lstrip()
    return len(expanded) - len(stripped)
237 # Converter.ensure_trailing_blank_line
238 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
240 # Ensure there is a blank line as last element of the list `lines`::
242 def ensure_trailing_blank_line(self
, lines
, next_line
):
245 if lines
[-1].lstrip():
246 sys
.stderr
.write("\nWarning: inserted blank line between\n %s %s"
247 %(lines
[-1], next_line
))
251 # Converter.collect_blocks
255 def collect_blocks(self
):
256 """collect lines in a list
258 return list for each block of lines (paragraph) seperated by a
259 blank line (whitespace only)
262 for line
in self
.data
:
264 if not line
.rstrip():
273 # The `Text2Code` class separates code blocks (indented literal blocks) from
274 # reStructured text. Code blocks are unindented, text is commented (or
275 # filtered, if the ``strip`` option is True).
277 # Only `indented literal blocks` are extracted. `quoted literal blocks` and
278 # `pydoc blocks` are treated as text. This allows the easy inclusion of
284 # .. [#] Mark that there is no double colon before the doctest block in
287 # The state handlers are implemented as generators. Iterating over a
288 # `Text2Code` instance initializes them to generate iterators for
289 # the respective states (see ``simplestates.py``).
293 class Text2Code(PyLitConverter
):
294 """Convert a (reStructured) text source to code source
297 # INIT: call the parent classes init method.
299 # If the `strip` argument is true, replace the `__iter__` method
300 # with a special one that drops "spurious" blocks::
def __init__(self, data, **keyw):
    """Initialize via `PyLitConverter.__init__`, then honour `strip`.

    data   -- passed on unchanged to `PyLitConverter.__init__`
    **keyw -- passed on unchanged to `PyLitConverter.__init__`

    If the instance has a true `strip` attribute afterwards, the
    instance attribute `__iter__` is set to `self.iter_strip`.
    """
    PyLitConverter.__init__(self, data, **keyw)
    strip_requested = getattr(self, "strip", False)
    if strip_requested:
        # NOTE(review): for new-style classes, implicit iteration looks up
        # `__iter__` on the type, not the instance -- confirm that this
        # instance-level override is actually picked up by the callers.
        self.__iter__ = self.iter_strip
310 # Convert the header (leading rst comment block) to code::
313 """Convert header (comment) to code"""
314 line
= self
.data_iterator
.next()
316 # Test first line for rst comment: (We need to do this explicitly here, as
317 # the code handler will only recognize the start of a text block if a line
318 # starting with "matching comment" is preceded by an empty line. However, we
319 # have to care for the case of the first line being a "text line".
321 # Which variant is better?
323 # 1. starts with comment marker and has
324 # something behind the comment on the first line::
326 # if line.startswith("..") and len(line.rstrip()) > 2:
328 # 2. Convert any leading comment to code::
330 if line
.startswith(self
.header_string
):
332 # Strip leading comment string (typically added by `Code2Text.header`) and
333 # return the result of processing the data with the code handler::
335 self
.data_iterator
.push(line
.replace(self
.header_string
, "", 1))
338 # No header code found: Push back first non-header line and set state to
341 self
.data_iterator
.push(line
)
345 # Text2Code.text_handler_generator
346 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
348 # The 'text' handler processes everything that is not an indented literal
349 # comment. Text is quoted with `self.comment_string` or filtered (with
352 # It is implemented as a generator function that acts on the `data` iterator
353 # and yields text blocks.
355 # Declaration and initialization::
357 def text_handler_generator(self
):
358 """Convert text blocks from rst to comment
362 # Iterate over the data_iterator (which yields the data lines)::
364 for line
in self
.data_iterator
:
365 # print "Text: '%s'"%line
367 # Default action: add comment string and collect in `lines` list::
369 lines
.append(self
.comment_string
+ line
)
371 # Test for the end of the text block: a line that ends with `::` but is neither
372 # a comment nor a directive::
374 if (line
.rstrip().endswith("::")
375 and not line
.lstrip().startswith("..")):
377 # End of text block is detected, now:
379 # set the current text indent level (needed by the code handler to find the
380 # end of code block) and set the state to "code" (i.e. the next call of
381 # `self.next` goes to the code handler)::
383 self
._textindent
= self
.get_indent(line
)
386 # Ensure a trailing blank line (which is the paragraph separator in
387 # reStructured Text. Look at the next line, if it is blank -- OK, if it is
388 # not blank, push it back (it should be code) and add a line by calling the
389 # `ensure_trailing_blank_line` method (which also issues a warning)::
391 line
= self
.data_iterator
.next()
393 self
.data_iterator
.push(line
) # push back
394 self
.ensure_trailing_blank_line(lines
, line
)
398 # Now yield and reset the lines. (There was a function call to remove a
399 # literal marker (if on a line on itself) to shorten the comment. However,
400 # this behaviour was removed as the resulting difference in line numbers leads
401 # to misleading error messages in doctests)::
403 #remove_literal_marker(lines)
407 # End of data: if we "fall off" the iteration loop, just join and return the
413 # Text2Code.code_handler_generator
414 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
416 # The `code` handler is called when a literal block marker is encountered. It
417 # returns a code block (indented literal block), removing leading whitespace
418 # up to the indentation of the first code line in the file (this deviation
419 # from docutils behaviour allows indented blocks of Python code).
421 # As the code handler detects the switch to "text" state by looking at
422 # the line indents, it needs to push back the last probed data token. I.e.
423 # the data_iterator must support a `push` method. (This is the
424 # reason for the use of the PushIterator class in `__init__`.) ::
426 def code_handler_generator(self
):
427 """Convert indented literal blocks to source code
430 codeindent
= None # indent of first non-blank code line, set below
431 indent_string
= "" # leading whitespace chars ...
433 # Iterate over the lines in the input data::
435 for line
in self
.data_iterator
:
436 # print "Code: '%s'"%line
438 # Pass on blank lines (no test for end of code block needed|possible)::
440 if not line
.rstrip():
441 lines
.append(line
.replace(indent_string
, "", 1))
444 # Test for end of code block:
446 # A literal block ends with the first less indented, nonblank line.
447 # `self._textindent` is set by the text handler to the indent of the
448 # preceding paragraph.
450 # To prevent problems with different tabulator settings, hard tabs in code
451 # lines are expanded with the `expandtabs` string method when calculating the
452 # indentation (i.e. replaced by 8 spaces, by default).
456 if self
.get_indent(line
) <= self
._textindent
:
458 self
.data_iterator
.push(line
)
460 # append blank line (if not already present)
461 self
.ensure_trailing_blank_line(lines
, line
)
463 # reset list of lines
467 # OK, we are sure now that the current line is neither blank nor a text line.
469 # If still unset, determine the code indentation from first non-blank code
472 if codeindent
is None and line
.lstrip():
473 codeindent
= self
.get_indent(line
)
474 indent_string
= line
[:codeindent
]
476 # Append unindented line to lines cache (but check if we can safely unindent
479 if not line
.startswith(indent_string
):
480 raise ValueError, "cannot unindent line %r,\n"%line \
481 + " doesnot start with code indent string %r"%indent
_string
483 lines
.append(line
[codeindent
:])
485 # No more lines in the input data: just return what we have::
490 # Txt2Code.remove_literal_marker
491 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
493 # Remove literal marker (::) in "expanded form" i.e. in a paragraph on its own.
495 # While cleaning up the code source, it leads to confusion for doctest and
496 # searches (e.g. grep) as line-numbers between text and code source will
499 def remove_literal_marker(list):
502 if (lines
[-3].strip() == self
.comment_string
.strip()
503 and lines
[-2].strip() == self
.comment_string
+ '::'):
508 # Text2Code.iter_strip
509 # ~~~~~~~~~~~~~~~~~~~~
511 # Modification of the `simplestates.__iter__` method that will replace it when
512 # the `strip` keyword argument is `True` during class instantiation:
514 # Iterate over class instances dropping text blocks::
516 def iter_strip(self
):
517 """Generate and return an iterator dropping text blocks
519 self
.data_iterator
= self
.data
520 self
._initialize
_state
_generators
()
522 yield getattr(self
, self
.state
)()
523 getattr(self
, self
.state
)() # drop text block
530 # The `Code2Text` class does the opposite of `Text2Code`_ -- it processes
531 # valid source code, extracts comments, and puts non-commented code in literal
534 # Only lines starting with a comment string matching the one in the
535 # `comment_string` data attribute are considered text lines.
537 # The class is derived from the PyLitConverter state machine and adds handlers
538 # for the three states "header", "text", and "code". ::
540 class Code2Text(PyLitConverter
):
541 """Convert code source to text source
548 # If the last text block does not end with a code marker (by default, the
549 # literal-block marker ``::``), the `text` method will set `code marker` to
550 # a paragraph that will start the next code block. It is yielded if non-empty
551 # at a text-code transition. If there is no preceding text block, `code_marker`
552 # contains the `header_string`::
555 self
.code_marker
= []
557 self
.code_marker
= [self
.header_string
]
559 for block
in self
.collect_blocks():
561 # Test the state of the block, return it processed with the right handler::
563 if self
.block_is_text(block
):
566 if self
.state
!= "code" and self
.code_marker
:
567 yield self
.code_marker
569 yield getattr(self
, self
.state
)(block
)
572 # A paragraph is a text block, if every non-blank line starts with a matching
573 # comment string (test includes whitespace except for commented blank lines!)
576 def block_is_text(self
, block
):
579 and not line
.startswith(self
.comment_string
)
580 and line
.rstrip() != self
.comment_string
.rstrip()):
587 # Sometimes code needs to remain on the first line(s) of the document to be
588 # valid. The most common example is the "shebang" line that tells a POSIX
589 # shell how to process an executable file::
591 #!/usr/bin/env python
593 # In Python, the ``# -*- coding: iso-8859-1 -*-`` line must occur before any
594 # other comment or code.
596 # If we want to keep the line numbers in sync for text and code source, the
597 # reStructured Text markup for these header lines must start at the same line
598 # as the first header line. Therefore, header lines could not be marked as
599 # literal block (this would require the "::" and an empty line above the code).
601 # OTOH, a comment may start at the same line as the comment marker and it
602 # includes subsequent indented lines. Comments are visible in the reStructured
603 # Text source but hidden in the pretty-printed output.
605 # With a header converted to comment in the text source, everything before the
606 # first text block (i.e. before the first paragraph using the matching comment
607 # string) will be hidden away (in HTML or PDF output).
609 # This seems a good compromise, the advantages
611 # * line numbers are kept
612 # * the "normal" code conversion rules (indent/unindent by `codeindent`) apply
613 # * greater flexibility: you can hide a repeating header in a project
614 # consisting of many source files.
616 # set off the disadvantages
618 # - it may come as surprise if a part of the file is not "printed",
619 # - one more syntax element to learn for rst newbies to start with pylit,
620 # (however, starting from the code source, this will be auto-generated)
622 # In the case that there is no matching comment at all, the complete code
623 # source will become a comment -- however, in this case it is not very likely
624 # the source is a literate document anyway.
626 # If needed for the documentation, it is possible to repeat the header in (or
627 # after) the first text block, e.g. with a `line block` in a `block quote`:
629 # | ``#!/usr/bin/env python``
630 # | ``# -*- coding: iso-8859-1 -*-``
632 # The current implementation represents the header state by the setting of
633 # `code_marker` to ``[self.header_string]``. The first non-empty text block
634 # will overwrite this setting.
637 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
639 # The *text state handler* converts a comment to a text block
640 # Strip the leading comment string::
642 def text(self
, lines
):
643 """Uncomment text blocks in source code
646 lines
= [line
.replace(self
.comment_string
, "", 1) for line
in lines
]
648 lines
= [re
.sub("^"+self
.comment_string
.rstrip(), "", line
)
652 self
.strip_literal_marker(lines
)
653 self
.code_marker
= []
655 # Check for code block marker (double colon) at the end of the text block
656 # Update the `code_marker` argument. The current `code marker` is 'prepended'
657 # to the next code block by `Code2Text.code`_ ::
660 if lines
[-2].rstrip().endswith("::"):
661 self
.code_marker
= []
663 self
.code_marker
= ["::\n", "\n"]
665 # Return the text block to the calling function::
673 # The `code` method is called on non-commented code. Code is returned as
674 # indented literal block (or filtered, if ``self.strip == True``). The amount
675 # of the code indentation is controlled by `self.codeindent` (default 2).
678 def code(self
, lines
):
679 """Indent lines or strip if `strip` == `True`
681 if self
.strip
== True:
684 return [" "*self
.codeindent
+ line
for line
in lines
]
686 # Code2Text.strip_literal_marker
687 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
689 # If the code block is stripped, the literal marker would lead to an error
690 # when the text is converted with docutils. Replace it with the equivalent of
691 # docutils replace rules
693 # * strip `::`-line (and preceding blank line) if on a line on its own
694 # * strip `::` if it is preceded by whitespace.
695 # * convert `::` to a single colon if preceded by text
697 # `lines` should be list of text lines (with a trailing blank line).
698 # It is modified in-place::
700 def strip_literal_marker(self
, lines
):
703 except IndexError: # len(lines < 2)
706 # split at rightmost '::'
708 (head
, tail
) = line
.rsplit('::', 1)
709 except ValueError: # only one part (no '::')
712 # '::' on an extra line
715 # delete preceding line if it is blank
716 if len(lines
) >= 2 and not lines
[-2].lstrip():
718 # '::' follows whitespace
719 elif head
.rstrip() < head
:
721 lines
[-2] = "".join((head
, tail
))
724 lines
[-2] = ":".join((head
, tail
))
731 # Using this script from the command line will convert a file according to its
732 # extension. This default can be overridden by a couple of options.
734 # Dual source handling
735 # --------------------
737 # How to determine which source is up-to-date?
738 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
740 # - set modification date of `outfile` to the one of `infile`
742 # Points out that the source files are 'synchronized'.
744 # * Are there problems to expect from "backdating" a file? Which?
746 # Looking at http://www.unix.com/showthread.php?t=20526, it seems
747 # perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
748 # description of the "actuality" of the data in the file.
750 # * Should this become a default or an option?
752 # - alternatively move input file to a backup copy (with option: `--replace`)
754 # - check modification date before overwriting
755 # (with option: `--overwrite=update`)
757 # - check modification date before editing (implemented as `Jed editor`_
758 # function `pylit_check()` in `pylit.sl`_)
760 # .. _Jed editor: http://www.jedsoft.org/jed/
761 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
763 # Recognised Filename Extensions
764 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
766 # Finding an easy to remember, unused filename extension is not easy.
769 # a double extension (similar to .tar.gz, say) seems most appropriate
770 # (at least on UNIX). However, it fails on FAT16 filesystems.
771 # The same scheme can be used for c.txt, p.txt and the like.
774 # is recognised as extension by os.path.splitext but also fails on FAT16
777 # (PYthon Text) is used by the Python test interpreter
778 # `pytest <http://www.zetadev.com/software/pytest/>`__
781 # was even mentioned as extension for "literate Python" files in an
782 # email exchange (http://www.python.org/tim_one/000115.html) but
783 # subsequently used for Python libraries.
786 # seems to be free (as by a Google search, "lpy" is the name of a python
787 # code pretty printer but this should not pose a problem).
790 # seems to be free as well.
792 # Instead of defining a new extension for "pylit" literate programs,
793 # by default ``.txt`` will be appended for literate code and stripped by
794 # the conversion to executable code. i.e. for a program foo:
796 # * the literate source is called ``foo.py.txt``
797 # * the html rendering is called ``foo.py.html``
798 # * the python source is called ``foo.py``
805 # For use as keyword arguments, it is handy to have the options
806 # in a dictionary. The following class adds an `as_dict` method
807 # to `optparse.Values`::
class OptionValues(optparse.Values):
    """`optparse.Values` extended by an `as_dict` method."""
    def as_dict(self):
        """Return options as dictionary object

        Every name reported by `dir(self)` that is not also reported by
        `dir(OptionValues)` is included, mapped to its current value
        (values that are None are kept).
        """
        # Names provided by the class itself (methods, special attributes);
        # computed once instead of once per attribute.
        class_names = set(dir(OptionValues))
        options = {}
        for option in dir(self):
            if option not in class_names:
                options[option] = getattr(self, option)
        return options
820 # Options are stored in the values attribute of the `PylitOptions` class.
821 # It is initialized with default values and parsed command line options (and
822 # arguments). This scheme allows easy customization by code importing the
825 class PylitOptions(object):
826 """Storage and handling of program options
829 # Recognized file extensions for text and code versions of the source::
831 code_languages
= {".py": "python",
834 code_extensions
= code_languages
.keys()
835 text_extensions
= [".txt"]
840 # Instantiation sets up an OptionParser and initializes it with pylit's
841 # command line options and `default_values`. It then updates the values based
842 # on command line options and sensible defaults::
844 def __init__(self
, args
=sys
.argv
[1:], **default_values
):
845 """Set up an `OptionParser` instance and parse and complete arguments
847 p
= optparse
.OptionParser(usage
=main
.__doc
__, version
="0.2")
849 p
.set_defaults(**default_values
)
851 p
.add_option("-c", "--code2txt", dest
="txt2code", action
="store_false",
852 help="convert code to reStructured text")
853 p
.add_option("--comment-string", dest
="comment_string",
854 help="text block marker (default '# ' (for Python))" )
855 p
.add_option("-d", "--diff", action
="store_true",
856 help="test for differences to existing file")
857 p
.add_option("--doctest", action
="store_true",
858 help="run doctest.testfile() on the text version")
859 p
.add_option("-e", "--execute", action
="store_true",
860 help="execute code (Python only)")
861 p
.add_option("-f", "--infile",
862 help="input file name ('-' for stdout)" )
863 p
.add_option("--overwrite", action
="store",
864 choices
= ["yes", "update", "no"],
865 help="overwrite output file (default 'update')")
866 p
.add_option("-o", "--outfile",
867 help="output file name ('-' for stdout)" )
868 p
.add_option("--replace", action
="store_true",
869 help="move infile to a backup copy (appending '~')")
870 p
.add_option("-s", "--strip", action
="store_true",
871 help="export by stripping text or code")
872 p
.add_option("-t", "--txt2code", action
="store_true",
873 help="convert reStructured text to code")
876 # parse to fill a self.Values instance
877 self
.values
= self
.parse_args(args
)
878 # complete with context-sensitive defaults
879 self
.values
= self
.complete_values(self
.values
)
884 # "Calling" an instance updates the option values based on command line
885 # arguments and default values and does a completion of the options based on
886 # "context-sensitive defaults"::
def __call__(self, args=sys.argv[1:], **default_values):
    """parse and complete command line args

    `args` and `default_values` are handed to `self.parse_args`; the
    values it returns are passed through `self.complete_values` and
    the result of that call is returned.
    """
    return self.complete_values(self.parse_args(args, **default_values))
895 # PylitOptions.parse_args
896 # ~~~~~~~~~~~~~~~~~~~~~~~
898 # The `parse_args` method calls the `optparse.OptionParser` on command
899 # line or provided args and returns the result as `PylitOptions.Values`
900 # instance. Defaults can be provided as keyword arguments::
902 def parse_args(self
, args
=sys
.argv
[1:], **default_values
):
903 """parse command line arguments using `optparse.OptionParser`
905 args -- list of command line arguments.
906 default_values -- dictionary of option defaults
909 defaults
= self
.parser
.defaults
.copy()
910 defaults
.update(default_values
)
912 (values
, args
) = self
.parser
.parse_args(args
, OptionValues(defaults
))
913 # Convert FILE and OUTFILE positional args to option values
914 # (other positional arguments are ignored)
916 values
.infile
= args
[0]
917 values
.outfile
= args
[1]
922 # PylitOptions.complete_values
923 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
925 # The `complete` method uses context information to set missing option values
926 # to sensible defaults (if possible).
930 def complete_values(self
, values
):
931 """complete option values with context sensible defaults
933 values
.ensure_value("infile", "")
934 # Guess conversion direction from infile filename
935 if values
.ensure_value("txt2code", None) is None:
936 in_extension
= os
.path
.splitext(values
.infile
)[1]
937 if in_extension
in self
.text_extensions
:
938 values
.txt2code
= True
939 elif in_extension
in self
.code_extensions
:
940 values
.txt2code
= False
941 # Auto-determine the output file name
942 values
.ensure_value("outfile", self
.get_outfile_name(values
.infile
,
944 # Guess conversion direction from outfile filename or set to default
945 if values
.txt2code
is None:
946 out_extension
= os
.path
.splitext(values
.outfile
)[1]
947 values
.txt2code
= not (out_extension
in self
.text_extensions
)
949 # Set the language of the code (default "python")
950 if values
.txt2code
is True:
951 code_extension
= os
.path
.splitext(values
.outfile
)[1]
952 elif values
.txt2code
is False:
953 code_extension
= os
.path
.splitext(values
.infile
)[1]
954 values
.ensure_value("language",
955 self
.code_languages
.get(code_extension
, "python"))
957 # Set the default overwrite mode
958 values
.ensure_value("overwrite", 'update')
962 # PylitOptions.get_outfile_name
963 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
965 # Construct a matching filename for the output file. The output filename is
966 # constructed from `infile` by the following rules:
968 # * '-' (stdin) results in '-' (stdout)
969 # * strip the `txt_extension` or add the `code_extension` (txt2code)
970 # * add a `txt_extension` (code2txt)
971 # * fallback: if no guess can be made, add ".out"
975 def get_outfile_name(self
, infile
, txt2code
=None):
976 """Return a matching output filename for `infile`
978 # if input is stdin, default output is stdout
982 (base
, ext
) = os
.path
.splitext(infile
)
983 # TODO: should get_outfile_name() use self.values.outfile_extension
986 # strip text extension
987 if ext
in self
.text_extensions
:
989 # add (first) text extension for code files
990 if ext
in self
.code_extensions
or txt2code
== False:
991 return infile
+ self
.text_extensions
[0]
993 return infile
+ ".out"
1003 # Return file objects for in- and output. If the input path is missing,
1004 # write usage and abort. (An alternative would be to use stdin as default.
1005 # However, this leaves the uninitiated user with a non-responding application
1006 # if (s)he just tries the script without any arguments) ::
1008 def open_streams(infile
= '-', outfile
= '-', overwrite
='update', **keyw
):
1009 """Open and return the input and output stream
1011 open_streams(infile, outfile) -> (in_stream, out_stream)
1013 in_stream -- file(infile) or sys.stdin
1014 out_stream -- file(outfile) or sys.stdout
1015 overwrite -- ['yes', 'update', 'no']
1016 if 'update', only open output file if it is older than
1018 Irrelevant if outfile == '-'.
1021 strerror
= "Missing input file name ('-' for stdin; -h for help)"
1022 raise IOError, (2, strerror
, infile
)
1024 in_stream
= sys
.stdin
1026 in_stream
= file(infile
, 'r')
1028 out_stream
= sys
.stdout
1029 elif overwrite
== 'no' and os
.path
.exists(outfile
):
1030 raise IOError, (1, "Output file exists!", outfile
)
1031 elif overwrite
== 'update' and is_newer(outfile
, infile
):
1032 raise IOError, (1, "Output file is newer than input file!", outfile
)
1034 out_stream
= file(outfile
, 'w')
1035 return (in_stream
, out_stream
)
1042 def is_newer(path1
, path2
):
1043 """Check if `path1` is newer than `path2` (using mtime)
1045 Compare modification time of files at path1 and path2.
1047 Non-existing files are considered oldest: Return False if path1 doesnot
1048 exist and True if path2 doesnot exist.
1050 Return None for equal modification time. (This evaluates to False in a
1051 boolean context but allows a test for equality.)
1055 mtime1
= os
.path
.getmtime(path1
)
1059 mtime2
= os
.path
.getmtime(path2
)
1062 # print "mtime1", mtime1, path1, "\n", "mtime2", mtime2, path2
1064 if mtime1
== mtime2
:
1066 return mtime1
> mtime2
1072 # Get an instance of the converter state machine::
1074 def get_converter(data
, txt2code
=True, **keyw
):
1076 return Text2Code(data
, **keyw
)
1078 return Code2Text(data
, **keyw
)
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """Run doctest on the text source

    infile      -- path of the source ('-' for stdin)
    txt2code    -- False if `infile` is a code source; it is converted to
                   text before testing
    globs       -- optional dictionary with names visible to the examples
                   (a copy is used, the argument is not modified)
    verbose     -- passed on to the `DocTestRunner`
    optionflags -- passed on to the `DocTestRunner`

    Additional keyword arguments are passed on to the `Code2Text` converter.

    Return the tuple (failures, tries).
    """
    from doctest import DocTestParser, DocTestRunner
    (data, out_stream) = open_streams(infile, "-")

# If source is code, convert to text, as tests in comments are not found by
# doctest::

    try:
        if txt2code is False:
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # The complete text is in `docstring` now (or reading failed).
        if data is not sys.stdin:
            data.close()

# Use the doctest Advanced API to do all doctests in a given string::

    # Work on a copy: the runner clears the globals after the run.
    if globs is None:
        test_globs = {}
    else:
        test_globs = dict(globs)
    test = DocTestParser().get_doctest(docstring, globs=test_globs, name="",
                                       filename=infile, lineno=0)
    runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    runner.run(test)
    # Failures are reported by the runner itself, so only report success.
    if not runner.failures:
        print("%d failures in %d tests" % (runner.failures, runner.tries))
    return runner.failures, runner.tries
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile is '-', do a round-trip conversion and report differences

    infile   -- path of the source ('-' for stdin)
    outfile  -- path of an existing conversion result or '-'
    txt2code -- direction of the conversion of `infile`

    Additional keyword arguments are passed on to the converter.

    Print a unified diff and return True if there are differences,
    print a short notice and return False otherwise.
    """
    import difflib

    # for diffing, we need a copy of the data as list::
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        instream = open(infile)
        try:
            data = instream.readlines()
        finally:
            instream.close()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = str(converter).splitlines(True)

    if outfile != '-':
        outstream = open(outfile)
        try:
            old = outstream.readlines()
        finally:
            outstream.close()
        oldname = outfile
        newname = "<conversion of %s>" % infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        # NOTE(review): the back-conversion does not get the `keyw` options;
        # confirm that this is intended.
        converter = get_converter(new, not txt2code)
        new = str(converter).splitlines(True)
        newname = "<round-conversion of %s>" % infile

    # find and print the differences
    delta = list(difflib.unified_diff(old, new, fromfile=oldname,
                                      tofile=newname))
    if not delta:
        print(oldname)
        print(newname)
        print("no differences found")
        return False
    print("".join(delta))
    return True
1164 # If this script is called from the command line, the `main` function will
1165 # convert the input (file or stdin) between text and code formats.
# Option defaults for the conversion can be given as keyword arguments to
# `main`_. The option defaults will be updated by command line options and
# extended with "intelligent guesses" by `PylitOptions` and passed on to
# helper functions and the converter instantiation.
# This allows easy customization for programmatic use -- just call `main`
# with the appropriate keyword options (or with an `option_defaults`
# dictionary), e.g.:
1179 # >>> option_defaults = {'language': "c++",
1180 # ... 'codeindent': 4,
1181 # ... 'header_string': '..admonition::'
1184 # >>> main(**option_defaults)
def main(args=sys.argv[1:], **option_defaults):
    """%prog [options] FILE [OUTFILE]

    Convert between reStructured Text with embedded code, and
    Source code with embedded text comment blocks"""

    # The docstring above is kept as it is (it looks like a usage message,
    # note the ``%prog`` placeholder).
    #
    # args            -- list of command line arguments
    # option_defaults -- default values for the options
    #
    # Returns the result of `run_doctest` or `diff` if the corresponding
    # option is set, None otherwise. Exits with the error number if the
    # streams cannot be opened.

# Parse and complete the options::

    options = PylitOptions(args, **option_defaults).values

# Run doctests if ``--doctest`` option is set::

    if options.ensure_value("doctest", None):
        return run_doctest(**options.as_dict())

# Do a round-trip and report differences if the ``--diff`` option is set::

    if options.ensure_value("diff", None):
        return diff(**options.as_dict())

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**options.as_dict())
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance::

    converter = get_converter(data, **options.as_dict())

# Execute if the ``--execute`` option is set::

    if options.ensure_value("execute", None):
        print("executing " + options.infile)
        if options.txt2code:
            code = str(converter)
        else:
            code = data.read()
        # This runs the code of the input file on request of the user.
        exec(code)
        return

# Default action: Convert and write to out_stream::

    out_stream.write(str(converter))

    if out_stream is not sys.stdout:
        print("extract written to %s" % out_stream.name)
        out_stream.close()

    # The input is completely converted now. Close it before it is renamed
    # (an open file cannot be renamed on all platforms).
    if data is not sys.stdin:
        data.close()

# Rename the infile to a backup copy if ``--replace`` is set::

    if options.ensure_value("replace", None):
        os.rename(options.infile, options.infile + "~")

# If not (and input and output are from files), set the modification time
# (`mtime`) of the output file to the one of the input file to indicate that
# the contained information is equal.[#]_ ::

    else:
        try:
            os.utime(options.outfile, (os.path.getatime(options.outfile),
                                       os.path.getmtime(options.infile)))
        except OSError:
            # Best effort only: in- or output might not be a file.
            pass
1256 ## print "mtime", os.path.getmtime(options.infile), options.infile
1257 ## print "mtime", os.path.getmtime(options.outfile), options.outfile
# .. [#] Make sure the corresponding file object (here `out_stream`) is
#    closed, as otherwise the change will be overwritten when `close` is
#    called afterwards (either explicitly or at program exit).
1264 # Run main, if called from the command line::
if __name__ == '__main__':
    # Convert the file(s) given on the command line.
    main()
1273 # Open questions and ideas for further development
1278 # * Collect option defaults in a dictionary (on module level)
1280 # Facilitates the setting of options in programmatic use
1282 # Use templates for the "intelligent guesses" (with Python syntax for string
1283 # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1285 # * Is it sensible to offer the `header_string` option also as command line
1291 # ----------------------
1293 # * How can I include a literal block that should not be in the
1294 # executable code (e.g. an example, an earlier version or variant)?
# Use a `quoted literal block` (with a quotation different from
# the comment string used for text blocks) to keep it as commented over the
# code-text round-trips.
1301 # Python `pydoc` examples can also use the special pydoc block syntax (no
1305 # use a special "code block" directive or a special "no code
1308 # * ignore "matching comments" in literal strings?
# (would need a specific detection algorithm for every language that
# supports multi-line literal strings (C++, PHP, Python))
1313 # * Warn if a comment in code will become text after round-trip?
1315 # code syntax highlight
1316 # ---------------------
1318 # use `listing` package in LaTeX->PDF
1322 # * the syntax highlight support in rest2web
1323 # (uses the Moin-Moin Python colorizer, see a version at
1324 # http://www.standards-schmandards.com/2005/fangs-093/)
1325 # * Pygments (pure Python, many languages, rst integration recipe):
1326 # http://pygments.org/docs/rstdirective/
1327 # * Silvercity, enscript, ...
1329 # Some plug-ins require a special "code block" directive instead of the
1330 # `::`-literal block. TODO: make this an option
1332 # Ask at docutils users|developers
1334 # * How to handle docstrings in code blocks? (it would be nice to convert them
1335 # to rst-text if ``__docformat__ == restructuredtext``)