commit web-site sources to SVN
[pylit.git] / examples / pylit.py
blobf6951ffcf5eb57112c079211cf71e5e034f245b8
1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
4 # ===============================================================
5 # pylit.py: Literate programming with Python and reStructuredText
6 # ===============================================================
7 #
8 # :Date: 2007-01-31
9 # :Copyright: 2005, 2007 Guenter Milde.
10 # Released under the terms of the GNU General Public License
11 # (v. 2 or later)
13 # .. sectnum::
14 # .. contents::
16 # Frontmatter
17 # ===========
19 # Changelog
20 # ---------
22 # :2005-06-29: Initial version
23 # :2005-06-30: first literate version of the script
24 # :2005-07-01: object orientated script using generators
25 # :2005-07-10: Two state machine (later added 'header' state)
26 # :2006-12-04: Start of work on version 0.2 (code restructuring)
27 # :2007-01-23: 0.2 published at http://pylit.berlios.de
28 # :2007-01-25: 0.2.1 Outsourced non-core documentation to the PyLit pages.
29 # :2007-01-26: 0.2.2 new behaviour of `diff` function
30 # :2007-01-29: 0.2.3 new `header` methods after suggestion by Riccardo Murri
31 # :2007-01-31: 0.2.4 raise Error if code indent is too small
32 # :2007-02-05: 0.2.5 new command line option --comment-string
33 # :2007-02-09: 0.2.6 add section with open questions,
34 # Code2Text: let only blank lines (no comment str)
35 # separate text and code,
36 # fix `Code2Text.header`
37 # :2007-02-19: 0.2.7 simplify `Code2Text.header,`
38 # new `iter_strip` method replacing a lot of ``if``-s
39 # :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile
41 # ::
43 """pylit: Literate programming with Python and reStructuredText
45 PyLit is a bidirectional converter between
47 * a (reStructured) text source with embedded code, and
48 * a code source with embedded text blocks (comments)
49 """
51 __docformat__ = 'restructuredtext'
54 # Requirements
55 # ------------
57 # * library modules
59 # ::
61 import os
62 import sys
63 import optparse
65 # * non-standard extensions
67 # ::
69 from simplestates import SimpleStates # generic state machine
72 # Option defaults
73 # ===============
75 # Module-level option defaults for the conversion can be stored in a
76 # dictionary that is passed as keyword arguments to `main`_.
78 # `option_defaults` will be updated by command line options and extended with
79 # "intelligent guesses" by `PylitOptions` and passed on to helper functions
80 # and the converter instantiation.
82 # This allows easy customization for programmatic use -- just overwrite the
83 # defaults after importing `pylit` but before calling `main` (or call `main`
84 # with a custom `option_values` dictionary.)
86 # ::
option_defaults = {}

# Default language and language specific defaults::

option_defaults.update(
    language="python",
    comment_strings={"python": '# ',
                     "slang": '% ',
                     "c++": '// '},
    code_languages={".py": "python",
                    ".sl": "slang",
                    ".c": "c++"}
)
option_defaults["code_extensions"] = option_defaults["code_languages"].keys()
option_defaults["text_extensions"] = [".txt"]

# Number of spaces to indent code blocks in the code -> text conversion.[#]_
#
# .. [#] For the text -> code conversion, the codeindent is determined by the
#        first recognized code line (leading comment or first indented literal
#        block).
#
# ::

option_defaults["codeindent"] = 2

# Marker string for the first code block. (Should be a valid rst directive
# that accepts code on the same line, e.g. ``'.. admonition::'``.)  No
# trailing whitespace needed as indented code follows. ::

option_defaults["header_string"] = ".."

# Export to the output format stripping text or code blocks::

option_defaults["strip"] = False

# Execute code (Python only)::

option_defaults["execute"] = False
129 # Classes
130 # =======
132 # PushIterator
133 # ------------
135 # The PushIterator is a minimal implementation of an iterator with
136 # backtracking from the `Effective Python Programming`_ OSCON 2005 tutorial by
137 # Anthony Baxter. As the definition is small, it is inlined now. For the full
138 # reasoning and documentation see `iterqueue.py`_.
140 # .. _`Effective Python Programming`:
141 # http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf
143 # .. _iterqueue.py: iterqueue.py.html
145 # ::
class PushIterator:
    """Minimal iterator wrapper with backtracking (push-back) support.

    Values handed to `push` are returned again by the following calls of
    `next` (last pushed first) before the wrapped iterator is consumed
    any further.
    """
    def __init__(self, iterable):
        """Wrap `iterable` and start with an empty push-back cache."""
        self.it = iter(iterable)
        self.cache = []
    def __iter__(self):
        """Return `self`, as this is already an iterator"""
        return self
    def next(self):
        """Return the last pushed-back value or, if none, the next item.

        Raises StopIteration when the cache is empty and the wrapped
        iterator is exhausted.
        """
        # Test the cache itself, not the popped value: a pushed-back falsy
        # value (e.g. an empty string) must be returned, not silently dropped.
        if self.cache:
            return self.cache.pop()
        # The builtin `next` works with both the Python 2 (>= 2.6) and the
        # Python 3 iterator protocol.
        return next(self.it)
    __next__ = next     # Python 3 spelling of the iterator protocol
    def push(self, value):
        """Push `value` back; it is returned by the next call of `next`."""
        self.cache.append(value)
159 # Converter
160 # ---------
162 # The converter classes implement a simple `state machine` to separate and
163 # transform text and code blocks. For this task, only a very limited parsing
164 # is needed. Using the full blown docutils_ rst parser would introduce a
165 # large overhead and slow down the conversion.
167 # PyLit's simple parser assumes:
169 # * indented literal blocks in a text source are code blocks.
171 # * comment lines that start with a matching comment string in a code source
172 # are text blocks.
174 # .. _docutils: http://docutils.sourceforge.net/
176 # The actual converter classes are derived from `PyLitConverter`:
177 # `Text2Code`_ converts a text source to executable code, while `Code2Text`_
178 # does the opposite: converting commented code to a text source.
180 # The `PyLitConverter` class inherits the state machine framework
181 # (initialisation, scheduler, iterator interface, ...) from `SimpleStates`,
182 # overrides the ``__init__`` method, and adds auxiliary methods and
183 # configuration attributes (options). ::
class PyLitConverter(SimpleStates):
    """Common base class of the `Text2Code` and `Code2Text` state machines.

    Provides the shared configuration attributes and helper methods for the
    conversion between text source and code source of a literate program.
    """

# Data attributes
# ~~~~~~~~~~~~~~~
#
# The data attributes are class default values. They will be overridden by
# matching keyword arguments during class instantiation.
#
# `get_converter`_ and `main`_ pass on unused keyword arguments to
# the instantiation of a converter class. This way, keyword arguments
# to these functions can be used to customize the converter. ::

    language = option_defaults['language']
    comment_strings = option_defaults['comment_strings']
    strip = option_defaults['strip']
    codeindent = option_defaults['codeindent']
    header_string = option_defaults['header_string']

    state = 'header'    # the state machine starts in the "header" state

# Instantiation
# ~~~~~~~~~~~~~
#
# Initializing sets up the `data` attribute, an iterable object yielding
# lines of the source to convert.[1]_ ::

    def __init__(self, data, **keyw):
        """Store the source `data` and the conversion settings.

        data   -- iterable data object
                  (list, file, generator, string, ...)
        **keyw -- all remaining keyword arguments are
                  stored as attributes of the instance
        """

# As the state handlers need backtracking, the data is wrapped in a
# `PushIterator`_ if it does not already have a `push` method::

        if not hasattr(data, 'push'):
            data = PushIterator(data)
        self.data = data
        self._textindent = 0

# Additional keyword arguments are stored as data attributes, overwriting the
# class defaults::

        self.__dict__.update(keyw)

# The comment string is set to the languages comment string if not given in
# the keyword arguments::

        if not getattr(self, "comment_string", None):
            self.comment_string = self.comment_strings[self.language]

# If the `strip` argument is true, replace the `__iter__` method
# with a special one that drops "spurious" blocks::

        if self.strip:
            # NOTE(review): an instance-level `__iter__` is only honoured by
            # `iter()`/`for` on old-style classes -- confirm that
            # `SimpleStates` is one (or calls `self.__iter__` explicitly).
            self.__iter__ = self.iter_strip

# .. [1] The most common choice of data is a `file` object with the text
#        or code source.
#
#        To convert a string into a suitable object, use its splitlines method
#        with the optional `keepends` argument set to True.
#
# Converter.__str__
# ~~~~~~~~~~~~~~~~~
#
# Return converted data as string::

    def __str__(self):
        """Return the concatenation of all converted blocks."""
        return "".join(["".join(block) for block in self()])

# Converter.get_indent
# ~~~~~~~~~~~~~~~~~~~~
#
# Return the number of leading spaces in `string` after expanding tabs ::

    def get_indent(self, string):
        """Return the width of the leading whitespace of `string`.

        Tabs are expanded (with the `expandtabs` default) before counting.
        """
        expanded = string.expandtabs()
        return len(expanded) - len(expanded.lstrip())

# Converter.ensure_trailing_blank_line
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Ensure there is a blank line as last element of the list `lines`::

    def ensure_trailing_blank_line(self, lines, next_line):
        """Append a blank line to `lines` if its last line is not blank.

        `lines` is modified in place; an empty list is left untouched.
        A warning naming the last line and `next_line` is written to stderr
        whenever a blank line is inserted.
        """
        if lines and lines[-1].lstrip():
            sys.stderr.write("\nWarning: inserted blank line between\n %s %s"
                             % (lines[-1], next_line))
            lines.append("\n")
287 # Text2Code
288 # ---------
290 # The `Text2Code` class separates code blocks (indented literal blocks) from
291 # reStructured text. Code blocks are unindented, text is commented (or
292 # filtered, if the ``strip`` option is True).
294 # Only `indented literal blocks` are extracted. `quoted literal blocks` and
295 # `pydoc blocks` are treated as text. This allows the easy inclusion of
296 # examples: [#]_
298 # >>> 23 + 3
299 # 26
301 # .. [#] Mark that there is no double colon before the doctest block in
302 # the text source.
304 # The state handlers are implemented as generators. Iterating over a
305 # `Text2Code` instance initializes them to generate iterators for
306 # the respective states (see ``simplestates.py``).
308 # ::
class Text2Code(PyLitConverter):
    """Convert a (reStructured) text source to code source

    Indented literal blocks become code; all other lines are prefixed with
    `self.comment_string`.
    """

# Text2Code.header
# ~~~~~~~~~~~~~~~~
#
# Convert the header (leading rst comment block) to code::

    def header(self):
        """Convert header (comment) to code

        Return the result of the code handler if the first line starts with
        `self.header_string`. Otherwise push the line back, switch to the
        "text" state and return an empty list.
        """
        line = self.data_iterator.next()

# Test first line for rst comment: (We need to do this explicitly here, as
# the code handler will only recognize the start of a text block if a line
# starting with "matching comment" is preceded by an empty line. However, we
# have to care for the case of the first line being a "text line".)
#
# Which variant is better?
#
# 1. starts with comment marker and has
#    something behind the comment on the first line::

        # if line.startswith("..") and len(line.rstrip()) > 2:

# 2. Convert any leading comment to code::

        if line.startswith(self.header_string):

# Strip leading comment string (typically added by `Code2Text.header`) and
# return the result of processing the data with the code handler::

            self.data_iterator.push(line.replace(self.header_string, "", 1))
            return self.code()

# No header code found: Push back first non-header line and set state to
# "text"::

        self.data_iterator.push(line)
        self.state = 'text'
        return []

# Text2Code.text_handler_generator
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The 'text' handler processes everything that is not an indented literal
# comment. Text is quoted with `self.comment_string` or filtered (with
# strip=True).
#
# It is implemented as a generator function that acts on the `data` iterator
# and yields text blocks.
#
# Declaration and initialization::

    def text_handler_generator(self):
        """Convert text blocks from rst to comment

        Yield lists of commented lines, one list per text block.
        """
        lines = []

# Iterate over the data_iterator (which yields the data lines)::

        for line in self.data_iterator:
            # print "Text: '%s'"%line

# Default action: add comment string and collect in `lines` list::

            lines.append(self.comment_string + line)

# Test for the end of the text block: a line that ends with `::` but is neither
# a comment nor a directive::

            if (line.rstrip().endswith("::")
                and not line.lstrip().startswith("..")):

# End of text block is detected, now:
#
# set the current text indent level (needed by the code handler to find the
# end of code block) and set the state to "code" (i.e. the next call of
# `self.next` goes to the code handler)::

                self._textindent = self.get_indent(line)
                self.state = 'code'

# Ensure a trailing blank line (which is the paragraph separator in
# reStructured Text). Look at the next line, if it is blank -- OK, if it is
# not blank, push it back (it should be code) and add a line by calling the
# `ensure_trailing_blank_line` method (which also issues a warning).
#
# If the input ends right after the literal marker, there is no next line:
# yield the collected text block instead of losing it to the `StopIteration`
# raised by the data iterator::

                try:
                    line = self.data_iterator.next()
                except StopIteration:
                    yield lines
                    return
                if line.lstrip():
                    self.data_iterator.push(line) # push back
                    self.ensure_trailing_blank_line(lines, line)
                else:
                    lines.append(line)

# Now yield and reset the lines. (There was a function call to remove a
# literal marker (if on a line on itself) to shorten the comment. However,
# this behaviour was removed as the resulting difference in line numbers leads
# to misleading error messages in doctests)::

                #remove_literal_marker(lines)
                yield lines
                lines = []

# End of data: if we "fall off" the iteration loop, just join and return the
# lines::

        yield lines


# Text2Code.code_handler_generator
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The `code` handler is called when a literal block marker is encountered. It
# returns a code block (indented literal block), removing leading whitespace
# up to the indentation of the first code line in the file (this deviation
# from docutils behaviour allows indented blocks of Python code).
#
# As the code handler detects the switch to "text" state by looking at
# the line indents, it needs to push back the last probed data token. I.e.
# the data_iterator must support a `push` method. (This is the
# reason for the use of the PushIterator class in `__init__`.) ::

    def code_handler_generator(self):
        """Convert indented literal blocks to source code

        Yield lists of unindented code lines, one list per code block.
        Raise ValueError if a code line does not start with the indentation
        string of the first code line.
        """
        lines = []
        codeindent = None    # indent of first non-blank code line, set below
        indent_string = ""   # leading whitespace chars ...

# Iterate over the lines in the input data::

        for line in self.data_iterator:
            # print "Code: '%s'"%line

# Pass on blank lines (no test for end of code block needed|possible)::

            if not line.rstrip():
                lines.append(line.replace(indent_string, "", 1))
                continue

# Test for end of code block:
#
# A literal block ends with the first less indented, nonblank line.
# `self._textindent` is set by the text handler to the indent of the
# preceding paragraph.
#
# To prevent problems with different tabulator settings, hard tabs in code
# lines are expanded with the `expandtabs` string method when calculating the
# indentation (i.e. replaced by 8 spaces, by default).
#
# ::

            if self.get_indent(line) <= self._textindent:
                # push back line
                self.data_iterator.push(line)
                self.state = 'text'
                # append blank line (if not already present)
                self.ensure_trailing_blank_line(lines, line)
                yield lines
                # reset list of lines
                lines = []
                continue

# OK, we are sure now that the current line is neither blank nor a text line.
#
# If still unset, determine the code indentation from first non-blank code
# line::

            if codeindent is None and line.lstrip():
                codeindent = self.get_indent(line)
                indent_string = line[:codeindent]

# Append unindented line to lines cache (but check if we can safely unindent
# first). The exception is raised with the call syntax, which is valid in
# Python 2 as well as in Python 3::

            if not line.startswith(indent_string):
                message = ("cannot unindent line %r,\n" % line
                           + " doesnot start with code indent string %r"
                           % indent_string)
                raise ValueError(message)

            lines.append(line[codeindent:])

# No more lines in the input data: just return what we have::

        yield lines


# Txt2Code.remove_literal_marker
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Remove literal marker (::) in "expanded form" i.e. in a paragraph on its own.
#
# While cleaning up the code source, it leads to confusion for doctest and
# searches (e.g. grep) as line-numbers between text and code source will
# differ. ::

    def remove_literal_marker(self, lines):
        """Remove a literal marker that forms a paragraph on its own.

        `lines` (a list of commented text lines) is modified in place: if
        the last but one line is the commented marker and the line before
        it is a commented blank line, both are deleted. Lists with less
        than three lines are left untouched.

        Currently unused (the call in the text handler is commented out).
        """
        try:
            # print lines[-3:]
            if (lines[-3].strip() == self.comment_string.strip()
                and lines[-2].strip() == self.comment_string + '::'):
                del(lines[-3:-1])
        except IndexError:
            pass

# Text2Code.iter_strip
# ~~~~~~~~~~~~~~~~~~~~
#
# Modification of the `simplestates.__iter__` method that will replace it when
# the `strip` keyword argument is `True` during class instantiation:
#
# Iterate over class instances dropping text blocks::

    def iter_strip(self):
        """Generate and return an iterator dropping text blocks

        Alternately yields the result of the current state handler and
        discards the result of the following one.
        """
        self.data_iterator = self.data
        self._initialize_state_generators()
        while True:
            yield getattr(self, self.state)()
            getattr(self, self.state)() # drop text block
534 # Code2Text
535 # ---------
537 # The `Code2Text` class does the opposite of `Text2Code`_ -- it processes
538 # valid source code, extracts comments, and puts non-commented code in literal
539 # blocks.
541 # Only lines starting with a comment string matching the one in the
542 # `comment_string` data attribute are considered text lines.
544 # The class is derived from the PyLitConverter state machine and adds handlers
545 # for the three states "header", "text", and "code". ::
class Code2Text(PyLitConverter):
    """Convert code source to text source

    Lines starting with `self.comment_string` are uncommented to text,
    all other lines are indented by `self.codeindent` spaces to form
    literal blocks.
    """

# Code2Text.header
# ~~~~~~~~~~~~~~~~
#
# Sometimes code needs to remain on the first line(s) of the document to be
# valid. The most common example is the "shebang" line that tells a POSIX
# shell how to process an executable file::

#!/usr/bin/env python

# In Python, the ``# -*- coding: iso-8859-1 -*-`` line must occur before any
# other comment or code.
#
# If we want to keep the line numbers in sync for text and code source, the
# reStructured Text markup for these header lines must start at the same line
# as the first header line. Therefore, header lines could not be marked as
# literal block (this would require the "::" and an empty line above the code).
#
# OTOH, a comment may start at the same line as the comment marker and it
# includes subsequent indented lines. Comments are visible in the reStructured
# Text source but hidden in the pretty-printed output.
#
# With a header converted to comment in the text source, everything before the
# first text block (i.e. before the first paragraph using the matching comment
# string) will be hidden away (in HTML or PDF output).
#
# This seems a good compromise, the advantages
#
# * line numbers are kept
# * the "normal" code conversion rules (indent/unindent by `codeindent`) apply
# * greater flexibility: you can hide a repeating header in a project
#   consisting of many source files.
#
# set off the disadvantages
#
# - it may come as surprise if a part of the file is not "printed",
# - one more syntax element to learn for rst newbies to start with pylit,
#   (however, starting from the code source, this will be auto-generated)
#
# In the case that there is no matching comment at all, the complete code
# source will become a comment -- however, in this case it is not very likely
# the source is a literate document anyway.
#
# If needed for the documentation, it is possible to repeat the header in (or
# after) the first text block, e.g. with a `line block` in a `block quote`:
#
#   |  ``#!/usr/bin/env python``
#   |  ``# -*- coding: iso-8859-1 -*-``
#
# ::

    def header(self):
        """Convert leading code to rst comment

        Return the first block found by the text handler or, if that block
        is empty, a list containing just `self.header_string`.
        """

# Parse with the `text` method. If there is no leading text, return the
# `header_string` (by default the rst comment marker)::

        lines = self.text()
        if lines:
            return lines
        return [self.header_string]


# Code2Text.text_handler_generator
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The text handler converts a comment to a text block if it matches the
# following requirements:
#
# * every line starts with a matching comment string (test includes whitespace!)
# * comment is separated from code by a blank line (the paragraph separator in
#   reStructuredText)
#
# It is implemented as a generator function that acts on the `data` iterator
# and yields text blocks.
#
# Text is uncommented. A literal block marker is appended, if not already
# present ::

    def text_handler_generator(self):
        """Uncomment text blocks in source code

        Yield lists of text lines with the comment string removed, one list
        per text block.
        """

# Set up an output cache and iterate over the data lines (remember, code lines
# are processed by the code handler and not seen here). ::

        lines = []
        for line in self.data_iterator:
            # print "Text: " + line

# Pass on blank lines. Strip comment string from otherwise blank lines
# Continue with the next line, as there is no need to test blank lines
# for the end of text. ::

            if not line.lstrip():
                lines.append(line)
                continue

# Test for end of text block: the first line that does not start with a
# matching comment string. This tests also whitespace that is part of the
# comment string! ::

            if not line.startswith(self.comment_string):

# Missing whitespace in the `comment_string` is not significant for otherwise
# blank lines. Add the whitespace and continue::

                if line.rstrip() == self.comment_string.rstrip():
                    lines.append(line.replace(self.comment_string.rstrip(),
                                              self.comment_string, 1))
                    continue

# End of text block: Push back the line and let the "code" handler handle it
# (and subsequent lines)::

                self.state = 'code'
                self.data_iterator.push(line)

# Also restore and push back lines that precede the next code line without a
# blank line (paragraph separator) in between::

                while lines and lines[-1].lstrip():
                    self.data_iterator.push(lines.pop())

# Strip the leading comment string::

                lines = [text.replace(self.comment_string, "", 1)
                         for text in lines]

# Ensure literal block marker (double colon) at the end of the text block::

                if len(lines)>1 and not lines[-2].rstrip().endswith("::"):
                    lines.extend(["::\n", "\n"])

# Yield the text block (process following lines with `code_handler`).
# When the state is again set to "text", reset the cache and continue with
# next text line ::

                yield lines
                lines = []
                continue

# Test passed: It's text line. Append to the `lines` cache::

            lines.append(line)

# No more lines: Just return the remaining lines::

        yield [text.replace(self.comment_string, "", 1) for text in lines]


# Code2Text.code_handler_generator
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The `code` method is called on non-commented code. Code is returned as
# indented literal block (or filtered, if ``strip=True``). The amount of the
# code indentation is controlled by `self.codeindent` (default 2).
#
# ::

    def code_handler_generator(self):
        """Convert source code to indented literal blocks.

        Yield lists of code lines indented by `self.codeindent` spaces, one
        list per code block.
        """
        lines = []
        for line in self.data_iterator:
            # yield "Code: " + line
            # pass on empty lines (only newline)
            if line == "\n":
                lines.append(line)
                continue
            # # strip comment string from blank lines
            # if line.rstrip() == self.comment_string.rstrip():
            #     lines.append("\n")
            #     continue

# Test for end of code block:
#
# * matching comment string at begin of line,
# * following a blank line.
#
# The test includes whitespace in `self.comment_string` normally, but ignores
# trailing whitespace if the line after the comment string is blank. ::

            if (line.startswith(self.comment_string) or
                line.rstrip() == self.comment_string.rstrip()
               ) and lines and not lines[-1].strip():

                self.data_iterator.push(line)
                self.state = 'text'
                # self.ensure_trailing_blank_line(lines, line)
                yield lines
                # reset
                lines = []
                continue

# default action: indent by codeindent and append to lines cache::

            lines.append(" "*self.codeindent + line)

# no more lines in data_iterator -- return collected lines::

        yield lines


# Code2Text.iter_strip
# ~~~~~~~~~~~~~~~~~~~~
#
# Modification of the `simplestates.__iter__` method that will replace it when
# the `strip` keyword argument is `True` during class instantiation:
#
# Iterate over class instances dropping the header block and code blocks::

    def iter_strip(self):
        """Generate and return an iterator dropping code|text blocks

        Yields the text blocks only, with the literal block markers removed
        by `strip_literal_marker`.
        """
        self.data_iterator = self.data
        self._initialize_state_generators()
        textblock = self.header() # drop the header
        if textblock != [self.header_string]:
            self.strip_literal_marker(textblock)
            yield textblock
        while True:
            getattr(self, self.state)() # drop code blocks
            textblock = getattr(self, self.state)()
            self.strip_literal_marker(textblock)
            yield textblock


# Code2Text.strip_literal_marker
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# If the code block is stripped, the literal marker would lead to an error
# when the text is converted with docutils. Replace it with the equivalent of
# docutils replace rules
#
# * strip `::`-line as well as preceding blank line if on a line on its own
# * strip `::` if it is preceded by whitespace.
# * convert `::` to a single colon if preceded by text
#
# `lines` should be list of text lines (with a trailing blank line).
# It is modified in-place. A block whose last but one line does not end with
# the literal marker (e.g. the last text block of a file) is left untouched::

    def strip_literal_marker(self, lines):
        """Remove the literal block marker from the end of a text block.

        lines -- list of text lines, the marker is expected at the end of
                 the last but one line. Modified in place.
        """
        if len(lines) < 2:
            return
        marked = lines[-2]
        # Only act on a real (trailing) marker. Without this guard a plain
        # last line would lose its newline and a "::" in the middle of the
        # line would be turned into a single colon.
        if not marked.rstrip().endswith('::'):
            return
        if marked.strip() == '::':
            # marker on a line of its own: drop it and a preceding blank line
            del(lines[-2])
            if len(lines) >= 2 and not lines[-2].lstrip():
                del(lines[-2])
            return
        text, rest = marked.rsplit('::', 1)
        if text != text.rstrip():
            # marker preceded by whitespace: remove both
            lines[-2] = text.rstrip() + rest
        else:
            # marker preceded by text: leave a single colon
            lines[-2] = text + ":" + rest
808 # Command line use
809 # ================
811 # Using this script from the command line will convert a file according to its
812 # extension. This default can be overridden by a couple of options.
814 # Dual source handling
815 # --------------------
817 # How to determine which source is up-to-date?
818 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
820 # - set modification date of `outfile` to the one of `infile`
822 # Points out that the source files are 'synchronized'.
824 # * Are there problems to expect from "backdating" a file? Which?
826 # Looking at http://www.unix.com/showthread.php?t=20526, it seems
827 # perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
828 # description of the "actuality" of the data in the file.
830 # * Should this become a default or an option?
832 # - alternatively move input file to a backup copy (with option: `--replace`)
834 # - check modification date before overwriting
835 # (with option: `--overwrite=update`)
837 # - check modification date before editing (implemented as `Jed editor`_
838 # function `pylit_check()` in `pylit.sl`_)
840 # .. _Jed editor: http://www.jedsoft.org/jed/
841 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
843 # Recognised Filename Extensions
844 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
846 # Finding an easy to remember, unused filename extension is not easy.
848 # .py.txt
849 # a double extension (similar to .tar.gz, say) seems most appropriate
850 # (at least on UNIX). However, it fails on FAT16 filesystems.
851 # The same scheme can be used for c.txt, p.txt and the like.
853 # .pytxt
854 # is recognised as extension by os.path.splitext but also fails on FAT16
856 # .pyt
857 # (PYthon Text) is used by the Python test interpreter
858 # `pytest <http://www.zetadev.com/software/pytest/>`__
860 # .pyl
861 # was even mentioned as extension for "literate Python" files in an
862 # email exchange (http://www.python.org/tim_one/000115.html) but
863 # subsequently used for Python libraries.
865 # .lpy
866 # seems to be free (as by a Google search, "lpy" is the name of a python
867 # code pretty printer but this should not pose a problem).
869 # .tpy
870 # seems to be free as well.
872 # Instead of defining a new extension for "pylit" literate programs,
873 # by default ``.txt`` will be appended for literate code and stripped by
874 # the conversion to executable code. i.e. for a program foo:
876 # * the literate source is called ``foo.py.txt``
877 # * the html rendering is called ``foo.py.html``
878 # * the python source is called ``foo.py``
882 # OptionValues
883 # ------------
885 # For use as keyword arguments, it is handy to have the options
886 # in a dictionary. The following class adds an `as_dict` method
887 # to `optparse.Values`::
class OptionValues(optparse.Values):
    """`optparse.Values` extended with an `as_dict` method."""
    def as_dict(self):
        """Return options as dictionary object

        The dictionary maps every attribute that is not also found in
        `dir(OptionValues)` (i.e. the option values proper) to its value.
        """
        # Collect the names to skip once instead of once per attribute.
        inherited = set(dir(OptionValues))
        # The former additional filter ``option is not None`` tested the
        # attribute *name*, which is never None, so it had no effect and is
        # dropped. Options whose value is None are still included.
        # NOTE(review): if skipping None *values* was intended, confirm with
        # the callers before changing the filter.
        return dict([(option, getattr(self, option)) for option in dir(self)
                     if option not in inherited])
897 # PylitOptions
898 # ------------
900 # Options are stored in the values attribute of the `PylitOptions` class.
901 # It is initialized with default values and parsed command line options (and
902 # arguments). This scheme allows easy customization by code importing the
903 # `pylit` module. ::
905 class PylitOptions(object):
906 """Storage and handling of program options
909 # Recognized file extensions for text and code versions of the source::
911 code_languages = option_defaults['code_languages']
912 code_extensions = option_defaults['code_extensions']
913 text_extensions = option_defaults['text_extensions']
915 # Instantiation
916 # ~~~~~~~~~~~~~
918 # Instantiation sets up an OptionParser and initializes it with pylit's
919 # command line options and `default_values`. It then updates the values based
920 # on command line options and sensible defaults::
def __init__(self, args=None, **default_values):
    """Set up an `OptionParser` instance and parse and complete arguments

    args           -- list of command line arguments
                      (default: `sys.argv[1:]` at the time of the call)
    default_values -- option defaults, passed on to the parser

    The parser is stored in `self.parser`, the parsed and completed
    option values in `self.values`.
    """
    # Resolve the default here: a default of ``sys.argv[1:]`` in the
    # signature would be evaluated only once, when the module is imported.
    if args is None:
        args = sys.argv[1:]
    p = optparse.OptionParser(usage=main.__doc__, version="0.2")
    # set defaults
    p.set_defaults(**default_values)
    # add the options
    p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                 help="convert code to reStructured text")
    p.add_option("--comment-string", dest="comment_string",
                 help="text block marker (default '# ' (for Python))" )
    p.add_option("-d", "--diff", action="store_true",
                 help="test for differences to existing file")
    p.add_option("--doctest", action="store_true",
                 help="run doctest.testfile() on the text version")
    p.add_option("-e", "--execute", action="store_true",
                 help="execute code (Python only)")
    # '-' as *input* file name stands for stdin (the help text used to
    # say stdout, copied from the --outfile option)
    p.add_option("-f", "--infile",
                 help="input file name ('-' for stdin)" )
    p.add_option("--overwrite", action="store",
                 choices = ["yes", "update", "no"],
                 help="overwrite output file (default 'update')")
    p.add_option("-o", "--outfile",
                 help="output file name ('-' for stdout)" )
    p.add_option("--replace", action="store_true",
                 help="move infile to a backup copy (appending '~')")
    p.add_option("-s", "--strip", action="store_true",
                 help="export by stripping text or code")
    p.add_option("-t", "--txt2code", action="store_true",
                 help="convert reStructured text to code")
    self.parser = p

    # parse to fill a self.Values instance
    self.values = self.parse_args(args)
    # complete with context-sensitive defaults
    self.values = self.complete_values(self.values)
959 # Calling
960 # ~~~~~~~
962 # "Calling" an instance updates the option values based on command line
963 # arguments and default values and does a completion of the options based on
964 # "context-sensitive defaults"::
966 def __call__(self, args=sys.argv[1:], **default_values):
967 """parse and complete command line args
969 values = self.parse_args(args, **default_values)
970 return self.complete_values(values)
973 # PylitOptions.parse_args
974 # ~~~~~~~~~~~~~~~~~~~~~~~
976 # The `parse_args` method calls the `optparse.OptionParser` on command
977 # line or provided args and returns the result as `PylitOptions.Values`
978 # instance. Defaults can be provided as keyword arguments::
def parse_args(self, args=sys.argv[1:], **default_values):
    """Parse `args` with the `optparse.OptionParser` in `self.parser`.

    args           -- list of command line arguments.
    default_values -- option defaults; they override the defaults
                      stored in the parser.

    Return the option values. The first positional argument is stored
    as `infile`, the second as `outfile`; any further positional
    arguments are ignored.
    """
    # parser defaults, overridden by the keyword arguments
    initial_values = OptionValues(dict(self.parser.defaults,
                                       **default_values))
    values, positional = self.parser.parse_args(args, initial_values)
    # FILE and OUTFILE positional args become option values
    for name, argument in zip(("infile", "outfile"), positional):
        setattr(values, name, argument)
    return values
1000 # PylitOptions.complete_values
1001 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1003 # The `complete_values` method uses context information to set missing option
1004 # values to sensible defaults (if possible).
1006 # ::
def complete_values(self, values):
    """Complete option values with context-sensitive defaults.

    values -- option values object providing `ensure_value` (as
              `optparse.Values` does). It is modified in place.

    Options that are missing (or None) are set as follows:

    infile    -- ""
    txt2code  -- True/False if the extension of `infile` is listed in
                 `self.text_extensions`/`self.code_extensions`;
                 otherwise False if the extension of `outfile` is a
                 text extension, else True
    outfile   -- the result of `self.get_outfile_name`
    language  -- looked up in `self.code_languages` by the extension of
                 the code file, "python" if there is no entry
    overwrite -- 'update'

    Return `values`.
    """
    values.ensure_value("infile", "")
    # Guess conversion direction from infile filename
    if values.ensure_value("txt2code", None) is None:
        in_extension = os.path.splitext(values.infile)[1]
        if in_extension in self.text_extensions:
            values.txt2code = True
        elif in_extension in self.code_extensions:
            values.txt2code = False
    # Auto-determine the output file name
    values.ensure_value("outfile", self.get_outfile_name(values.infile,
                                                         values.txt2code))
    # Guess conversion direction from outfile filename or set to default
    if values.txt2code is None:
        out_extension = os.path.splitext(values.outfile)[1]
        values.txt2code = out_extension not in self.text_extensions

    # Set the language of the code (default "python"). The code file is
    # the output for text->code conversion and the input otherwise.
    # `txt2code` is tested for truth (not identity with True/False), so
    # that values like 1 or 0 given as defaults do not leave
    # `code_extension` unbound.
    if values.txt2code:
        code_file = values.outfile
    else:
        code_file = values.infile
    code_extension = os.path.splitext(code_file)[1]
    values.ensure_value("language",
                        self.code_languages.get(code_extension, "python"))

    # Set the default overwrite mode
    values.ensure_value("overwrite", 'update')

    return values
1040 # PylitOptions.get_outfile_name
1041 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1043 # Construct a matching filename for the output file. The output filename is
1044 # constructed from `infile` by the following rules:
1046 # * '-' (stdin) results in '-' (stdout)
1047 # * strip the `txt_extension` or add the `code_extension` (txt2code)
1048 # * add a `txt_extension` (code2txt)
1049 # * fallback: if no guess can be made, add ".out"
1051 # ::
def get_outfile_name(self, infile, txt2code=None):
    """Return an output filename matching `infile`.

    infile   -- name of the input file ('-' stands for stdin)
    txt2code -- conversion direction (False: code to text)

    '-' is returned unchanged. A text extension is stripped; a name
    with a code extension (or any name if `txt2code` equals False)
    gets the first text extension appended; everything else gets
    ".out" appended.
    """
    # if input is stdin, default output is stdout
    if infile == '-':
        return '-'
    stem, suffix = os.path.splitext(infile)
    # TODO: should get_outfile_name() use self.values.outfile_extension
    #       if it exists?
    if suffix in self.text_extensions:
        # text file: strip the text extension
        outfile = stem
    elif suffix in self.code_extensions or txt2code == False:
        # code file: add the (first) text extension
        outfile = infile + self.text_extensions[0]
    else:
        # no guess possible
        outfile = infile + ".out"
    return outfile
1075 # Helper functions
1076 # ----------------
1078 # open_streams
1079 # ~~~~~~~~~~~~
1081 # Return file objects for in- and output. If the input path is missing,
1082 # write usage and abort. (An alternative would be to use stdin as default.
1083 # However, this leaves the uninitiated user with a non-responding application
1084 # if (s)he just tries the script without any arguments) ::
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open(infile) or sys.stdin (if infile is '-')
    out_stream  --  open(outfile, 'w') or sys.stdout (if outfile is '-')
    overwrite   --  ['yes', 'update', 'no']
                    if 'update', only open output file if it is older than
                    the input stream.
                    Irrelevant if outfile == '-'.
    keyw        --  further keyword arguments are accepted and ignored

    Raise IOError

    * (errno 2) if `infile` is empty,
    * (errno 1) if `outfile` exists and `overwrite` is 'no',
    * (errno 1) if `outfile` is newer than `infile` and `overwrite` is
      'update',
    * if one of the files cannot be opened.
    """
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(1, "Output file exists!", outfile)
        elif overwrite == 'update' and is_newer(outfile, infile):
            raise IOError(1, "Output file is newer than input file!", outfile)
        else:
            out_stream = open(outfile, 'w')
    except IOError:
        # no stream pair can be returned: do not leak the open input file
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1115 # is_newer
1116 # ~~~~~~~~
1118 # ::
def is_newer(path1, path2):
    """Check if `path1` is newer than `path2` (using mtime)

    The modification times of the files at `path1` and `path2` are
    compared.

    Non-existing files are considered oldest: Return False if `path1` does
    not exist and True if `path2` does not exist.

    Return None for equal modification time (this includes the case that
    neither file exists). None evaluates to False in a boolean context but
    allows a test for equality.
    """
    def mtime_or_oldest(path):
        # a file whose mtime cannot be read counts as older than any other
        try:
            return os.path.getmtime(path)
        except OSError:
            return -1

    mtime1, mtime2 = mtime_or_oldest(path1), mtime_or_oldest(path2)
    return None if mtime1 == mtime2 else mtime1 > mtime2
1147 # get_converter
1148 # ~~~~~~~~~~~~~
1150 # Get an instance of the converter state machine::
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`.

    data     -- the source, handed on to the converter
    txt2code -- if true, a `Text2Code` instance is returned,
                else a `Code2Text` instance
    keyw     -- further keyword arguments are handed on to the converter
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1159 # Use cases
1160 # ---------
1162 # run_doctest
1163 # ~~~~~~~~~~~
1165 # ::
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- name of the source file ('-' for stdin)
    txt2code    -- conversion direction of the source: if False, `infile`
                   is code and is converted to text before testing
    globs       -- dictionary of globals for the tests (default: empty).
                   The tests run in a copy, the argument is not modified.
    verbose     -- handed on to `doctest.DocTestRunner`
    optionflags -- handed on to `doctest.DocTestRunner`
    keyw        -- handed on to `Code2Text` (only used for code sources)

    Return the tuple (number of failures, number of tests tried).
    """
    from doctest import DocTestParser, DocTestRunner
    (data, out_stream) = open_streams(infile, "-")

# If source is code, convert to text, as tests in comments are not found by
# doctest::

    try:
        if txt2code is False:
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # the source is completely read (or unreadable) at this point
        if data is not sys.stdin:
            data.close()

# Use the doctest Advanced API to do all doctests in a given string::

    # the runner clears the globals after the run -> work on a copy
    test = DocTestParser().get_doctest(docstring, globs=dict(globs or {}),
                                       name="", filename=infile, lineno=0)
    runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    runner.run(test)
    # `summarize` reports failures (and, if verbose, passed tests) ...
    runner.summarize()
    # ... so only a successful run needs a result line of its own
    if not runner.failures:
        print("%d failures in %d tests" % (runner.failures, runner.tries))
    return runner.failures, runner.tries
1195 # diff
1196 # ~~~~
1198 # ::
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile is '-', do a round-trip conversion and report differences

    infile   -- name of the input file ('-' for stdin)
    outfile  -- name of the file to compare the conversion with
    txt2code -- conversion direction, handed on to `get_converter`
    keyw     -- further converter options, handed on to `get_converter`

    The result is printed as unified diff. Return True if differences
    were found, False otherwise.
    """
    import difflib

    def read_lines(path):
        # return the lines of the file at `path` ('-': stdin) as list
        if path == '-':
            return sys.stdin.readlines()
        stream = open(path)
        try:
            return stream.readlines()
        finally:
            stream.close()

    # for diffing, we need a copy of the data as list::
    data = read_lines(infile)
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = str(converter).splitlines(True)

    if outfile != '-':
        old = read_lines(outfile)
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data (with the same options, so that
        # e.g. a custom comment string is recognized on the way back)
        converter = get_converter(new, not txt2code, **keyw)
        new = str(converter).splitlines(True)
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    delta = list(difflib.unified_diff(old, new, fromfile=oldname,
                                      tofile=newname))
    if not delta:
        print(oldname)
        print(newname)
        print("no differences found")
        return False
    print("".join(delta))
    return True
1239 # main
1240 # ----
1242 # If this script is called from the command line, the `main` function will
1243 # convert the input (file or stdin) between text and code formats::
def main(args=sys.argv[1:], **option_defaults):
    """%prog [options] FILE [OUTFILE]

    Convert between reStructured Text with embedded code, and
    Source code with embedded text comment blocks"""

    # NOTE: the docstring doubles as usage message of the option parser
    # (``usage=main.__doc__``). Document the function here instead:
    #
    # args            -- list of command line arguments
    # option_defaults -- option defaults, handed on to `PylitOptions`
    #
    # Returns the result of `run_doctest` or `diff` if the corresponding
    # option is set, else None. Exits with the error number if the
    # streams cannot be opened.

# Parse and complete the options::

    options = PylitOptions(args, **option_defaults).values

# Run doctests if ``--doctest`` option is set::

    if options.ensure_value("doctest", None):
        return run_doctest(**options.as_dict())

# Do a round-trip and report differences if the ``--diff`` option is set::

    if options.ensure_value("diff", None):
        return diff(**options.as_dict())

# Open in- and output streams::

    execute = options.ensure_value("execute", None)
    stream_options = dict(options.as_dict())
    if execute:
        # Nothing is written in execute mode: use stdout as output stream
        # to prevent that the output file is opened (and thus truncated).
        stream_options["outfile"] = "-"
    try:
        (data, out_stream) = open_streams(**stream_options)
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance::

    converter = get_converter(data, **options.as_dict())

# Execute if the ``--execute`` option is set::

    if execute:
        print("executing " + options.infile)
        if options.txt2code:
            code = str(converter)
        else:
            code = data.read()
        exec(code)
        return

# Default action: Convert and write to out_stream::

    out_stream.write(str(converter))

    if out_stream is not sys.stdout:
        print("extract written to %s" % out_stream.name)
        out_stream.close()
    # the input is converted; close it before the file is renamed
    if data is not sys.stdin:
        data.close()

# Rename the infile to a backup copy if ``--replace`` is set::

    if options.ensure_value("replace", None):
        os.rename(options.infile, options.infile + "~")

# If not (and input and output are from files), set the modification time
# (`mtime`) of the output file to the one of the input file to indicate that
# the contained information is equal.[#]_ ::

    else:
        try:
            os.utime(options.outfile, (os.path.getatime(options.outfile),
                                       os.path.getmtime(options.infile)))
        except OSError:
            # best effort: there are no file times if stdin or stdout is used
            pass
1313 ## print "mtime", os.path.getmtime(options.infile), options.infile
1314 ## print "mtime", os.path.getmtime(options.outfile), options.outfile
1317 # .. [#] Make sure the corresponding file object (here `out_stream`) is
1318 # closed, as otherwise the change will be overwritten when `close` is
1319 # called afterwards (either explicitly or at program exit).
1321 # Run main, if called from the command line::
# Script entry point: `main` is called without arguments, i.e. it works
# on its default argument list (taken from ``sys.argv[1:]``).
if __name__ == "__main__":
    main()
1327 # Open questions
1328 # ==============
1330 # Open questions and ideas for further development
1332 # Options
1333 # -------
1335 # * Collect option defaults in a dictionary (on module level)
1337 # Facilitates the setting of options in programmatic use
1339 # Use templates for the "intelligent guesses" (with Python syntax for string
1340 # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1342 # * Is it sensible to offer the `header_string` option also as command line
1343 # option?
1345 # * Configurable
1347 # Parsing Problems
1348 # ----------------------
1350 # * How can I include a literal block that should not be in the
1351 # executable code (e.g. an example, an earlier version or variant)?
1353 # Workaround:
1354 # Use a `quoted literal block` (with a quotation different from
1355 # the comment string used for text blocks) to keep it as commented over the
1356 # code-text round-trips.
1358 # Python `pydoc` examples can also use the special pydoc block syntax (no
1359 # double colon!).
1361 # Alternative:
1362 # use a special "code block" directive or a special "no code
1363 # block" directive.
1365 # * ignore "matching comments" in literal strings?
1367 # (would need a specific detection algorithm for every language that
1368 # supports multi-line literal strings (C++, PHP, Python))
1370 # * Warn if a comment in code will become text after round-trip?
1372 # code syntax highlight
1373 # ---------------------
1375 # use `listings` package in LaTeX->PDF
1377 # in html, see
1379 # * the syntax highlight support in rest2web
1380 # (uses the Moin-Moin Python colorizer, see a version at
1381 # http://www.standards-schmandards.com/2005/fangs-093/)
1382 # * Pygments (pure Python, many languages, rst integration recipe):
1383 # http://pygments.org/docs/rstdirective/
1384 # * Silvercity, enscript, ...
1386 # Some plug-ins require a special "code block" directive instead of the
1387 # `::`-literal block. TODO: make this an option
1389 # Ask at docutils users|developers
1391 # * How to handle docstrings in code blocks? (it would be nice to convert them
1392 # to rst-text if ``__docformat__ == restructuredtext``)