src/pylit.py

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3
   4 # ===============================================================
   5 # pylit.py: Literate programming with reStructuredText
   6 # ===============================================================
   7 #
   8 # :Date:      $Date$
   9 # :Version:   SVN-Revision $Revision$
  10 # :URL:       $URL$
  11 # :Copyright: 2005, 2007 Guenter Milde.
  12 #             Released under the terms of the GNU General Public License
  13 #             (v. 2 or later)
  14 #
  15 # .. sectnum::
  16 # .. contents::
  17 #
  18 # Frontmatter
  19 # ===========
  20 #
  21 # Changelog
  22 # ---------
  23 #
  24 # :2005-06-29: Initial version.
  25 # :2005-06-30: First literate version.
  26 # :2005-07-01: Object-oriented script using generators.
  27 # :2005-07-10: Two state machine (later added 'header' state).
  28 # :2006-12-04: Start of work on version 0.2 (code restructuring).
  29 # :2007-01-23: 0.2   Published at http://pylit.berlios.de.
  30 # :2007-01-25: 0.2.1 Outsourced non-core documentation to the PyLit pages.
  31 # :2007-01-26: 0.2.2 New behaviour of `diff` function.
  32 # :2007-01-29: 0.2.3 New `header` methods after suggestion by Riccardo Murri.
  33 # :2007-01-31: 0.2.4 Raise Error if code indent is too small.
  34 # :2007-02-05: 0.2.5 New command line option --comment-string.
  35 # :2007-02-09: 0.2.6 Add section with open questions,
  36 #                    Code2Text: let only blank lines (no comment str)
  37 #                    separate text and code,
  38 #                    fix `Code2Text.header`.
  39 # :2007-02-19: 0.2.7 Simplify `Code2Text.header`,
  40 #                    new `iter_strip` method replacing a lot of ``if``-s.
  41 # :2007-02-22: 0.2.8 Set `mtime` of outfile to the one of infile.
  42 # :2007-02-27: 0.3   New `Code2Text` converter after an idea by Riccardo Murri,
  43 #                    explicit `option_defaults` dict for easier customization.
  44 # :2007-03-02: 0.3.1 Expand hard-tabs to prevent errors in indentation,
  45 #                    `Text2Code` now also works on blocks,
  46 #                    removed dependency on SimpleStates module.
  47 # :2007-03-06: 0.3.2 Bugfix: do not set `language` in `option_defaults`
  48 #                    renamed `code_languages` to `languages`.
  49 # :2007-03-16: 0.3.3 New language css,
  50 #                    option_defaults -> defaults = optparse.Values(),
  51 #                    simpler PylitOptions: don't store parsed values,
  52 #                    don't parse at initialization,
  53 #                    OptionValues: return `None` for non-existing attributes,
  54 #                    removed -infile and -outfile, use positional arguments.
  55 # :2007-03-19: 0.3.4 Documentation update,
  56 #                    separate `execute` function.
  57 # :2007-03-21:       Code cleanup in `Text2Code.__iter__`.
  58 # :2007-03-23: 0.3.5 Removed "css" from known languages after learning that
  59 #                    there is no C++ style "// " comment string in CSS2.
  60 # :2007-04-24: 0.3.6 Documentation update.
  61 # :2007-05-18: 0.4   Implement Converter.__iter__ as stack of iterator
  62 #                    generators. Iterating over a converter instance now
  63 #                    yields lines instead of blocks.
  64 #                    Provide "hooks" for pre- and postprocessing filters.
  65 #                    Rename states to avoid confusion with formats:
  66 #                    "text" -> "documentation", "code" -> "code_block".
  67 # :2007-05-22: 0.4.1 Converter.__iter__: cleanup and reorganization,
  68 #                    rename parent class Converter -> TextCodeConverter.
  69 # :2007-05-23: 0.4.2 Merged Text2Code.converter and Code2Text.converter into
  70 #                    TextCodeConverter.converter.
  71 # :2007-05-30: 0.4.3 Replaced use of defaults.code_extensions with
  72 #                    values.languages.keys().
  73 #                    Removed spurious `print` statement in code_block_handler.
  74 #                    Added basic support for 'c' and 'css' languages
  75 #                    with `dumb_c_preprocessor`_ and `dumb_c_postprocessor`_.
  76 # :2007-06-06: 0.5   Moved `collect_blocks`_ out of `TextCodeConverter`_,
  77 #                    bugfix: collect all trailing blank lines into a block.
  78 #                    Expand tabs with `expandtabs_filter`_.
  79 # :2007-06-20: 0.6   Configurable code-block marker (default ``::``)
  80 # :2007-06-28: 0.6.1 Bugfix: reset self.code_block_marker_missing
  81 # :2007-12-12: 0.7   prepending an empty string to sys.path in run_doctest() to
  82 #                    allow imports from the current working dir
  83 #
  84 # ::
  85
  86 """pylit: bidirectional converter between a *text source* with embedded
  87 computer code and a *code source* with embedded documentation.
  88 """
  89
  90 __docformat__ = 'restructuredtext'  # markup language of the docstrings
  91
  92 _version = "0.5"  # NOTE(review): the changelog above ends at 0.7 -- confirm whether this is stale
  93
  94
  95 # Introduction
  96 # ------------
  97 #
  98 # PyLit is a bidirectional converter between two formats of a computer
  99 # program source:
 100 #
 101 # * a (reStructured) text document with program code embedded in
 102 #   *code blocks*, and
 103 # * a compilable (or executable) code source with *documentation* embedded in
 104 #   comment blocks
 105 #
 106 #
 107 # Requirements
 108 # ------------
 109 #
 110 # ::
 111
 112 import __builtin__, os, sys
 113 import re, optparse
 114
 115 # Customisation
 116 # =============
 117 #
 118 # defaults
 119 # --------
 120 #
 121 # The `defaults` object provides a central repository for default values
 122 # and their customisation. ::
 123
 124 defaults = optparse.Values()  # attribute container; the settings are added below
 125
 126 # It is used for
 127 #
 128 # * the initialization of data arguments in TextCodeConverter_ and
 129 #   PylitOptions_
 130 #
 131 # * completion of command line options in `PylitOptions.complete_values`_.
 132 #
 133 # This allows the easy creation of custom back-ends that customise the
 134 # defaults and then call main_ e.g.:
 135 #
 136 #   >>> import pylit
 137 #   >>> pylit.defaults.comment_string = "## "
 138 #   >>> pylit.defaults.codeindent = 4
 139 #   >>> #pylit.main()
 140 #
 141 # The following default values are defined in pylit.py:
 142 #
 143 # defaults.languages
 144 # ~~~~~~~~~~~~~~~~~~
 145 #
 146 # Mapping of code file extension to code language.
 147 # Used by `OptionValues.complete`_ to set the `defaults.language`.
 148 # The ``--language`` command line option or setting ``defaults.language`` in
 149 # programmatic use override this auto-setting feature. ::
 150
 151 defaults.languages  = {".py": "python",  # file extension -> language name
 152                        ".sl": "slang",
 153                        ".css": "css",
 154                        ".c": "c",
 155                        ".cc": "c++"}
 156
 157
 158 # defaults.fallback_language
 159 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
 160 #
 161 # Language to use, if there is no matching extension (e.g. if pylit is used as
 162 # filter) and no `language` is specified::
 163
 164 defaults.fallback_language = "python"  # must be a key of `defaults.comment_strings`
 165
 166 # defaults.text_extensions
 167 # ~~~~~~~~~~~~~~~~~~~~~~~~
 168 #
 169 # List of known extensions of (reStructured) text files.
 170 # Used by `OptionValues._get_outfile` to auto-determine the output filename.
 171 # ::
 172
 173 defaults.text_extensions = [".txt"]  # extensions are given with the leading dot
 174
 175
 176 # defaults.comment_strings
 177 # ~~~~~~~~~~~~~~~~~~~~~~~~
 178 #
 179 # Dictionary of comment strings for known languages. Comment strings include
 180 # trailing whitespace. ::
 181
 182 defaults.comment_strings = {"python": '# ',  # language name -> comment string
 183                             "slang":  '% ',
 184                             "css":    '// ',
 185                             "c":      '// ',
 186                             "c++":    '// '}
 187
 188 # Used in Code2Text_ to recognise text blocks and in Text2Code_ to format
 189 # text blocks as comments.
 190 #
 191 # defaults.header_string
 192 # ~~~~~~~~~~~~~~~~~~~~~~
 193 #
 194 # Marker string for a header code block in the text source. No trailing
 195 # whitespace needed as indented code follows.
 196 # Must be a valid rst directive that accepts code on the same line, e.g.
 197 # ``'..admonition::'``.
 198 #
 199 # Default is a comment marker::
 200
 201 defaults.header_string = '..'  # reStructuredText comment marker
 202
 203 # defaults.code_block_marker
 204 # ~~~~~~~~~~~~~~~~~~~~~~~~~~
 205 #
 206 # Marker string for a code block in the text source.
 207 #
 208 # Default is a literal-block marker::
 209
 210 defaults.code_block_marker = '::'  # interpolated into a regular expression by the converters
 211
 212 # In a document where code examples are only one of several uses of literal
 213 # blocks, it is more appropriate to single out the sourcecode with a dedicated
 214 # "code-block" directive.
 215 #
 216 # Some highlight plug-ins require a special "sourcecode" or "code-block"
 217 # directive instead of the ``::`` literal block marker. Actually,
 218 # syntax-highlight is possible without changes to docutils with the Pygments_
 219 # package using a "code-block" directive. See the `syntax highlight`_ section
 220 # in the features documentation.
 221 #
 222 # The `code_block_marker` string is used in a regular expression. Examples for
 223 # alternative forms are ``.. code-block::`` or ``.. code-block:: .* python``.
 224 # The second example can differentiate between Python code blocks and
 225 # code-blocks in other languages.
 226 #
 227 # Another use would be to mark some code-blocks inactive allowing a literate
 228 # source to contain code-blocks that should become active only in some cases.
 229 #
 230 #
 231 #
 232 # defaults.strip
 233 # ~~~~~~~~~~~~~~
 234 #
 235 # Export to the output format stripping documentation or code blocks::
 236
 237 defaults.strip = False  # by default nothing is stripped
 238
 239 # defaults.strip_marker
 240 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 241 #
 242 # Strip literal marker from the end of documentation blocks when
 243 # converting to code format. Makes the code more concise but loses the
 244 # synchronization of line numbers in text and code formats. Can also be used
 245 # (together with the auto-completion of the code-text conversion) to change
 246 # the `code_block_marker`::
 247
 248 defaults.strip_marker = False  # by default the code-block marker is kept
 249
 250 # defaults.preprocessors
 251 # ~~~~~~~~~~~~~~~~~~~~~~
 252 #
 253 # Preprocess the data with language-specific filters_
 254 # Set below in Filters_::
 255
 256 defaults.preprocessors = {}  # looked up by key "text2<language>" or "<language>2text"
 257
 258 # defaults.postprocessors
 259 # ~~~~~~~~~~~~~~~~~~~~~~~
 260 #
 261 # Postprocess the data with language-specific filters_::
 262
 263 defaults.postprocessors = {}  # looked up by key "text2<language>" or "<language>2text"
 264
 265 # defaults.codeindent
 266 # ~~~~~~~~~~~~~~~~~~~
 267 #
 268 # Number of spaces to indent code blocks in `Code2Text.code_block_handler`_::
 269
 270 defaults.codeindent =  2  # number of spaces
 271
 272 # In `Text2Code.code_block_handler`_, the codeindent is determined by the
 273 # first recognized code line (header or first indented literal block
 274 # of the text source).
 275 #
 276 # defaults.overwrite
 277 # ~~~~~~~~~~~~~~~~~~
 278 #
 279 # What to do if the outfile already exists? (ignored if `outfile` == '-')::
 280
 281 defaults.overwrite = 'update'  # one of 'yes', 'update', 'no'
 282
 283 # Recognized values:
 284 #
 285 #  :'yes':    overwrite `outfile` if it already exists,
 286 #  :'update': fail if the `outfile` is newer than `infile`,
 287 #  :'no':     fail if `outfile` exists.
 288 #
 289 #
 290 # Extensions
 291 # ----------
 292 #
 293 # Try to import optional extensions::
 294
 295 try:
 296     import pylit_elisp  # optional extension module
 297 except ImportError:
 298     pass  # extension not available: continue without it
 299
 300
 301 # Converter Classes
 302 # =================
 303 #
 304 # The converter classes implement a simple state machine to separate and
 305 # transform documentation and code blocks. For this task, only a very limited
 306 # parsing is needed. PyLit's parser assumes:
 307 #
 308 # * `indented literal blocks`_ in a text source are code blocks.
 309 #
 310 # * comment blocks in a code source where every line starts with a matching
 311 #   comment string are documentation blocks.
 312 #
 313 # TextCodeConverter
 314 # -----------------
 315 # ::
 316
 317 class TextCodeConverter(object):
 318     """Parent class for the converters `Text2Code` and `Code2Text`: shared
 319     settings, filter lookup and the block-wise `convert` loop."""
 320
 321 # The parent class defines data attributes and functions used in both
 322 # `Text2Code`_ converting a text source to executable code source, and
 323 # `Code2Text`_ converting commented code to a text source.
 324 #
 325 # Data attributes
 326 # ~~~~~~~~~~~~~~~
 327 #
 328 # Class default values are fetched from the `defaults`_ object and can be
 329 # overridden by matching keyword arguments during class instantiation. This
 330 # also works with keyword arguments to `get_converter`_ and `main`_, as these
 331 # functions pass on unused keyword args to the instantiation of a converter
 332 # class. ::
 333
 334     language = defaults.fallback_language  # NOTE(review): copied once, when the class body runs
 335     comment_strings = defaults.comment_strings  # same dict object as `defaults.comment_strings`
 336     comment_string = "" # set in __init__ (if empty)
 337     codeindent =  defaults.codeindent  # indent added to code lines (code -> text)
 338     header_string = defaults.header_string  # marks a leading code block in the text source
 339     code_block_marker = defaults.code_block_marker  # compiled to `marker_regexp` in __init__
 340     strip = defaults.strip  # True: drop documentation (text -> code) or code (code -> text)
 341     strip_marker = defaults.strip_marker  # True: strip the code-block marker (code -> text)
 342     state = "" # type of current block, see `TextCodeConverter.convert`_
 343
 344 # Interface methods
 345 # ~~~~~~~~~~~~~~~~~
 346 #
 347 # TextCodeConverter.__init__
 348 # """"""""""""""""""""""""""
 349 #
 350 # Initializing sets the `data` attribute, an iterable object yielding lines of
 351 # the source to convert. [1]_
 352 #
 353 # Additional keyword arguments are stored as instance variables, overwriting
 354 # the class defaults. If still empty, `comment_string` is set according to the
 355 # `language`.
 356 #
 357 # ::
 358
 359     def __init__(self, data, **keyw):
 360         """data   --  iterable data object
 361                       (list, file, generator, string, ...)
 362            **keyw --  remaining keyword arguments are
 363                       stored as data-attributes
 364         """
 365         self.data = data
 366         self.__dict__.update(keyw)
 367         if not self.comment_string:
 368             self.comment_string = self.comment_strings[self.language]
 369
 370 # Pre- and postprocessing filters are set (with
 371 # `TextCodeConverter.get_filter`_)::
 372
 373         self.preprocessor = self.get_filter("preprocessors", self.language)
 374         self.postprocessor = self.get_filter("postprocessors", self.language)
 375
 376 # Finally,  the regular_expression for the `code_block_marker` is compiled to
 377 # find valid cases of code_block_marker in a given line and return the groups:
 378 #
 379 # \1 prefix, \2 code_block_marker, \3 remainder
 380 # ::
 381
 382         marker = self.code_block_marker
 383         if marker == '::':
 384             self.marker_regexp = re.compile('^( *(?!\.\.).*)(%s)([ \n]*)$'
 385                                             % marker)
 386         else:
 387             # assume code_block_marker is a directive like '.. code-block::'
 388             self.marker_regexp = re.compile('^( *)(%s)(.*\n?)$' % marker)
 389
 390 # .. [1] The most common choice of data is a `file` object with the text
 391 #        or code source.
 392 #
 393 #        To convert a string into a suitable object, use its splitlines method
 394 #        like ``"2 lines\nof source".splitlines(True)``.
 395 #
 396 #
 397 # TextCodeConverter.__iter__
 398 # """"""""""""""""""""""""""
 399 #
 400 # Return an iterator for the instance. Iteration yields lines of converted
 401 # data.
 402 #
 403 # The iterator is a chain of iterators acting on `self.data` that does
 404 #
 405 # * preprocessing
 406 # * text<->code format conversion
 407 # * postprocessing
 408 #
 409 # Pre- and postprocessing are only performed, if filters for the current
 410 # language are registered in `defaults.preprocessors`_ and|or
 411 # `defaults.postprocessors`_. The filters must accept an iterable as first
 412 # argument and yield the processed input data linewise.
 413 # ::
 414
 415     def __iter__(self):
 416         """Return an iterator over the converted lines: `self.data` is passed
 417         through the preprocessor, `convert` and the postprocessor, in this order."""
 418         return self.postprocessor(self.convert(self.preprocessor(self.data)))
 419
 420
 421 # TextCodeConverter.__call__
 422 # """"""""""""""""""""""""""
 423 # The special `__call__` method allows the use of class instances as callable
 424 # objects. It returns the converted data as list of lines::
 425
 426     def __call__(self):
 427         """Run the complete conversion and return the converted lines as a list"""
 428         return [line for line in self]
 429
 430
 431 # TextCodeConverter.__str__
 432 # """""""""""""""""""""""""
 433 # Return converted data as string::
 434
 435     def __str__(self):
 436         return "".join(self())  # concatenate the converted lines; no separator is added
 437
 438
 439 # Helpers and convenience methods
 440 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 441 #
 442 # TextCodeConverter.convert
 443 # """""""""""""""""""""""""
 444 #
 445 # The `convert` method generates an iterator that does the actual code <-->
 446 # text format conversion. The converted data is yielded line-wise and the
 447 # instance's `state` attribute indicates whether the current line is "header",
 448 # "documentation", or "code_block"::
 449
 450     def convert(self, lines):
 451         """Generator: collect `lines` into blocks, determine the state of each
 452         block with `set_state` and yield the lines returned by the matching
 453         "<state>_handler" method."""
 454
 455 # Initialise internal data attributes. (Done here, so that every new iteration
 456 # re-initialises them.)
 457 #
 458 # `state`
 459 #   the "type" of the currently processed block of lines. One of
 460 #
 461 #   :"":              initial state: check for header,
 462 #   :"header":        leading code block: strip `header_string`,
 463 #   :"documentation": documentation part: comment out,
 464 #   :"code_block":    literal blocks containing source code: unindent.
 465 #
 466 # ::
 467
 468         self.state = ""
 469
 470 # `_codeindent`
 471 #   * Do not confuse the internal attribute `_codeindent` with the configurable
 472 #     `codeindent` (without the leading underscore).
 473 #   * `_codeindent` is set in `Text2Code.code_block_handler`_ to the indent of
 474 #     first non-blank "code_block" line and stripped from all "code_block" lines
 475 #     in the text-to-code conversion,
 476 #   * `codeindent` is a class attribute taken from `defaults.codeindent`_ and
 477 #     added to "code_block" lines in the code-to-text conversion.
 478 #
 479 # ::
 480
 481         self._codeindent = 0
 482
 483 # `_textindent`
 484 #   * set by `Text2Code.documentation_handler`_ to the indent of the
 485 #     documentation line that carries the code-block marker,
 486 #   * used in `Text2Code.set_state`_ to find the end of a code block.
 487 #
 488 # ::
 489
 490         self._textindent = 0
 491
 492 # `code_block_marker_missing`
 493 #   If the last paragraph of a documentation block does not end with a
 494 #   "code_block_marker" (the literal-block marker ``::``), it must
 495 #   be added (otherwise, the back-conversion fails).
 496 #
 497 #   `code_block_marker_missing` is set by `Code2Text.documentation_handler`_
 498 #   and evaluated by `Code2Text.code_block_handler`_, because the
 499 #   documentation_handler does not know whether the next block will be
 500 #   documentation (with no need for a code_block_marker) or a code block.
 501 #
 502 # ::
 503
 504         self.code_block_marker_missing = False
 505
 506 # Determine the state of the block and convert with the matching "handler"::
 507
 508         for block in collect_blocks(expandtabs_filter(lines)):
 509             self.set_state(block)
 510             for line in getattr(self, self.state+"_handler")(block):
 511                 yield line
 512
 513
 514 # TextCodeConverter.get_filter
 515 # """"""""""""""""""""""""""""
 516 # ::
 517
 518     def get_filter(self, filter_set, language):
 519         """Return language specific filter"""
 520         if self.__class__ == Text2Code:
 521             key = "text2"+language
 522         elif self.__class__ == Code2Text:
 523             key = language+"2text"
 524         else:
 525             key = ""
 526         try:
 527             return getattr(defaults, filter_set)[key]
 528         except (AttributeError, KeyError):
 529             # print "there is no %r filter in %r"%(key, filter_set)
 530             pass
 531         return identity_filter
 532
 533
 534 # TextCodeConverter.get_indent
 535 # """"""""""""""""""""""""""""
 536 # Return the number of leading spaces in `line`::
 537
 538     def get_indent(self, line):
 539         """Return the number of leading whitespace characters of `line`
 540         (a whitespace-only line counts in full, its line end included)."""
 541         return len(line) - len(line.lstrip())
 542
 543
 544 # Text2Code
 545 # ---------
 546 #
 547 # The `Text2Code` converter separates *code-blocks* [#]_ from *documentation*.
 548 # Code blocks are unindented, documentation is commented (or filtered, if the
 549 # ``strip`` option is True).
 550 #
 551 # .. [#] Only `indented literal blocks`_ are considered code-blocks. `quoted
 552 #        literal blocks`_, `parsed-literal blocks`_, and `doctest blocks`_ are
 553 #        treated as part of the documentation. This allows the inclusion of
 554 #        examples:
 555 #
 556 #           >>> 23 + 3
 557 #           26
 558 #
 559 #        Note that there is no double colon before the doctest block in the
 560 #        text source.
 561 #
 562 # The class inherits the interface and helper functions from
 563 # TextCodeConverter_ and adds functions specific to the text-to-code format
 564 # conversion::
 565
 566 class Text2Code(TextCodeConverter):
 567     """Convert a (reStructured) text source to code source: code blocks are
 568     unindented, documentation is commented out (or dropped if `strip` is set)."""
 569
 570 # Text2Code.set_state
 571 # ~~~~~~~~~~~~~~~~~~~~~
 572 # ::
 573
 574     def set_state(self, block):
 575         """Determine state of `block`. Set `self.state`
 576         """
 577
 578 # `set_state` is used inside an iteration. Hence, if we are out of data, a
 579 # StopItertion exception should be raised::
 580
 581         if not block:
 582             raise StopIteration
 583
 584 # The new state depends on the active state (from the last block) and
 585 # features of the current block. It is either "header", "documentation", or
 586 # "code_block".
 587 #
 588 # If the current state is "" (first block), check for
 589 # the  `header_string` indicating a leading code block::
 590
 591         if self.state == "":
 592             # print "set state for %r"%block
 593             if block[0].startswith(self.header_string):
 594                 self.state = "header"
 595             else:
 596                 self.state = "documentation"
 597
 598 # If the current state is "documentation", the next block is also
 599 # documentation. The end of a documentation part is detected in the
 600 # `Text2Code.documentation_handler`_::
 601
 602         # elif self.state == "documentation":
 603         #    self.state = "documentation"
 604
 605 # A "code_block" ends with the first less indented, nonblank line.
 606 # `_textindent` is set by the documentation handler to the indent of the
 607 # preceding documentation block::
 608
 609         elif self.state in ["code_block", "header"]:
 610             indents = [self.get_indent(line) for line in block]
 611             # print "set_state:", indents, self._textindent
 612             if indents and min(indents) <= self._textindent:
 613                 self.state = 'documentation'
 614             else:
 615                 self.state = 'code_block'
 616
 617 # TODO: (or not to do?) insert blank line before the first line with too-small
 618 # codeindent using self.ensure_trailing_blank_line(lines, line) (would need
 619 # split and push-back of the documentation part)?
 620 #
 621 # Text2Code.header_handler
 622 # ~~~~~~~~~~~~~~~~~~~~~~~~
 623 #
 624 # Sometimes code needs to remain on the first line(s) of the document to be
 625 # valid. The most common example is the "shebang" line that tells a POSIX
 626 # shell how to process an executable file::
 627
 628 #!/usr/bin/env python
 629
 630 # In Python, the special comment to indicate the encoding, e.g.
 631 # ``# -*- coding: iso-8859-1 -*-``, must occur before any other comment
 632 # or code too.
 633 #
 634 # If we want to keep the line numbers in sync for text and code source, the
 635 # reStructured Text markup for these header lines must start at the same line
 636 # as the first header line. Therefore, header lines cannot be marked as a
 637 # literal block (this would require the ``::`` and an empty line above the
 638 # code_block).
 639 #
 640 # OTOH, a comment may start at the same line as the comment marker and it
 641 # includes subsequent indented lines. Comments are visible in the reStructured
 642 # Text source but hidden in the pretty-printed output.
 643 #
 644 # With a header converted to comment in the text source, everything before
 645 # the first documentation block (i.e. before the first paragraph using the
 646 # matching comment string) will be hidden away (in HTML or PDF output).
 647 #
 648 # This seems a good compromise; the advantages
 649 #
 650 # * line numbers are kept
 651 # * the "normal" code_block conversion rules (indent/unindent by `codeindent`) apply
 652 # * greater flexibility: you can hide a repeating header in a project
 653 #   consisting of many source files.
 654 #
 655 # outweigh the disadvantages
 656 #
 657 # - it may come as a surprise if a part of the file is not "printed",
 658 # - one more syntax element to learn for rst newbies to start with pylit,
 659 #   (however, starting from the code source, this will be auto-generated)
 660 #
 661 # In the case that there is no matching comment at all, the complete code
 662 # source will become a comment -- however, in this case it is not very likely
 663 # the source is a literate document anyway.
 664 #
 665 # If needed for the documentation, it is possible to quote the header in (or
 666 # after) the first documentation block, e.g. as `parsed literal`.
 667 # ::
 668
 669     def header_handler(self, lines):
 670         """Strip `header_string` from the first line and format the block as code"""
 671         # strip the first occurrence of the header string (modifies `lines` in place)
 672         lines[0] = lines[0].replace(self.header_string, "", 1)
 673         # yield all lines (the first one included) formatted as code-block
 674         for line in self.code_block_handler(lines):
 675             yield line
 676
 677
 678 # Text2Code.documentation_handler
 679 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 680 #
 681 # The 'documentation' handler processes everything that is not recognized as
 682 # "code_block". Documentation is quoted with `self.comment_string`
 683 # (or filtered with `--strip=True`). ::
 684
 685     def documentation_handler(self, lines):
 686         """Yield the lines of a documentation block prefixed with `comment_string`
 687         (nothing if `strip` is set); switch to "code_block" at a code-block marker."""
 688
 689 # Test for the end of the documentation block: does the second-to-last line
 690 # end with `::` but is neither a comment nor a directive?
 691 #
 692 # If end-of-documentation marker is detected,
 693 #
 694 # * set state to 'code_block'
 695 # * set `self._textindent` (needed by `Text2Code.set_state`_ to find the
 696 #   next "documentation" block)
 697 # * do not comment the last line (the blank line separating documentation
 698 #   and code blocks).
 699 #
 700 # ::
 701
 702         endnum = len(lines) - 2  # index of the second-to-last line
 703         for (num, line) in enumerate(lines):
 704             if not self.strip:
 705                 if self.state == "code_block":  # only true after the marker line was seen
 706                     yield line
 707                 else:
 708                     yield self.comment_string + line
 709             if (num == endnum and self.marker_regexp.search(line)):
 710                 self.state = "code_block"
 711                 self._textindent = self.get_indent(line)
 712
 713 # TODO: Ensure a trailing blank line? Would need to test all documentation
 714 # lines for end-of-documentation marker and add a line by calling the
 715 # `ensure_trailing_blank_line` method (which also issues a warning)
 716 #
 717 #
 718 # Text2Code.code_block_handler
 719 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 720 #
 721 # The "code_block" handler is called with an indented literal block. It
 722 # removes leading whitespace up to the indentation of the first code line in
 723 # the file (this deviation from docutils behaviour allows indented blocks of
 724 # Python code). ::
 725
 726     def code_block_handler(self, block):
 727         """Convert indented literal blocks to source code format
 728         """
 729
 730 # If still unset, determine the indentation of code blocks from first non-blank
 731 # code line::
 732
 733         if self._codeindent == 0:
 734             self._codeindent = self.get_indent(block[0])
 735
 736 # Yield unindented lines after check whether we can safely unindent. If the
 737 # line is less indented then `_codeindent`, something got wrong. ::
 738
 739         for line in block:
 740             if line.lstrip() and self.get_indent(line) < self._codeindent:
 741                 raise ValueError, "code block contains line less indented " \
 742                       "than %d spaces \n%r"%(self._codeindent, block)
 743             yield line.replace(" "*self._codeindent, "", 1)
 744
 745
 746 # Code2Text
 747 # ---------
 748 #
 749 # The `Code2Text` converter does the opposite of `Text2Code`_ -- it processes
 750 # a source in "code format" (i.e. in a programming language), extracts
 751 # documentation from comment blocks, and puts program code in literal blocks.
 752 #
 753 # The class inherits the interface and helper functions from
 754 # TextCodeConverter_ and adds functions specific to the code-to-text format
 755 # conversion::
 756
 757 class Code2Text(TextCodeConverter):
 758     """Convert code source to text source: comment blocks are uncommented
 759     to documentation, the remaining lines are treated as code blocks."""
 760
 761 # Code2Text.set_state
 762 # ~~~~~~~~~~~~~~~~~~~
 763 #
 764 # Check if block is "header", "documentation", or "code_block":
 765 #
 766 # A paragraph is "documentation", if every non-blank line starts with a
 767 # matching comment string (including whitespace except for commented blank
 768 # lines) ::
 769
 770     def set_state(self, block):
 771         """Set `self.state` to "header", "code_block" or "documentation" for `block`."""
 772         for line in block:
 773             # skip documentation lines (commented, blank or blank comment)
 774             if (line.startswith(self.comment_string)
 775                 or not line.rstrip()
 776                 or line.rstrip() == self.comment_string.rstrip()
 777                ):
 778                 continue
 779             # non-commented line found: "header" only for the very first block
 780             if self.state == "":
 781                 self.state = "header"
 782             else:
 783                 self.state = "code_block"
 784             break
 785         else:
 786             # no code line found (the loop ended without `break`)
 787             # disabled: keep state if the block is just a blank line
 788             # if len(block) == 1 and self._is_blank_codeline(line):
 789             #     return
 790             self.state = "documentation"
 791
 792
 793 # Code2Text.header_handler
 794 # ~~~~~~~~~~~~~~~~~~~~~~~~
 795 #
 796 # Handle a leading code block. (See `Text2Code.header_handler`_ for a
 797 # discussion of the "header" state.) ::
 798
 799     def header_handler(self, lines):
 800         """Yield the leading code block as code block with `header_string` prepended"""
 801         if self.strip == True:  # code is stripped: yield nothing
 802             return
 803         # get iterator over the lines formatted as code-block
 804         lines = iter(self.code_block_handler(lines))
 805         # prepend header string to first line
 806         yield self.header_string + lines.next()
 807         # yield remaining lines
 808         for line in lines:
 809             yield line
 810
 811 # Code2Text.documentation_handler
 812 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 813 #
 814 # The *documentation state* handler converts a comment to a documentation
 815 # block by stripping the leading `comment string` from every line::
 816
 817     def documentation_handler(self, block):
 818         """Uncomment documentation blocks in source code
 819         """
 820
 821 # Strip comment strings::
 822
 823         lines = [self.uncomment_line(line) for line in block]
 824
 825 # If the code block is stripped, the literal marker would lead to an error
 826 # when the text is converted with docutils. Strip it as well. Otherwise, check
 827 # for the `code_block_marker` (default ``::``) at the end of the documentation
 828 # block::
 829
 830         if self.strip or self.strip_marker:
 831             self.strip_code_block_marker(lines)
 832         else:
 833             try:
 834                 self.code_block_marker_missing = \
 835                     not self.marker_regexp.search(lines[-2])
 836             except IndexError:  # len(lines < 2), e.g. last line of document
 837                 self.code_block_marker_missing = True
 838
 839 # Yield lines::
 840
 841         for line in lines:
 842             yield line
 843
 844 # Code2Text.uncomment_line
 845 # ~~~~~~~~~~~~~~~~~~~~~~~~
 846 #
 847 # Strip comment string from a documentation line and return it. Consider the
 848 # case that a blank line has a comment string without trailing whitespace::
 849
 850     def uncomment_line(self, line):
 851         """Return uncommented documentation line"""
 852         stripped_comment_string = self.comment_string.rstrip()
 853         line = line.replace(self.comment_string, "", 1)
 854         if line.rstrip() == stripped_comment_string:
 855             line = line.replace(stripped_comment_string, "", 1)
 856         return line
 857
 858
 859 # Code2Text.code_block_handler
 860 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 861 #
 862 # The `code_block` handler returns the code block as indented literal
 863 # block (or filters it, if ``self.strip == True``). The amount of the code
 864 # indentation is controlled by `self.codeindent` (default 2).  ::
 865
 866     def code_block_handler(self, lines):
 867         """Covert code blocks to text format (indent or strip)
 868         """
 869         if self.strip == True:
 870             return
 871         # eventually insert transition marker
 872         if self.code_block_marker_missing:
 873             self.state = "documentation"
 874             yield "::\n"
 875             yield "\n"
 876             self.code_block_marker_missing = False
 877             self.state = "code_block"
 878         for line in lines:
 879             yield " "*self.codeindent + line
 880
 881
 882
 883 # Code2Text.strip_code_block_marker
 884 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 885 #
 886 # Replace the literal marker with the equivalent of docutils replace rules
 887 #
 888 # * strip `::`-line (and preceding blank line) if on a line on its own
 889 # * strip `::` if it is preceded by whitespace.
 890 # * convert `::` to a single colon if preceded by text
 891 #
 892 # `lines` should be a list of documentation lines (with a trailing blank line).
 893 # It is modified in-place::
 894
 895     def strip_code_block_marker(self, lines):
 896         try:
 897             line = lines[-2]
 898         except IndexError:
 899             return # just one line (no trailing blank line)
 900
 901         # match with regexp: `match` is None or has groups
 902         # \1 leading text, \2 code_block_marker, \3 remainder
 903         match = self.marker_regexp.search(line)
 904
 905         if not match:                 # no code_block_marker present
 906             return
 907         if not match.group(1):        # `code_block_marker` on an extra line
 908             del(lines[-2])
 909             # delete preceding line if it is blank
 910             if len(lines) >= 2 and not lines[-2].lstrip():
 911                 del(lines[-2])
 912         elif match.group(1).rstrip() < match.group(1):
 913             # '::' follows whitespace
 914             lines[-2] = match.group(1).rstrip() + match.group(3)
 915         else:                         # '::' follows text
 916             lines[-2] = match.group(1).rstrip() + ':' + match.group(3)
 917
 918 # Filters
 919 # =======
 920 #
 921 # Filters allow pre- and post-processing of the data to bring it in a format
 922 # suitable for the "normal" text<->code conversion. An example is the
 923 # conversion of `C` ``/*`` ``*/`` comments into C++ ``//`` comments (and
 924 # back), implemented below by the `dumb_c_preprocessor`_ and
 925 # `dumb_c_postprocessor`_ filters.
 926 #
 927 # Filters are generator functions that return an iterator acting on a
 928 # `data` iterable and yielding processed `data` lines.
 929 #
 930 # identity_filter
 931 # ---------------
 932 #
 933 # The most basic filter is the identity filter, that returns its argument as
 934 # iterator::
 935
 936 def identity_filter(data):
 937     """Return data iterator without any processing"""
 938     return iter(data)
 939
 940 # expandtabs_filter
 941 # -----------------
 942 #
 943 # Expand hard-tabs in every line of `data` (cf. `str.expandtabs`).
 944 #
 945 # This filter is applied to the input data by `TextCodeConverter.convert`_ as
 946 # hard tabs can lead to errors when the indentation is changed. ::
 947
 948 def expandtabs_filter(data):
 949     """Yield data tokens with hard-tabs expanded"""
 950     for line in data:
 951         yield line.expandtabs()
 952
 953
 954 # collect_blocks
 955 # --------------
 956 #
 957 # A filter to aggregate "paragraphs" (blocks separated by blank
 958 # lines). Yields lists of lines::
 959
 960 def collect_blocks(lines):
 961     """collect lines in a list
 962
 963     yield list for each paragraph, i.e. block of lines separated by a
 964     blank line (whitespace only).
 965
 966     Trailing blank lines are collected as well.
 967     """
 968     blank_line_reached = False
 969     block = []
 970     for line in lines:
 971         if blank_line_reached and line.rstrip():
 972             yield block
 973             blank_line_reached = False
 974             block = [line]
 975             continue
 976         if not line.rstrip():
 977             blank_line_reached = True
 978         block.append(line)
 979     yield block
 980
 981
 982
 983 # dumb_c_preprocessor
 984 # -------------------
 985 #
 986 # This is a basic filter to convert `C` to `C++` comments. Works line-wise and
 987 # only converts lines that
 988 #
 989 # * start with "/\* " and end with " \*/" (followed by whitespace only)
 990 #
 991 # A more sophisticated version would also
 992 #
 993 # * convert multi-line comments
 994 #
 995 #   + Keep indentation or strip 3 leading spaces?
 996 #
 997 # * account for nested comments
 998 #
 999 # * only convert comments that are separated from code by a blank line
1000 #
1001 # ::
1002
def dumb_c_preprocessor(data):
    """change `C` ``/* `` `` */`` comments into C++ ``// `` comments

    Only lines that start with ``/* `` and end (ignoring trailing
    whitespace) with `` */`` are converted; all other lines are yielded
    unchanged.
    """
    cxx_comment = defaults.comment_strings["c++"]
    opener = "/* "
    closer = " */"
    for line in data:
        is_c_comment = (line.startswith(opener)
                        and line.rstrip().endswith(closer))
        if not is_c_comment:
            yield line
            continue
        # swap the leading delimiter ...
        converted = cxx_comment + line[len(opener):]
        # ... and drop the last occurrence of the closing delimiter
        yield "".join(converted.rsplit(closer, 1))
1015
1016 # Unfortunately, the `replace` method of strings does not support negative
1017 # numbers for the `count` argument:
1018 #
1019 #   >>> "foo */ baz */ bar".replace(" */", "", -1) == "foo */ baz bar"
1020 #   False
1021 #
1022 # However, there is the `rsplit` method, that can be used together with `join`:
1023 #
1024 #   >>> "".join("foo */ baz */ bar".rsplit(" */", 1)) == "foo */ baz bar"
1025 #   True
1026 #
1027 # dumb_c_postprocessor
1028 # --------------------
1029 #
1030 # Undo the preparations by the dumb_c_preprocessor and re-insert valid comment
1031 # delimiters ::
1032
def dumb_c_postprocessor(data):
    """change C++ ``// `` comments into `C` ``/* `` `` */`` comments

    Lines consisting only of the comment string (with or without its
    trailing whitespace) become blank lines. Other lines starting with the
    comment string are wrapped in ``/* `` and `` */``. All remaining lines
    are yielded unchanged.
    """
    comment_string = defaults.comment_strings["c++"]
    bare_comment = comment_string.rstrip()
    boc_string = "/* "
    eoc_string = " */"
    for line in data:
        if line.rstrip() == bare_comment:
            # Blank comment line: remove the marker. The bare marker (no
            # trailing whitespace) must be handled separately, as the full
            # comment string does not occur in such a line.
            if line.startswith(comment_string):
                line = line[len(comment_string):]
            else:
                line = line[len(bare_comment):]
        elif line.startswith(comment_string):
            line = line.replace(comment_string, boc_string, 1)
            line = line.rstrip() + eoc_string + "\n"
        yield line
1045
1046
1047 # register filters
1048 # ----------------
1049 #
1050 # ::
1051
# C and CSS sources share the same ``/* */`` <-> ``//`` comment conversion
defaults.preprocessors.update({'c2text': dumb_c_preprocessor,
                               'css2text': dumb_c_preprocessor})
defaults.postprocessors.update({'text2c': dumb_c_postprocessor,
                                'text2css': dumb_c_postprocessor})
1056
1057
1058 # Command line use
1059 # ================
1060 #
1061 # Using this script from the command line will convert a file according to its
1062 # extension. This default can be overridden by a couple of options.
1063 #
1064 # Dual source handling
1065 # --------------------
1066 #
1067 # How to determine which source is up-to-date?
1068 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1069 #
1070 # - set modification date of `outfile` to the one of `infile`
1071 #
1072 #   Points out that the source files are 'synchronized'.
1073 #
1074 #   * Are there problems to expect from "backdating" a file? Which?
1075 #
1076 #     Looking at http://www.unix.com/showthread.php?t=20526, it seems
1077 #     perfectly legal to set `mtime` (while leaving `ctime`) as `mtime` is a
1078 #     description of the "actuality" of the data in the file.
1079 #
1080 #   * Should this become a default or an option?
1081 #
1082 # - alternatively move input file to a backup copy (with option: `--replace`)
1083 #
1084 # - check modification date before overwriting
1085 #   (with option: `--overwrite=update`)
1086 #
1087 # - check modification date before editing (implemented as `Jed editor`_
1088 #   function `pylit_check()` in `pylit.sl`_)
1089 #
1090 # .. _Jed editor: http://www.jedsoft.org/jed/
1091 # .. _pylit.sl: http://jedmodes.sourceforge.net/mode/pylit/
1092 #
1093 # Recognised Filename Extensions
1094 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1095 #
1096 # Instead of defining a new extension for "pylit" literate programs,
1097 # by default ``.txt`` will be appended for the text source and stripped by
1098 # the conversion to the code source. I.e. for a Python program foo:
1099 #
1100 # * the code source is called ``foo.py``
1101 # * the text source is called ``foo.py.txt``
1102 # * the html rendering is called ``foo.py.html``
1103 #
1104 #
1105 # OptionValues
1106 # ------------
1107 #
1108 # The following class adds `as_dict`, `complete`, and `__getattr__` methods
1109 # to `optparse.Values`::
1110
class OptionValues(optparse.Values):
    """`optparse.Values` with dictionary export and ``None`` as default
    value of options that are not set"""

# OptionValues.as_dict
# ~~~~~~~~~~~~~~~~~~~~
#
# For use as keyword arguments, it is handy to have the options in a
# dictionary. `as_dict` returns a copy of the instances object dictionary::

    def as_dict(self):
        """Return options as dictionary object"""
        return self.__dict__.copy()

# OptionValues.complete
# ~~~~~~~~~~~~~~~~~~~~~
#
# ::

    def complete(self, **keyw):
        """
        Complete the option values with keyword arguments.

        Do not overwrite existing values. Only use arguments that do not
        have a corresponding attribute in `self`.
        """
        for key in keyw:
            # `in` replaces the Python-2-only `dict.has_key`
            if key not in self.__dict__:
                setattr(self, key, keyw[key])

# OptionValues.__getattr__
# ~~~~~~~~~~~~~~~~~~~~~~~~
#
# To replace calls using ``options.ensure_value("OPTION", None)`` with the
# more concise ``options.OPTION``, we define `__getattr__` [#]_ ::

    def __getattr__(self, name):
        """Return default value for non existing options

        Names of special methods (``__xyz__``) are reported as missing:
        code probing for optional protocols (e.g. the `copy` module looking
        for ``__setstate__``) would otherwise get None and try to call it.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return None
1148
1149
1150 # .. [#] The special method `__getattr__` is only called when an attribute
1151 #        lookup has not found the attribute in the usual places (i.e. it is
1152 #        not an instance attribute nor is it found in the class tree for
1153 #        self).
1154 #
1155 #
1156 # PylitOptions
1157 # ------------
1158 #
1159 # The `PylitOptions` class comprises an option parser and methods for parsing
1160 # and completion of command line options::
1161
class PylitOptions(object):
    """Storage and handling of command line options for pylit"""

# Instantiation
# ~~~~~~~~~~~~~
#
# ::

    def __init__(self):
        """Set up an `OptionParser` instance for pylit command line options

        The parser is stored in `self.parser`.
        """
        p = optparse.OptionParser(usage=main.__doc__, version=_version)
        # add the options
        p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                     help="convert code source to text source")
        p.add_option("-m", "--code-block-marker", dest="code_block_marker",
                     help="syntax token starting a code block. (default '::')")
        p.add_option("--comment-string", dest="comment_string",
                     help="documentation block marker in code source "
                     "(default '# ')")
        p.add_option("-d", "--diff", action="store_true",
                     help="test for differences to existing file")
        p.add_option("--doctest", action="store_true",
                     help="run doctest.testfile() on the text version")
        p.add_option("-e", "--execute", action="store_true",
                     help="execute code (Python only)")
        p.add_option("--language", action="store",
                     # optparse insists on a list or tuple of choices
                     choices = list(defaults.languages.values()),
                     help="use LANGUAGE native comment style")
        p.add_option("--overwrite", action="store",
                     choices = ["yes", "update", "no"],
                     help="overwrite output file (default 'update')")
        p.add_option("--replace", action="store_true",
                     help="move infile to a backup copy (appending '~')")
        p.add_option("-s", "--strip", action="store_true",
                     help="export by stripping documentation or code")
        p.add_option("-t", "--txt2code", action="store_true",
                     help="convert text source to code source")
        self.parser = p


# PylitOptions.parse_args
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# The `parse_args` method calls the `optparse.OptionParser` on command
# line or provided args and returns the result as `OptionValues`_
# instance. Defaults can be provided as keyword arguments::

    def parse_args(self, args=None, **keyw):
        """parse command line arguments using `optparse.OptionParser`

           parse_args(args, **keyw) -> OptionValues instance

            args --  list of command line arguments
                     (default None: use ``sys.argv[1:]`` at call time).
            keyw --  keyword arguments or dictionary of option defaults
        """
        # parse arguments
        # (for `args` None, optparse itself falls back to ``sys.argv[1:]``)
        (values, args) = self.parser.parse_args(args, OptionValues(keyw))
        # Convert FILE and OUTFILE positional args to option values
        # (other positional arguments are ignored)
        if len(args) >= 1:
            values.infile = args[0]
        if len(args) >= 2:
            values.outfile = args[1]

        return values

# PylitOptions.complete_values
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Complete an OptionValues instance `values`.  Use module-level defaults and
# context information to set missing option values to sensible defaults (if
# possible) ::

    def complete_values(self, values):
        """complete option values with module and context sensible defaults

        x.complete_values(values) -> values
        values -- OptionValues instance
        """

# Complete with module-level defaults_::

        values.complete(**defaults.__dict__)

# Ensure infile is a string::

        values.ensure_value("infile", "")

# Guess conversion direction from `infile` filename::

        if values.txt2code is None:
            in_extension = os.path.splitext(values.infile)[1]
            if in_extension in values.text_extensions:
                values.txt2code = True
            elif in_extension in values.languages:
                values.txt2code = False

# Auto-determine the output file name::

        values.ensure_value("outfile", self._get_outfile_name(values))

# Second try: Guess conversion direction from outfile filename::

        if values.txt2code is None:
            out_extension = os.path.splitext(values.outfile)[1]
            values.txt2code = out_extension not in values.text_extensions

# Set the language of the code (the code source is the output file when
# converting text to code, the input file otherwise)::

        if values.txt2code:
            code_file = values.outfile
        else:
            code_file = values.infile
        code_extension = os.path.splitext(code_file)[1]
        values.ensure_value("language",
                            values.languages.get(code_extension,
                                                 values.fallback_language))

        return values

# PylitOptions._get_outfile_name
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Construct a matching filename for the output file. The output filename is
# constructed from `infile` by the following rules:
#
# * '-' (stdin) results in '-' (stdout)
# * strip the `txt_extension` (txt2code) or
# * add a `txt_extension` (code2txt)
# * fallback: if no guess can be made, add ".out"
#
#   .. TODO: use values.outfile_extension if it exists?
#
# ::

    def _get_outfile_name(self, values):
        """Return a matching output filename for `infile`

        `values` must provide the attributes `infile`, `text_extensions`,
        `languages` and `txt2code`.
        """
        # if input is stdin, default output is stdout
        if values.infile == '-':
            return '-'

        # Derive from `infile` name: strip or add text extension
        (base, ext) = os.path.splitext(values.infile)
        if ext in values.text_extensions:
            return base # strip
        if ext in values.languages or values.txt2code == False:
            return values.infile + values.text_extensions[0] # add
        # give up
        return values.infile + ".out"

# PylitOptions.__call__
# ~~~~~~~~~~~~~~~~~~~~~
#
# The special `__call__` method allows to use PylitOptions instances as
# *callables*: Calling an instance parses the argument list to extract option
# values and completes them based on "context-sensitive defaults".  Keyword
# arguments are passed to `PylitOptions.parse_args`_ as default values. ::

    def __call__(self, args=None, **keyw):
        """parse and complete command line args, return option values

        args -- list of command line arguments
                (default None: use ``sys.argv[1:]`` at call time).
        keyw -- option defaults, passed on to `parse_args`
        """
        values = self.parse_args(args, **keyw)
        return self.complete_values(values)
1328
1329
1330
1331 # Helper functions
1332 # ----------------
1333 #
1334 # open_streams
1335 # ~~~~~~~~~~~~
1336 #
1337 # Return file objects for in- and output. If the input path is missing,
1338 # write usage and abort. (An alternative would be to use stdin as default.
1339 # However,  this leaves the uninitiated user with a non-responding application
1340 # if (s)he just tries the script without any arguments) ::
1341
def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
    """Open and return the input and output stream

    open_streams(infile, outfile) -> (in_stream, out_stream)

    in_stream   --  open file `infile` or sys.stdin (if `infile` is '-')
    out_stream  --  open file `outfile` or sys.stdout (if `outfile` is '-')
    overwrite   --  'yes': overwrite eventually existing `outfile`,
                    'update': fail if the `outfile` is newer than `infile`,
                    'no': fail if `outfile` exists.

                    Irrelevant if `outfile` == '-'.

    Additional keyword arguments are ignored.

    Raise IOError if `infile` is empty (errno 2) or if `outfile` must not
    be overwritten (errno 1).
    """
    if not infile:
        strerror = "Missing input file name ('-' for stdin; -h for help)"
        raise IOError(2, strerror, infile)
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile, 'r')
    try:
        if outfile == '-':
            out_stream = sys.stdout
        elif overwrite == 'no' and os.path.exists(outfile):
            raise IOError(1, "Output file exists!", outfile)
        elif overwrite == 'update' and is_newer(outfile, infile):
            raise IOError(1, "Output file is newer than input file!", outfile)
        else:
            out_stream = open(outfile, 'w')
    except Exception:
        # do not leak the already opened input file if there is no output
        if in_stream is not sys.stdin:
            in_stream.close()
        raise
    return (in_stream, out_stream)
1371
1372 # is_newer
1373 # ~~~~~~~~
1374 #
1375 # ::
1376
def is_newer(path1, path2):
    """Check if `path1` is newer than `path2` (using mtime)

    Compare the modification times of the files at `path1` and `path2`.

    A non-existing file counts as older than any existing one: Return
    False if only `path1` does not exist and True if only `path2` does not
    exist.

    Return None for equal modification times (also if both files do not
    exist). This evaluates to False in a boolean context but allows a test
    for equality.
    """
    def mtime_or_oldest(path):
        """Return mtime of `path` or -1 if it cannot be determined"""
        try:
            return os.path.getmtime(path)
        except OSError:
            return -1

    mtime1 = mtime_or_oldest(path1)
    mtime2 = mtime_or_oldest(path2)
    if mtime1 != mtime2:
        return mtime1 > mtime2
    return None
1402
1403
1404 # get_converter
1405 # ~~~~~~~~~~~~~
1406 #
1407 # Get an instance of the converter state machine::
1408
def get_converter(data, txt2code=True, **keyw):
    """Return a converter instance for `data`

    A `Text2Code` instance if `txt2code` is true, else a `Code2Text`
    instance. Keyword arguments are passed on to the converter class.
    """
    converter_class = Text2Code if txt2code else Code2Text
    return converter_class(data, **keyw)
1414
1415
1416 # Use cases
1417 # ---------
1418 #
1419 # run_doctest
1420 # ~~~~~~~~~~~
1421 # ::
1422
def run_doctest(infile="-", txt2code=True,
                globs=None, verbose=False, optionflags=0, **keyw):
    """run doctest on the text source

    infile      -- path of the source ('-' for stdin)
    txt2code    -- False: `infile` is a code source and is converted to
                   text first (passing on `keyw` to the converter)
    globs       -- dictionary of globals for the tests (default: empty)
    verbose     -- passed on to the `doctest.DocTestRunner`
    optionflags -- passed on to the `doctest.DocTestRunner`

    Return the tuple (number of failures, number of tries).
    """

# Allow imports from the current working dir by prepending an empty string to
# sys.path (see doc of sys.path)::

    sys.path.insert(0, '')

# Import classes from the doctest module::

    from doctest import DocTestParser, DocTestRunner

    if globs is None:
        globs = {}

# Read in source. Make sure it is in text format, as tests in comments are not
# found by doctest::

    (data, out_stream) = open_streams(infile, "-")
    try:
        if txt2code is False:
            converter = Code2Text(data, **keyw)
            docstring = str(converter)
        else:
            docstring = data.read()
    finally:
        # the source is read completely, release the file
        if data is not sys.stdin:
            data.close()


# Use the doctest Advanced API to run all doctests in the source text::

    test = DocTestParser().get_doctest(docstring, globs, name="",
                                       filename=infile, lineno=0)
    # The first positional argument of `DocTestRunner` is the output
    # checker, so `verbose` and `optionflags` are given as keywords.
    runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    runner.run(test)
    runner.summarize()
    # give feedback also if no failures occurred
    if not runner.failures:
        print("%d failures in %d tests"%(runner.failures, runner.tries))
    return runner.failures, runner.tries
1459
1460
1461 # diff
1462 # ~~~~
1463 #
1464 # ::
1465
def diff(infile='-', outfile='-', txt2code=True, **keyw):
    """Report differences between converted infile and existing outfile

    If outfile is '-', do a round-trip conversion and report differences.

    infile   -- path of the source to convert ('-' for stdin)
    outfile  -- path of the file to compare the conversion with
    txt2code -- conversion direction, passed to `get_converter`
    keyw     -- passed to `get_converter` for the conversion of `infile`

    Print a unified diff (or a "no differences found" message) and return
    True if there are differences, False otherwise.
    """

    import difflib

    # for diffing, we need a copy of the data as list::
    if infile == '-':
        data = sys.stdin.readlines()
    else:
        instream = open(infile)
        try:
            data = instream.readlines()
        finally:
            instream.close()
    # convert
    converter = get_converter(data, txt2code, **keyw)
    new = converter()

    if outfile != '-':
        outstream = open(outfile)
        try:
            old = outstream.readlines()
        finally:
            outstream.close()
        oldname = outfile
        newname = "<conversion of %s>"%infile
    else:
        old = data
        oldname = infile
        # back-convert the output data
        # NOTE(review): `keyw` is not passed to the back-conversion, so it
        # runs with the converter defaults -- confirm this is intended.
        converter = get_converter(new, not txt2code)
        new = converter()
        newname = "<round-conversion of %s>"%infile

    # find and print the differences
    is_different = False
    delta = difflib.unified_diff(old, new,
                                 fromfile=oldname, tofile=newname)
    for line in delta:
        is_different = True
        # lines of the diff come with their own line ending
        sys.stdout.write(line)
    if not is_different:
        print(oldname)
        print(newname)
        print("no differences found")
    return is_different
1509
1510
1511 # execute
1512 # ~~~~~~~
1513 #
1514 # Works only for python code.
1515 #
1516 # Does not work with `eval`, as code is not just one expression. ::
1517
def execute(infile="-", txt2code=True, **keyw):
    """Execute the input file. Convert first, if it is a text source.

    infile   -- path of the source ('-' for stdin)
    txt2code -- True: `infile` is a text source and is converted to code
                with `Text2Code` (passing on `keyw`) before execution
    """
    if infile == '-':
        in_stream = sys.stdin
    else:
        in_stream = open(infile)
    try:
        if txt2code:
            data = str(Text2Code(in_stream, **keyw))
        else:
            data = in_stream.read()
    finally:
        if in_stream is not sys.stdin:
            in_stream.close()
    # NOTE: this runs arbitrary code from `infile` inside this process;
    # only use it with trusted sources.
    exec(data)
1527
1528
1529 # main
1530 # ----
1531 #
1532 # If this script is called from the command line, the `main` function will
1533 # convert the input (file or stdin) between text and code formats.
1534 #
1535 # Option default values for the conversion can be given as keyword arguments
1536 # to `main`_.  The option defaults will be updated by command line options and
1537 # extended with "intelligent guesses" by `PylitOptions`_ and passed on to
1538 # helper functions and the converter instantiation.
1539 #
1540 # This allows easy customization for programmatic use -- just call `main`
1541 # with the appropriate keyword options, e.g. ``pylit.main(comment_string="## ")``
1542 #
1543 # ::
1544
def main(args=None, **defaults):
    """%prog [options] INFILE [OUTFILE]

    Convert between (reStructured) text source with embedded code,
    and code source with embedded documentation (comment blocks)

    The special filename '-' stands for standard in and output.
    """
    # The docstring above doubles as usage message of the option parser.
    #
    # args     -- list of command line arguments
    #             (default None: use ``sys.argv[1:]`` at call time)
    # defaults -- option default values

# Parse and complete the options::

    options = PylitOptions()(args, **defaults)

# Special actions with early return::

    if options.doctest:
        return run_doctest(**options.as_dict())

    if options.diff:
        return diff(**options.as_dict())

    if options.execute:
        return execute(**options.as_dict())

# Open in- and output streams::

    try:
        (data, out_stream) = open_streams(**options.as_dict())
    except IOError as ex:
        print("IOError: %s %s" % (ex.filename, ex.strerror))
        sys.exit(ex.errno)

# Get a converter instance::

    converter = get_converter(data, **options.as_dict())

# Convert and write to out_stream. Close the input file afterwards, so that
# it is no longer in use if it is renamed below::

    try:
        out_stream.write(str(converter))
    finally:
        if data is not sys.stdin:
            data.close()

    if out_stream is not sys.stdout:
        print("extract written to %s" % out_stream.name)
        out_stream.close()

# If input and output are from files, set the modification time (`mtime`) of
# the output file to the one of the input file to indicate that the contained
# information is equal. [#]_ ::

        try:
            os.utime(options.outfile, (os.path.getatime(options.outfile),
                                       os.path.getmtime(options.infile))
                    )
        except OSError:
            # best effort, e.g. the input comes from stdin
            pass

    ## print "mtime", os.path.getmtime(options.infile),  options.infile
    ## print "mtime", os.path.getmtime(options.outfile), options.outfile


# .. [#] Make sure the corresponding file object (here `out_stream`) is
#        closed, as otherwise the change will be overwritten when `close` is
#        called afterwards (either explicitly or at program exit).
#
#
# Rename the infile to a backup copy if ``--replace`` is set::

    if options.replace:
        os.rename(options.infile, options.infile + "~")
1614
1615
1616 # Run main, if called from the command line::
1617
if __name__ == '__main__':
    # called as script: `main` is called without arguments
    main()
1620
1621
1622 # Open questions
1623 # ==============
1624 #
1625 # Open questions and ideas for further development
1626 #
1627 # Clean code
1628 # ----------
1629 #
1630 # * can we gain from using "shutils" over "os.path" and "os"?
1631 # * use pylint or pyChecker to enforce a consistent style?
1632 #
1633 # Options
1634 # -------
1635 #
1636 # * Use templates for the "intelligent guesses" (with Python syntax for string
1637 #   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
1638 #
1639 # * Is it sensible to offer the `header_string` option also as command line
1640 #   option?
1641 #
1642 # * treatment of blank lines:
1643 #
1644 #   * Alternatives: Keep blank lines blank
1645 #
1646 #     + "always",
1647 #
1648 #     + "if empty" (no whitespace). Comment if there is whitespace.
1649 #
1650 #       This would allow non-obstructing markup but unfortunately this is (in
1651 #       most editors) also non-visible markup -> bad.
1652 #
1653 #     + "if double" (if there is more than one consecutive blank line)
1654 #
1655 #     + "never" (current setting)
1656 #
1657 #   So the setting could be something like::
1658
1659 #     defaults.keep_blank_lines = { "python": "if double",
1660 #                                   "elisp": "always"}
1661 #
1662 #
1663 # Parsing Problems
1664 # ----------------------
1665 #
1666 # * Ignore "matching comments" in literal strings?
1667 #
1668 #   Too complicated: Would need a specific detection algorithm for every
1669 #   language that supports multi-line literal strings (C++, PHP, Python)
1670 #
1671 # * Warn if a comment in code will become documentation after round-trip?
1672 #
1673 #
1674 # docstrings in code blocks
1675 # --------------------------
1676 #
1677 # * How to handle docstrings in code blocks? (it would be nice to convert them
1678 #   to rst-text if ``__docformat__ == restructuredtext``)
1679 #
1680 # TODO: Ask at docutils users|developers
1681 #
1682 # .. References
1683 #
1684 # .. _docutils:
1685 #     http://docutils.sourceforge.net/
1686 # .. _indented literal block:
1687 # .. _indented literal blocks:
1688 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#indented-literal-blocks
1689 # .. _quoted literal block:
1690 # .. _quoted literal blocks:
1691 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
1692 # .. _doctest block:
1693 # .. _doctest blocks:
1694 #     http://docutils.sf.net/docs/ref/rst/restructuredtext.html#doctest-blocks
1695 # .. _pygments: http://pygments.org/
1696 # .. _syntax highlight: ../features/syntax-highlight.html
1697 # .. _parsed-literal blocks:
1698 #     http://docutils.sf.net/docs/ref/rst/directives.html#parsed-literal-block