From 48a11456a885f484c9937242baa99c4ab64d798a Mon Sep 17 00:00:00 2001 From: milde Date: Fri, 2 Mar 2007 17:22:48 +0000 Subject: [PATCH] pylit.py version 0.3.1 expand hard-tabs to prevent errors in indentation. `Text2Code` now also works on blocks removed dependency on SimpleStates module pylit_test.py: adapt to version 0.3.1: skip "insert empty line after..." tests simplestates.py: cosmetic changes due to a pylit round trip git-svn-id: http://svn.berlios.de/svnroot/repos/pylit/trunk@23 fb71aa59-6827-0410-b536-ee2229a4f8e3 --- rstdocs/examples/pylit.py.html | 309 +++++++++++++++++--------------- rstdocs/examples/pylit.py.txt | 135 ++++++++------ src/pylit.py | 389 +++++++++++++++++------------------------ src/simplestates.py | 6 +- test/pylit_test.py | 40 +++-- 5 files changed, 431 insertions(+), 448 deletions(-) diff --git a/rstdocs/examples/pylit.py.html b/rstdocs/examples/pylit.py.html index 2640721..f60b0a1 100644 --- a/rstdocs/examples/pylit.py.html +++ b/rstdocs/examples/pylit.py.html @@ -311,85 +311,86 @@ Released under the terms of the GNU General Public License

Contents

-

1   Frontmatter

+

1   Frontmatter

-

1.1   Changelog

+

1.1   Changelog

@@ -426,7 +427,9 @@ new iter_strip method replacing a lot of - +
2007-02-22:0.2.8 set mtime of outfile to the one of infile
2007-02-27:0.3 new Code2Text converter after an idea by Riccardo Murri
2007-02-27:0.3 new Code2Text converter after an idea by Riccardo Murri +a new Text2Code will follow soon +explicite option_defaults dict for easier customization
@@ -440,10 +443,12 @@ new iter_strip method replacing a lot of -

1.2   Requirements

+

1.2   Requirements

  • library modules
@@ -462,9 +467,45 @@ from simplestates import SimpleStates # generic state machine
-

2   Classes

+

2   Customization

+
+option_defaults = {}
+
+

Default language and language specific defaults:

+
+option_defaults["language"] =        "python"
+option_defaults["comment_strings"] = {"python": '# ',
+                                      "slang":  '% ',
+                                      "c++":    '// ',
+                                      "elisp":  ';; '}
+
+

Recognized file extensions for text and code versions of the source. +Used to guess the language from the filename.

+
+option_defaults["code_languages"]  = {".py": "python",
+                                      ".sl": "slang",
+                                      ".c": "c++",
+                                      ".el":"elisp"}
+option_defaults["code_extensions"] = option_defaults["code_languages"].keys()
+option_defaults["text_extensions"] = [".txt"]
+
+

Number of spaces to indent code blocks in the code -> text conversion.[#]_

+ + + + + +
[2]For the text -> code conversion, the codeindent is determined by the +first recognized code line (leading comment or first indented literal +block of the text source).
+
+option_defaults["codeindent"] =  2
+
+
+
+

3   Classes

-

2.1   PushIterator

+

3.1   PushIterator

The PushIterator is a minimal implementation of an iterator with backtracking from the Effective Python Programming OSCON 2005 tutorial by Anthony Baxter. As the definition is small, it is inlined now. For the full @@ -484,7 +525,7 @@ class PushIterator(object):

-

2.2   Converter

+

3.2   Converter

The converter classes implement a simple state machine to separate and transform text and code blocks. For this task, only a very limited parsing is needed. Using the full blown docutils rst parser would introduce a @@ -509,7 +550,7 @@ class PyLitConverter(SimpleStates): """

-

2.2.1   Data attributes

+

3.2.1   Data attributes

The data attributes are class default values. They will be overridden by matching keyword arguments during class instantiation.

get_converter and main pass on unused keyword arguments to @@ -517,22 +558,12 @@ the instantiation of a converter class. This way, keyword arguments to these functions can be used to customize the converter.

Default language and language specific defaults:

-language =        "python"
-comment_strings = {"python": '# ',
-                   "slang": '% ',
-                   "c++": '// '}
+language = option_defaults["language"]
+comment_strings = option_defaults["comment_strings"]
 
-

Number of spaces to indent code blocks in the code -> text conversion.[#]_

- - - - - -
[2]For the text -> code conversion, the codeindent is determined by the -first recognized code line (leading comment or first indented literal -block of the text source).
+

Number of spaces to indent code blocks in the code -> text conversion:

-codeindent =  2
+codeindent =  option_defaults["codeindent"]
 

Marker string for the first code block. (Should be a valid rst directive that accepts code on the same line, e.g. '.. admonition::'.) No @@ -551,7 +582,7 @@ state = 'header'

-

2.2.2   Instantiation

+

3.2.2   Instantiation

Initializing sets up the data attribute, an iterable object yielding lines of the source to convert.[1]_

@@ -576,7 +607,7 @@ class defaults:

 self.__dict__.update(keyw)
 
-

The comment string is set to the languages comment string if not given in +

The comment string is set to the language's comment string if not given in the keyword arguments:

 if not hasattr(self, "comment_string") or not self.comment_string:
@@ -594,7 +625,7 @@ with the optional keepends argument set to True.

-

2.2.3   Converter.__str__

+

3.2.3   Converter.__str__

Return converted data as string:

 def __str__(self):
@@ -603,7 +634,7 @@ def __str__(self):
 
-

2.2.4   Converter.get_indent

+

3.2.4   Converter.get_indent

Return the number of leading spaces in string after expanding tabs

 def get_indent(self, string):
@@ -614,7 +645,7 @@ def get_indent(self, string):
 
-

2.2.5   Converter.ensure_trailing_blank_line

+

3.2.5   Converter.ensure_trailing_blank_line

Ensure there is a blank line as last element of the list lines:

 def ensure_trailing_blank_line(self, lines, next_line):
@@ -627,7 +658,7 @@ def ensure_trailing_blank_line(self, lines, next_line):
 
-

2.2.6   Converter.collect_blocks

+

3.2.6   Converter.collect_blocks

 def collect_blocks(self):
     """collect lines in a list
@@ -646,7 +677,7 @@ def collect_blocks(self):
 
-

2.3   Text2Code

+

3.3   Text2Code

The Text2Code class separates code blocks (indented literal blocks) from reStructured text. Code blocks are unindented, text is commented (or filtered, if the strip option is True).

@@ -684,7 +715,7 @@ def __init__(self, data, **keyw): self.__iter__ = self.iter_strip
-

2.3.1   Text2Code.header

+

3.3.1   Text2Code.header

Convert the header (leading rst comment block) to code:

 def header(self):
@@ -724,7 +755,7 @@ return []
 
-

2.3.2   Text2Code.text_handler_generator

+

3.3.2   Text2Code.text_handler_generator

The 'text' handler processes everything that is not an indented literal comment. Text is quoted with self.comment_string or filtered (with strip=True).

@@ -788,7 +819,7 @@ yield lines
-

2.3.3   Text2Code.code_handler_generator

+

3.3.3   Text2Code.code_handler_generator

The code handler is called when a literal block marker is encountered. It returns a code block (indented literal block), removing leading whitespace up to the indentation of the first code line in the file (this deviation @@ -858,7 +889,7 @@ yield lines

-

2.3.4   Txt2Code.remove_literal_marker

+

3.3.4   Txt2Code.remove_literal_marker

Remove literal marker (::) in "expanded form" i.e. in a paragraph on its own.

While cleaning up the code source, it leads to confusion for doctest and searches (e.g. grep) as line-numbers between text and code source will @@ -875,7 +906,7 @@ def remove_literal_marker(list):

-

2.3.5   Text2Code.iter_strip

+

3.3.5   Text2Code.iter_strip

Modification of the simplestates.__iter__ method that will replace it when the strip keyword argument is True during class instantiation:

Iterate over class instances dropping text blocks:

@@ -892,21 +923,19 @@ def iter_strip(self):
-

2.4   Code2Text

+

3.4   Code2Text

The Code2Text class does the opposite of Text2Code -- it processes valid source code, extracts comments, and puts non-commented code in literal blocks.

-

Only lines starting with a comment string matching the one in the -comment_string data attribute are considered text lines.

-

The class is derived from the PyLitConverter state machine and adds handlers -for the three states "header", "text", and "code".

+

The class is derived from the PyLitConverter state machine and adds an +__iter__ method as well as handlers for "text", and "code" states.

 class Code2Text(PyLitConverter):
     """Convert code source to text source
     """
 
-

2.4.1   Code2Text.__iter__

+

3.4.1   Code2Text.__iter__

 def __iter__(self):
 
@@ -936,7 +965,7 @@ yield getattr(self, self.state)(block)
-

2.4.2   "header" state

+

3.4.2   "header" state

Sometimes code needs to remain on the first line(s) of the document to be valid. The most common example is the "shebang" line that tells a POSIX shell how to process an executable file:

@@ -948,7 +977,7 @@ other comment or code.

If we want to keep the line numbers in sync for text and code source, the reStructured Text markup for these header lines must start at the same line as the first header line. Therefore, header lines could not be marked as -literal block (this would require the "::" and an empty line above the code).

+literal block (this would require the :: and an empty line above the code).

OTOH, a comment may start at the same line as the comment marker and it includes subsequent indented lines. Comments are visible in the reStructured Text source but hidden in the pretty-printed output.

@@ -984,7 +1013,7 @@ after) the first text block, e.g. with a line block in a bloc will overwrite this setting.

-

2.4.3   Code2Text.text

+

3.4.3   Code2Text.text

The text state handler converts a comment to a text block by stripping the leading comment string from every line:

@@ -1021,7 +1050,7 @@ return lines
 
-

2.4.4   Code2Text.code

+

3.4.4   Code2Text.code

The code method is called on non-commented code. Code is returned as indented literal block (or filtered, if self.strip == True). The amount of the code indentation is controlled by self.codeindent (default 2).

@@ -1036,7 +1065,7 @@ def code(self, lines):
-

2.4.5   Code2Text.block_is_text

+

3.4.5   Code2Text.block_is_text

A paragraph is a text block, if every non-blank line starts with a matching comment string (test includes whitespace except for commented blank lines!)

@@ -1050,7 +1079,7 @@ def block_is_text(self, block):
 
-

2.4.6   Code2Text.strip_literal_marker

+

3.4.6   Code2Text.strip_literal_marker

Replace the literal marker with the equivalent of docutils replace rules

  • strip ::-line (and preceding blank line) if on a line on its own
  • @@ -1090,13 +1119,13 @@ def strip_literal_marker(self, lines):
-

3   Command line use

+

4   Command line use

Using this script from the command line will convert a file according to its extension. This default can be overridden by a couple of options.

-

3.1   Dual source handling

+

4.1   Dual source handling

-

3.1.1   How to determine which source is up-to-date?

+

4.1.1   How to determine which source is up-to-date?

-

3.2   OptionValues

+

4.2   OptionValues

For use as keyword arguments, it is handy to have the options in a dictionary. The following class adds an as_dict method to optparse.Values:

@@ -1169,7 +1198,7 @@ class OptionValues(optparse.Values):
-

3.3   PylitOptions

+

4.3   PylitOptions

Options are stored in the values attribute of the PylitOptions class. It is initialized with default values and parsed command line options (and arguments) This scheme allows easy customization by code importing the @@ -1179,26 +1208,20 @@ class PylitOptions(object): """Storage and handling of program options """ -

Recognized file extensions for text and code versions of the source:

-
-code_languages  = {".py": "python",
-                   ".sl": "slang",
-                   ".c": "c++"}
-code_extensions = code_languages.keys()
-text_extensions = [".txt"]
-
-

3.3.1   Instantiation

+

4.3.1   Instantiation

Instantiation sets up an OptionParser and initializes it with pylit's command line options and default_values. It then updates the values based on command line options and sensible defaults:

-def __init__(self, args=sys.argv[1:], **default_values):
+def __init__(self, args=sys.argv[1:], **keyw):
     """Set up an `OptionParser` instance and parse and complete arguments
     """
-    p = optparse.OptionParser(usage=main.__doc__, version="0.2")
-    # set defaults
-    p.set_defaults(**default_values)
+    p = optparse.OptionParser(usage=main.__doc__, version=_version)
+    # set defaults (from modules option_defaults dict and keyword args)
+    defaults = dict(option_defaults) # copy module-level defaults
+    defaults.update(keyw)
+    p.set_defaults(**defaults)
     # add the options
     p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                  help="convert code to reStructured text")
@@ -1212,6 +1235,9 @@ def __init__(self, args=sys.argv[1:], **default_values):
                  help="execute code (Python only)")
     p.add_option("-f", "--infile",
                  help="input file name ('-' for stdout)" )
+    p.add_option("--language", action="store",
+                 choices = option_defaults["code_languages"].values(),
+                 help="use LANGUAGE native comment style")
     p.add_option("--overwrite", action="store",
                  choices = ["yes", "update", "no"],
                  help="overwrite output file (default 'update')")
@@ -1232,7 +1258,7 @@ def __init__(self, args=sys.argv[1:], **default_values):
 
-

3.3.2   Calling

+

4.3.2   Calling

"Calling" an instance updates the option values based on command line arguments and default values and does a completion of the options based on "context-sensitive defaults":

@@ -1245,7 +1271,7 @@ def __call__(self, args=sys.argv[1:], **default_values):
-

3.3.3   PylitOptions.parse_args

+

4.3.3   PylitOptions.parse_args

The parse_args method calls the optparse.OptionParser on command line or provided args and returns the result as PylitOptions.Values instance. Defaults can be provided as keyword arguments:

@@ -1272,7 +1298,7 @@ def parse_args(self, args=sys.argv[1:], **default_values):
-

3.3.4   PylitOptions.complete_values

+

4.3.4   PylitOptions.complete_values

The complete method uses context information to set missing option values to sensible defaults (if possible).

@@ -1283,9 +1309,9 @@ def complete_values(self, values):
     # Guess conversion direction from infile filename
     if values.ensure_value("txt2code", None) is None:
         in_extension = os.path.splitext(values.infile)[1]
-        if in_extension in self.text_extensions:
+        if in_extension in self.values.text_extensions:
             values.txt2code = True
-        elif in_extension in self.code_extensions:
+        elif in_extension in self.values.code_extensions:
             values.txt2code = False
     # Auto-determine the output file name
     values.ensure_value("outfile", self.get_outfile_name(values.infile,
@@ -1293,7 +1319,7 @@ def complete_values(self, values):
     # Guess conversion direction from outfile filename or set to default
     if values.txt2code is None:
         out_extension = os.path.splitext(values.outfile)[1]
-        values.txt2code = not (out_extension in self.text_extensions)
+        values.txt2code = not (out_extension in self.values.text_extensions)
 
     # Set the language of the code (default "python")
     if values.txt2code is True:
@@ -1301,8 +1327,7 @@ def complete_values(self, values):
     elif values.txt2code is False:
         code_extension = os.path.splitext(values.infile)[1]
     values.ensure_value("language",
-                        self.code_languages.get(code_extension, "python"))
-
+                        self.values.code_languages.get(code_extension, "python"))
     # Set the default overwrite mode
     values.ensure_value("overwrite", 'update')
 
@@ -1310,7 +1335,7 @@ def complete_values(self, values):
 
-

3.3.5   PylitOptions.get_outfile_name

+

4.3.5   PylitOptions.get_outfile_name

Construct a matching filename for the output file. The output filename is constructed from infile by the following rules:

    @@ -1332,20 +1357,20 @@ def get_outfile_name(self, infile, txt2code=None): # if it exists? # strip text extension - if ext in self.text_extensions: + if ext in self.values.text_extensions: return base # add (first) text extension for code files - if ext in self.code_extensions or txt2code == False: - return infile + self.text_extensions[0] + if ext in self.values.code_extensions or txt2code == False: + return infile + self.values.text_extensions[0] # give up return infile + ".out"
-

3.4   Helper functions

+

4.4   Helper functions

-

3.4.1   open_streams

+

4.4.1   open_streams

Return file objects for in- and output. If the input path is missing, write usage and abort. (An alternative would be to use stdin as default. However, this leaves the uninitiated user with a non-responding application @@ -1382,7 +1407,7 @@ def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):

-

3.4.2   is_newer

+

4.4.2   is_newer

 def is_newer(path1, path2):
     """Check if `path1` is newer than `path2` (using mtime)
@@ -1412,7 +1437,7 @@ def is_newer(path1, path2):
 
-

3.4.3   get_converter

+

4.4.3   get_converter

Get an instance of the converter state machine:

 def get_converter(data, txt2code=True, **keyw):
@@ -1424,9 +1449,9 @@ def get_converter(data, txt2code=True, **keyw):
 
-

3.5   Use cases

+

4.5   Use cases

-

3.5.1   run_doctest

+

4.5.1   run_doctest

 def run_doctest(infile="-", txt2code=True,
                 globs={}, verbose=False, optionflags=0, **keyw):
@@ -1457,7 +1482,7 @@ return runner.failures, runner.tries
 
-

3.5.2   diff

+

4.5.2   diff

 def diff(infile='-', outfile='-', txt2code=True, **keyw):
     """Report differences between converted infile and existing outfile
@@ -1501,11 +1526,11 @@ def diff(infile='-', outfile='-', txt2code=True, **keyw):
 
-

3.6   main

+

4.6   main

If this script is called from the command line, the main function will convert the input (file or stdin) between text and code formats.

-

3.6.1   Customization

+

4.6.1   Customization

Option defaults for the conversion can be as keyword arguments to main. The option defaults will be updated by command line options and extended with "intelligent guesses" by PylitOptions and passed on to helper @@ -1594,10 +1619,10 @@ else: ## print "mtime", os.path.getmtime(options.infile), options.infile ## print "mtime", os.path.getmtime(options.outfile), options.outfile - +
- @@ -1611,10 +1636,10 @@ if __name__ == '__main__':
-

4   Open questions

+

5   Open questions

Open questions and ideas for further development

-

4.1   Options

+

5.1   Options

  • Collect option defaults in a dictionary (on module level)

    Facilitates the setting of options in programmatic use

    @@ -1629,7 +1654,7 @@ option?

-

4.2   Parsing Problems

+

5.2   Parsing Problems

  • How can I include a literal block that should not be in the executable code (e.g. an example, an earlier version or variant)?

    @@ -1656,7 +1681,7 @@ supports multi-line literal strings (C++, PHP, Python)

-

4.3   code syntax highlight

+

5.3   code syntax highlight

use the listings package in LaTeX->PDF

in html, see

    @@ -1679,7 +1704,7 @@ to rst-text if __docformat__ diff --git a/rstdocs/examples/pylit.py.txt b/rstdocs/examples/pylit.py.txt index b05685d..b27eaef 100644 --- a/rstdocs/examples/pylit.py.txt +++ b/rstdocs/examples/pylit.py.txt @@ -38,7 +38,9 @@ Changelog new `iter_strip` method replacing a lot of ``if``-s :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile :2007-02-27: 0.3 new `Code2Text` converter after an idea by Riccardo Murri - + a new `Text2Code` will follow soon + explicite `option_defaults` dict for easier customization +:2007-03-02: 0.3.1 expand hard-tabs to prevent errors in indentation. :: """pylit: Literate programming with Python and reStructuredText @@ -51,6 +53,8 @@ Changelog __docformat__ = 'restructuredtext' + _version = "0.3" + Requirements ------------ @@ -70,6 +74,42 @@ Requirements from simplestates import SimpleStates # generic state machine +Customization +============= + +Collect option defaults in a dictionary (on module level). This facilitates +the setting of options in programmatic use :: + + option_defaults = {} + +Default language and language specific defaults:: + + option_defaults["language"] = "python" + option_defaults["comment_strings"] = {"python": '# ', + "slang": '% ', + "c++": '// ', + "elisp": ';; '} + +Recognized file extensions for text and code versions of the source. +Used to guess the language from the filename. :: + + option_defaults["code_languages"] = {".py": "python", + ".sl": "slang", + ".c": "c++", + ".el":"elisp"} + option_defaults["code_extensions"] = option_defaults["code_languages"].keys() + option_defaults["text_extensions"] = [".txt"] + +Number of spaces to indent code blocks in the code -> text conversion. [#]_ + +.. [#] For the text -> code conversion, the codeindent is determined by the + first recognized code line (leading comment or first indented literal + block of the text source). 
+ +:: + + option_defaults["codeindent"] = 2 + Classes ======= @@ -144,20 +184,12 @@ to these functions can be used to customize the converter. Default language and language specific defaults:: - language = "python" - comment_strings = {"python": '# ', - "slang": '% ', - "c++": '// '} - -Number of spaces to indent code blocks in the code -> text conversion.[#]_ - -.. [#] For the text -> code conversion, the codeindent is determined by the - first recognized code line (leading comment or first indented literal - block of the text source). - -:: + language = option_defaults["language"] + comment_strings = option_defaults["comment_strings"] + +Number of spaces to indent code blocks in the code -> text conversion:: - codeindent = 2 + codeindent = option_defaults["codeindent"] Marker string for the first code block. (Should be a valid rst directive that accepts code on the same line, e.g. ``'.. admonition::'``.) No @@ -202,7 +234,7 @@ class defaults:: self.__dict__.update(keyw) -The comment string is set to the languages comment string if not given in +The comment string is set to the language's comment string if not given in the keyword arguments:: if not hasattr(self, "comment_string") or not self.comment_string: @@ -226,12 +258,12 @@ Return converted data as string:: Converter.get_indent ~~~~~~~~~~~~~~~~~~~~ -Return the number of leading spaces in `string` after expanding tabs :: +Return the number of leading spaces in `line` after expanding tabs :: - def get_indent(self, string): + def get_indent(self, line): """Return the indentation of `string`. """ - line = string.expandtabs() + # line = line.expandtabs() return len(line) - len(line.lstrip()) Converter.ensure_trailing_blank_line @@ -257,11 +289,13 @@ Converter.collect_blocks """collect lines in a list return list for each block of lines (paragraph) seperated by a - blank line (whitespace only) + blank line (whitespace only). + + Also expand hard-tabs as these will lead to errors in indentation. 
""" block = [] for line in self.data: - block.append(line) + block.append(line.expandtabs()) if not line.rstrip(): yield block block = [] @@ -532,11 +566,8 @@ The `Code2Text` class does the opposite of `Text2Code`_ -- it processes valid source code, extracts comments, and puts non-commented code in literal blocks. -Only lines starting with a comment string matching the one in the -`comment_string` data attribute are considered text lines. - -The class is derived from the PyLitConverter state machine and adds handlers -for the three states "header", "text", and "code". :: +The class is derived from the PyLitConverter state machine and adds an +`__iter__` method as well as handlers for "text", and "code" states. :: class Code2Text(PyLitConverter): """Convert code source to text source @@ -572,7 +603,7 @@ processed with the matching handler:: yield self.code_marker self.state = "code" yield getattr(self, self.state)(block) - + "header" state ~~~~~~~~~~~~~~~~ @@ -588,7 +619,7 @@ other comment or code. If we want to keep the line numbers in sync for text and code source, the reStructured Text markup for these header lines must start at the same line as the first header line. Therfore, header lines could not be marked as -literal block (this would require the "::" and an empty line above the code). +literal block (this would require the ``::`` and an empty line above the code). OTOH, a comment may start at the same line as the comment marker and it includes subsequent indented lines. Comments are visible in the reStructured @@ -639,7 +670,7 @@ the leading `comment string` from every line:: lines = [re.sub("^"+self.comment_string.rstrip(), "", line) for line in lines] - + If the code block is stripped, the literal marker would lead to an error when the text is converted with docutils. 
Replace it with `Code2Text.strip_literal_marker`_:: @@ -662,7 +693,7 @@ Return the text block to the calling function:: return lines - + Code2Text.code ~~~~~~~~~~~~~~ @@ -678,7 +709,7 @@ of the code indentation is controled by `self.codeindent` (default 2). return [] return [" "*self.codeindent + line for line in lines] - + Code2Text.block_is_text ~~~~~~~~~~~~~~~~~~~~~~~ @@ -835,14 +866,6 @@ arguments) This scheme allows easy customization by code importing the """Storage and handling of program options """ -Recognized file extensions for text and code versions of the source:: - - code_languages = {".py": "python", - ".sl": "slang", - ".c": "c++"} - code_extensions = code_languages.keys() - text_extensions = [".txt"] - Instantiation ~~~~~~~~~~~~~ @@ -850,12 +873,14 @@ Instantiation sets up an OptionParser and initializes it with pylit's command line options and `default_values`. It then updates the values based on command line options and sensible defaults:: - def __init__(self, args=sys.argv[1:], **default_values): + def __init__(self, args=sys.argv[1:], **keyw): """Set up an `OptionParser` instance and parse and complete arguments """ - p = optparse.OptionParser(usage=main.__doc__, version="0.2") - # set defaults - p.set_defaults(**default_values) + p = optparse.OptionParser(usage=main.__doc__, version=_version) + # set defaults (from modules option_defaults dict and keyword args) + defaults = dict(option_defaults) # copy module-level defaults + defaults.update(keyw) + p.set_defaults(**defaults) # add the options p.add_option("-c", "--code2txt", dest="txt2code", action="store_false", help="convert code to reStructured text") @@ -869,6 +894,9 @@ on command line options and sensible defaults:: help="execute code (Python only)") p.add_option("-f", "--infile", help="input file name ('-' for stdout)" ) + p.add_option("--language", action="store", + choices = option_defaults["code_languages"].values(), + help="use LANGUAGE native comment style") 
p.add_option("--overwrite", action="store", choices = ["yes", "update", "no"], help="overwrite output file (default 'update')") @@ -943,9 +971,9 @@ to sensible defaults (if possible). # Guess conversion direction from infile filename if values.ensure_value("txt2code", None) is None: in_extension = os.path.splitext(values.infile)[1] - if in_extension in self.text_extensions: + if in_extension in self.values.text_extensions: values.txt2code = True - elif in_extension in self.code_extensions: + elif in_extension in self.values.code_extensions: values.txt2code = False # Auto-determine the output file name values.ensure_value("outfile", self.get_outfile_name(values.infile, @@ -953,7 +981,7 @@ to sensible defaults (if possible). # Guess conversion direction from outfile filename or set to default if values.txt2code is None: out_extension = os.path.splitext(values.outfile)[1] - values.txt2code = not (out_extension in self.text_extensions) + values.txt2code = not (out_extension in self.values.text_extensions) # Set the language of the code (default "python") if values.txt2code is True: @@ -961,8 +989,7 @@ to sensible defaults (if possible). elif values.txt2code is False: code_extension = os.path.splitext(values.infile)[1] values.ensure_value("language", - self.code_languages.get(code_extension, "python")) - + self.values.code_languages.get(code_extension, "python")) # Set the default overwrite mode values.ensure_value("overwrite", 'update') @@ -993,11 +1020,11 @@ constructed from `infile` by the following rules: # if it exists? 
# strip text extension - if ext in self.text_extensions: + if ext in self.values.text_extensions: return base # add (first) text extension for code files - if ext in self.code_extensions or txt2code == False: - return infile + self.text_extensions[0] + if ext in self.values.code_extensions or txt2code == False: + return infile + self.values.text_extensions[0] # give up return infile + ".out" @@ -1252,7 +1279,7 @@ Rename the infile to a backup copy if ``--replace`` is set:: If not (and input and output are from files), set the modification time (`mtime`) of the output file to the one of the input file to indicate that -the contained information is equal.[#]_ :: +the contained information is equal. [#]_ :: else: try: @@ -1284,11 +1311,7 @@ Open questions and ideas for further development Options ------- -* Collect option defaults in a dictionary (on module level) - - Facilitates the setting of options in programmatic use - - Use templates for the "intelligent guesses" (with Python syntax for string +* Use templates for the "intelligent guesses" (with Python syntax for string replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``) * Is it sensible to offer the `header_string` option also as command line diff --git a/src/pylit.py b/src/pylit.py index 5a9d894..fa03d54 100755 --- a/src/pylit.py +++ b/src/pylit.py @@ -38,13 +38,12 @@ # new `iter_strip` method replacing a lot of ``if``-s # :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile # :2007-02-27: 0.3 new `Code2Text` converter after an idea by Riccardo Murri -# a new `Text2Code` will follow soon # explicite `option_defaults` dict for easier customization -# +# :2007-03-02: 0.3.1 expand hard-tabs to prevent errors in indentation. 
+# `Text2Code` now also works on blocks +# removed dependency on SimpleStates module # :: -_version = "0.3" - """pylit: Literate programming with Python and reStructuredText PyLit is a bidirectional converter between @@ -55,6 +54,8 @@ _version = "0.3" __docformat__ = 'restructuredtext' +_version = "0.3" + # Requirements # ------------ @@ -68,14 +69,11 @@ import os import sys import optparse -# * non-standard extensions -# -# :: - -from simplestates import SimpleStates # generic state machine - # Customization # ============= +# +# Collect option defaults in a dictionary (on module level). This facilitates +# the setting of options in programmatic use :: option_defaults = {} @@ -97,7 +95,7 @@ option_defaults["code_languages"] = {".py": "python", option_defaults["code_extensions"] = option_defaults["code_languages"].keys() option_defaults["text_extensions"] = [".txt"] -# Number of spaces to indent code blocks in the code -> text conversion.[#]_ +# Number of spaces to indent code blocks in the code -> text conversion. [#]_ # # .. [#] For the text -> code conversion, the codeindent is determined by the # first recognized code line (leading comment or first indented literal @@ -107,37 +105,9 @@ option_defaults["text_extensions"] = [".txt"] option_defaults["codeindent"] = 2 - # Classes # ======= -# -# PushIterator -# ------------ -# -# The PushIterator is a minimal implementation of an iterator with -# backtracking from the `Effective Python Programming`_ OSCON 2005 tutorial by -# Anthony Baxter. As the definition is small, it is inlined now. For the full -# reasoning and documentation see `iterqueue.py`_. -# -# .. _`Effective Python Programming`: -# http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf -# -# .. 
_iterqueue.py: iterqueue.py.html -# -# :: - -class PushIterator(object): - def __init__(self, iterable): - self.it = iter(iterable) - self.cache = [] - def __iter__(self): - """Return `self`, as this is already an iterator""" - return self - def next(self): - return (self.cache and self.cache.pop()) or self.it.next() - def push(self, value): - self.cache.append(value) # Converter # --------- @@ -165,7 +135,7 @@ class PushIterator(object): # overrides the ``__init__`` method, and adds auxiliary methods and # configuration attributes (options). :: -class PyLitConverter(SimpleStates): +class PyLitConverter(object): """parent class for `Text2Code` and `Code2Text`, the state machines converting between text source and code source of a literal program. """ @@ -205,11 +175,11 @@ class PyLitConverter(SimpleStates): state = 'header' -# Instantiation -# ~~~~~~~~~~~~~ +# Converter.__init__ +# ~~~~~~~~~~~~~~~~~~ # # Initializing sets up the `data` attribute, an iterable object yielding -# lines of the source to convert.[1]_ :: +# lines of the source to convert. [1]_ :: def __init__(self, data, **keyw): """data -- iterable data object @@ -217,15 +187,7 @@ class PyLitConverter(SimpleStates): **keyw -- all remaining keyword arguments are stored as class attributes """ - -# As the state handlers need backtracking, the data is wrapped in a -# `PushIterator`_ if it doesnot already have a `push` method:: - - if hasattr(data, 'push'): - self.data = data - else: - self.data = PushIterator(data) - self._textindent = 0 + self.data = data # Additional keyword arguments are stored as data attributes, overwriting the # class defaults:: @@ -243,7 +205,19 @@ class PyLitConverter(SimpleStates): # # To convert a string into a suitable object, use its splitlines method # with the optional `keepends` argument set to True. -# + +# Converter.__call__ +# ~~~~~~~~~~~~~~~~~ +# +# The special `__call__` method allows use of class instances as callable +# objects. 
It returns the converted data as list +# TODO: return a list of lines +# :: + + def __call__(self): + """Iterate over state-machine and return results as a list""" + return [token for token in self] + # Converter.__str__ # ~~~~~~~~~~~~~~~~~ # @@ -256,12 +230,12 @@ class PyLitConverter(SimpleStates): # Converter.get_indent # ~~~~~~~~~~~~~~~~~~~~ # -# Return the number of leading spaces in `string` after expanding tabs :: +# Return the number of leading spaces in `line` after expanding tabs :: - def get_indent(self, string): + def get_indent(self, line): """Return the indentation of `string`. """ - line = string.expandtabs() + # line = line.expandtabs() return len(line) - len(line.lstrip()) # Converter.ensure_trailing_blank_line @@ -287,11 +261,13 @@ class PyLitConverter(SimpleStates): """collect lines in a list return list for each block of lines (paragraph) seperated by a - blank line (whitespace only) + blank line (whitespace only). + + Also expand hard-tabs as these will lead to errors in indentation. """ block = [] for line in self.data: - block.append(line) + block.append(line.expandtabs()) if not line.rstrip(): yield block block = [] @@ -325,24 +301,48 @@ class Text2Code(PyLitConverter): """Convert a (reStructured) text source to code source """ -# INIT: call the parent classes init method. +# Text2Code.__iter__ +# ~~~~~~~~~~~~~~~~~~ # -# If the `strip` argument is true, replace the `__iter_` method -# with a special one that drops "spurious" blocks:: +# Data is collected into "blocks" separated by blank lines. The state is set +# by the `set_state` method based on markers or indentation in the block. 
+# :: - def __init__(self, data, **keyw): - PyLitConverter.__init__(self, data, **keyw) - if getattr(self, "strip", False): - self.__iter__ = self.iter_strip + def __iter__(self): + """Iterate over text source and return lists of code-source lines""" -# Text2Code.header -# ~~~~~~~~~~~~~~~~ -# -# Convert the header (leading rst comment block) to code:: +# At start, the check for "text" or "code" needs to check for the +# `header_string`:: +# + self.set_state = self.header_test - def header(self): - """Convert header (comment) to code""" - line = self.data_iterator.next() +# indent of first non-blank code line, set in `code` method + + self.codeindent = None + +# text indent level (needed by the code handler to find the +# end of code block):: + + self._textindent = 0 + +# The "code" to "text" state transition is detected in the first non-code +# block. `header_test` will set `set_state` to `code_test` which checks the +# indentation. +# +# The "text" to "code" state transition is codified in the preceding "text" +# block. This is why the "end-of-text" test is performed inside the `text` +# state handler. + + for block in self.collect_blocks(): + if self.state != "text": + self.state = self.set_state(block) + yield getattr(self, self.state)(block) + + + +# Text2Code.header_test +# ~~~~~~~~~~~~~~~~~~~~~ +# # Test first line for rst comment: (We need to do this explicitely here, as # the code handler will only recognize the start of a text block if a line @@ -358,165 +358,118 @@ class Text2Code(PyLitConverter): # 2. Convert any leading comment to code:: - if line.startswith(self.header_string): - -# Strip leading comment string (typically added by `Code2Text.header`) and -# return the result of processing the data with the code handler:: - - self.data_iterator.push(line.replace(self.header_string, "", 1)) - return self.code() + def header_test(self, lines): + """Return whether the header block is "text" or "code". 
+ + Strip `self.header_string` if present.""" + + # from now, do the normal code-block test + self.set_state = self.code_test -# No header code found: Push back first non-header line and set state to -# "text":: + if lines[0].startswith(self.header_string): + lines[0] = lines[0].replace(self.header_string, "", 1) + return "code" + return "text" - self.data_iterator.push(line) - self.state = 'text' - return [] +# Code2Text.code_test -# Text2Code.text_handler_generator -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Test for end of code block, return next state. Also check if there are +# lines less indented as `codeindent` -- which would lead to data loss by the +# unindent done by the `code` method. # -# The 'text' handler processes everything that is not an indented literal -# comment. Text is quoted with `self.comment_string` or filtered (with -# strip=True). +# A literal block ends with the first less indented, nonblank line. +# `self._textindent` is set by the text handler to the indent of the +# preceding paragraph. # -# It is implemented as a generator function that acts on the `data` iterator -# and yields text blocks. +# :: + + def code_test(self, block): + """test code block for end of "code" state, return next state + """ + indents = [self.get_indent(line) for line in block] + if min(indents) <= self._textindent: + return 'text' + return 'code' + +# TODO: insert blank line before the first line with too-small codeindent? +# self.ensure_trailing_blank_line(lines, line) + + +# Text2Code.text +# ~~~~~~~~~~~~~~ # -# Declaration and initialization:: +# The 'text' handler processes everything that is not an indented literal +# comment. Text is quoted with `self.comment_string` or filtered (with +# strip=True). 
:: - def text_handler_generator(self): + def text(self, lines): """Convert text blocks from rst to comment """ - lines = [] -# Iterate over the data_iterator (which yields the data lines):: - - for line in self.data_iterator: - # print "Text: '%s'"%line - -# Default action: add comment string and collect in `lines` list:: - - lines.append(self.comment_string + line) + lines = [self.comment_string + line for line in lines] -# Test for the end of the text block: a line that ends with `::` but is neither -# a comment nor a directive:: +# Test for the end of the text block: does the second last line end with +# `::` but is neither a comment nor a directive? +# TODO: allow different code marking directives (for syntax color etc) +# :: - if (line.rstrip().endswith("::") - and not line.lstrip().startswith("..")): - -# End of text block is detected, now: -# + try: + line = lines[-2] + except IndexError: # len(lines < 2) + line = "" + if (line.rstrip().endswith("::") + and not line.lstrip().startswith("..")): + self.state = "code" + # set the current text indent level (needed by the code handler to find the -# end of code block) and set the state to "code" (i.e. the next call of -# `self.next` goes to the code handler):: +# end of code block):: - self._textindent = self.get_indent(line) - self.state = 'code' - -# Ensure a trailing blank line (which is the paragraph separator in -# reStructured Text. Look at the next line, if it is blank -- OK, if it is -# not blank, push it back (it should be code) and add a line by calling the -# `ensure_trailing_blank_line` method (which also issues a warning):: + self._textindent = self.get_indent(line) - line = self.data_iterator.next() - if line.lstrip(): - self.data_iterator.push(line) # push back - self.ensure_trailing_blank_line(lines, line) - else: - lines.append(line) - -# Now yield and reset the lines. (There was a function call to remove a -# literal marker (if on a line on itself) to shorten the comment. 
However, -# this behaviour was removed as the resulting difference in line numbers leads -# to misleading error messages in doctests):: - - #remove_literal_marker(lines) - yield lines - lines = [] - -# End of data: if we "fall of" the iteration loop, just join and return the -# lines:: +# remove the comment from the last line again (it's a separator between text +# and code blocks). - yield lines + lines[-1] = lines[-1].replace(self.comment_string, "", 1) + if self.strip: + return [] + return lines + +# TODO: Ensure a trailing blank line? Would need to test all +# text lines for end-of-text marker and add a line by calling the +# `ensure_trailing_blank_line` method (which also issues a warning):: -# Text2Code.code_handler_generator -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# The `code` handler is called when a literal block marker is encounterd. It -# returns a code block (indented literal block), removing leading whitespace -# up to the indentation of the first code line in the file (this deviation -# from docutils behaviour allows indented blocks of Python code). + + +# Text2Code.code +# ~~~~~~~~~~~~~~ # -# As the code handler detects the switch to "text" state by looking at -# the line indents, it needs to push back the last probed data token. I.e. -# the data_iterator must support a `push` method. (This is the -# reason for the use of the PushIterator class in `__init__`.) :: +# The `code` handler is called with an indented literal block. It removes +# leading whitespace up to the indentation of the first code line in the file +# (this deviation from docutils behaviour allows indented blocks of Python +# code). :: - def code_handler_generator(self): + def code(self, block): """Convert indented literal blocks to source code """ - lines = [] - codeindent = None # indent of first non-blank code line, set below - indent_string = "" # leading whitespace chars ... 
-# Iterate over the lines in the input data:: - - for line in self.data_iterator: - # print "Code: '%s'"%line - -# Pass on blank lines (no test for end of code block needed|possible):: +# If still unset, determine the code indentation from first non-blank code +# line:: - if not line.rstrip(): - lines.append(line.replace(indent_string, "", 1)) - continue + if self.codeindent is None: + self.codeindent = self.get_indent(block[0]) -# Test for end of code block: -# -# A literal block ends with the first less indented, nonblank line. -# `self._textindent` is set by the text handler to the indent of the -# preceding paragraph. -# -# To prevent problems with different tabulator settings, hard tabs in code -# lines are expanded with the `expandtabs` string method when calculating the -# indentation (i.e. replaced by 8 spaces, by default). -# -# :: - - if self.get_indent(line) <= self._textindent: - # push back line - self.data_iterator.push(line) - self.state = 'text' - # append blank line (if not already present) - self.ensure_trailing_blank_line(lines, line) - yield lines - # reset list of lines - lines = [] - continue +# Check if we can safely unindent the code block:: -# OK, we are sure now that the current line is neither blank nor a text line. 
-# -# If still unset, determine the code indentation from first non-blank code -# line:: + for line in block: + if line.lstrip() and self.get_indent(line) < self.codeindent: + raise ValueError, "code block contains line less indented " \ + "than %d spaces \n%r"%(self.codeindent, block) - if codeindent is None and line.lstrip(): - codeindent = self.get_indent(line) - indent_string = line[:codeindent] - -# Append unindented line to lines cache (but check if we can safely unindent -# first):: +# return unindented block:: - if not line.startswith(indent_string): - raise ValueError, "cannot unindent line %r,\n"%line \ - + " doesnot start with code indent string %r"%indent_string - - lines.append(line[codeindent:]) + return [line.replace(" "*self.codeindent, "", 1) for line in block] -# No more lines in the input data: just return what we have:: - - yield lines - # Txt2Code.remove_literal_marker # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -525,7 +478,9 @@ class Text2Code(PyLitConverter): # # While cleaning up the code source, it leads to confusion for doctest and # searches (e.g. grep) as line-numbers between text and code source will -# differ. :: +# differ. 
+# The code is left here, as it can be used for conversion of +# a literal marker to a different code-marker:: def remove_literal_marker(list): try: @@ -536,24 +491,6 @@ class Text2Code(PyLitConverter): except IndexError: pass -# Text2Code.iter_strip -# ~~~~~~~~~~~~~~~~~~~~ -# -# Modification of the `simplestates.__iter__` method that will replace it when -# the `strip` keyword argument is `True` during class instantiation: -# -# Iterate over class instances dropping text blocks:: - - def iter_strip(self): - """Generate and return an iterator dropping text blocks - """ - self.data_iterator = self.data - self._initialize_state_generators() - while True: - yield getattr(self, self.state)() - getattr(self, self.state)() # drop text block - - # Code2Text # --------- @@ -689,7 +626,7 @@ class Code2Text(PyLitConverter): return lines - + # Code2Text.code # ~~~~~~~~~~~~~~ # @@ -1275,7 +1212,7 @@ def main(args=sys.argv[1:], **option_defaults): # If not (and input and output are from files), set the modification time # (`mtime`) of the output file to the one of the input file to indicate that -# the contained information is equal.[#]_ :: +# the contained information is equal. 
[#]_ :: else: try: @@ -1307,11 +1244,7 @@ if __name__ == '__main__': # Options # ------- # -# * Collect option defaults in a dictionary (on module level) -# -# Facilitates the setting of options in programmatic use -# -# Use templates for the "intelligent guesses" (with Python syntax for string +# * Use templates for the "intelligent guesses" (with Python syntax for string # replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``) # # * Is it sensible to offer the `header_string` option also as command line diff --git a/src/simplestates.py b/src/simplestates.py index 719fc67..8fd652f 100644 --- a/src/simplestates.py +++ b/src/simplestates.py @@ -115,7 +115,7 @@ class SimpleStates: # * remaining keyword arguments are stored as class attributes (or methods, if # they are function objects) overwriting class defaults (a neat little trick # I found somewhere on the net) -# +# # ..note: This is the same as `self.__dict__.update(keyw)`. However, # the "Tutorial" advises to confine the direct use of `__dict__` # to post-mortem analysis or the like... @@ -133,8 +133,8 @@ class SimpleStates: setattr(self, key, value) -# -# + + # Iteration over class instances # ------------------------------ diff --git a/test/pylit_test.py b/test/pylit_test.py index 33ebd9d..b37587e 100644 --- a/test/pylit_test.py +++ b/test/pylit_test.py @@ -179,7 +179,7 @@ def test_Text2Code_malindented_code_line(): data1 = [".. 
#!/usr/bin/env python\n", # indent == 4 * " " "\n", " print 'hello world'"] # indent == 2 * " " - data2 = ["..\t#!/usr/bin/env python\n", # indent == 4 * " " + data2 = ["..\t#!/usr/bin/env python\n", # indent == 8 * " " "\n", " print 'hello world'"] # indent == 2 * " " for data in (data1, data2): @@ -204,14 +204,15 @@ def test_Text2Code_malindented_code_line(): ## Assuming that no double colon at end of line occures accidentially, ## pylit will fix this and issue a warning:: -textsamples["ensure blank line after text"] = ( -"""text followed by a literal block:: - block1 = 'first block' -""", -"""# text followed by a literal block:: - -block1 = 'first block' -""") +# Do we need this feature? (Complicates code a lot) +# textsamples["ensure blank line after text"] = ( +# """text followed by a literal block:: +# block1 = 'first block' +# """, +# """# text followed by a literal block:: +# +# block1 = 'first block' +# """) ## Text follows code block without blank line ## '''''''''''''''''''''''''''''''''''''''''' @@ -225,18 +226,19 @@ block1 = 'first block' ## Assuming that the unindent is not accidential, pylit fixes this and issues a ## warning:: -textsamples["ensure blank line after code"] = ( -""":: - - block1 = 'first block' -more text -""", -"""# :: - -block1 = 'first block' +# Do we need this feature? (Complicates code a lot) +# textsamples["ensure blank line after code"] = ( +# """:: +# +# block1 = 'first block' +# more text +# """, +# """# :: +# +# block1 = 'first block' # more text -""") +# """) ## A double colon on a line on its own ## ''''''''''''''''''''''''''''''''''' -- 2.11.4.GIT
[4]Make sure the corresponding file object (here out_stream) is +
[4]Make sure the corresponding file object (here out_stream) is closed, as otherwise the change will be overwritten when close is called afterwards (either explicitly or at program exit).