From 48a11456a885f484c9937242baa99c4ab64d798a Mon Sep 17 00:00:00 2001
From: milde <milde@fb71aa59-6827-0410-b536-ee2229a4f8e3>
Date: Fri, 2 Mar 2007 17:22:48 +0000
Subject: [PATCH] pylit.py: version 0.3.1: expand hard-tabs to prevent
 errors in indentation; `Text2Code` now also works on blocks; removed
 dependency on SimpleStates module

pylit_test.py: adapt to version 0.3.1: skip "insert empty line after..." tests

simplestates.py: cosmetic changes due to a pylit round trip



git-svn-id: http://svn.berlios.de/svnroot/repos/pylit/trunk@23 fb71aa59-6827-0410-b536-ee2229a4f8e3
---
 rstdocs/examples/pylit.py.html | 309 +++++++++++++++++---------------
 rstdocs/examples/pylit.py.txt  | 135 ++++++++------
 src/pylit.py                   | 389 +++++++++++++++++------------------------
 src/simplestates.py            |   6 +-
 test/pylit_test.py             |  40 +++--
 5 files changed, 431 insertions(+), 448 deletions(-)
diff --git a/rstdocs/examples/pylit.py.html b/rstdocs/examples/pylit.py.html
index 2640721..f60b0a1 100644
--- a/rstdocs/examples/pylit.py.html
+++ b/rstdocs/examples/pylit.py.html
@@ -311,85 +311,86 @@ Released under the terms of the GNU General Public License
 <div class="contents topic">
 <p class="topic-title first"><a id="contents" name="contents">Contents</a></p>
 <ul class="auto-toc simple">
-<li><a class="reference" href="#frontmatter" id="id7" name="id7">1&nbsp;&nbsp;&nbsp;Frontmatter</a><ul class="auto-toc">
-<li><a class="reference" href="#changelog" id="id8" name="id8">1.1&nbsp;&nbsp;&nbsp;Changelog</a></li>
-<li><a class="reference" href="#requirements" id="id9" name="id9">1.2&nbsp;&nbsp;&nbsp;Requirements</a></li>
+<li><a class="reference" href="#frontmatter" id="id8" name="id8">1&nbsp;&nbsp;&nbsp;Frontmatter</a><ul class="auto-toc">
+<li><a class="reference" href="#changelog" id="id9" name="id9">1.1&nbsp;&nbsp;&nbsp;Changelog</a></li>
+<li><a class="reference" href="#requirements" id="id10" name="id10">1.2&nbsp;&nbsp;&nbsp;Requirements</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#classes" id="id10" name="id10">2&nbsp;&nbsp;&nbsp;Classes</a><ul class="auto-toc">
-<li><a class="reference" href="#pushiterator" id="id11" name="id11">2.1&nbsp;&nbsp;&nbsp;PushIterator</a></li>
-<li><a class="reference" href="#converter" id="id12" name="id12">2.2&nbsp;&nbsp;&nbsp;Converter</a><ul class="auto-toc">
-<li><a class="reference" href="#data-attributes" id="id13" name="id13">2.2.1&nbsp;&nbsp;&nbsp;Data attributes</a></li>
-<li><a class="reference" href="#instantiation" id="id14" name="id14">2.2.2&nbsp;&nbsp;&nbsp;Instantiation</a></li>
-<li><a class="reference" href="#converter-str" id="id15" name="id15">2.2.3&nbsp;&nbsp;&nbsp;Converter.__str__</a></li>
-<li><a class="reference" href="#converter-get-indent" id="id16" name="id16">2.2.4&nbsp;&nbsp;&nbsp;Converter.get_indent</a></li>
-<li><a class="reference" href="#converter-ensure-trailing-blank-line" id="id17" name="id17">2.2.5&nbsp;&nbsp;&nbsp;Converter.ensure_trailing_blank_line</a></li>
-<li><a class="reference" href="#converter-collect-blocks" id="id18" name="id18">2.2.6&nbsp;&nbsp;&nbsp;Converter.collect_blocks</a></li>
+<li><a class="reference" href="#customization" id="id11" name="id11">2&nbsp;&nbsp;&nbsp;Customization</a></li>
+<li><a class="reference" href="#classes" id="id12" name="id12">3&nbsp;&nbsp;&nbsp;Classes</a><ul class="auto-toc">
+<li><a class="reference" href="#pushiterator" id="id13" name="id13">3.1&nbsp;&nbsp;&nbsp;PushIterator</a></li>
+<li><a class="reference" href="#converter" id="id14" name="id14">3.2&nbsp;&nbsp;&nbsp;Converter</a><ul class="auto-toc">
+<li><a class="reference" href="#data-attributes" id="id15" name="id15">3.2.1&nbsp;&nbsp;&nbsp;Data attributes</a></li>
+<li><a class="reference" href="#instantiation" id="id16" name="id16">3.2.2&nbsp;&nbsp;&nbsp;Instantiation</a></li>
+<li><a class="reference" href="#converter-str" id="id17" name="id17">3.2.3&nbsp;&nbsp;&nbsp;Converter.__str__</a></li>
+<li><a class="reference" href="#converter-get-indent" id="id18" name="id18">3.2.4&nbsp;&nbsp;&nbsp;Converter.get_indent</a></li>
+<li><a class="reference" href="#converter-ensure-trailing-blank-line" id="id19" name="id19">3.2.5&nbsp;&nbsp;&nbsp;Converter.ensure_trailing_blank_line</a></li>
+<li><a class="reference" href="#converter-collect-blocks" id="id20" name="id20">3.2.6&nbsp;&nbsp;&nbsp;Converter.collect_blocks</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#text2code" id="id19" name="id19">2.3&nbsp;&nbsp;&nbsp;Text2Code</a><ul class="auto-toc">
-<li><a class="reference" href="#text2code-header" id="id20" name="id20">2.3.1&nbsp;&nbsp;&nbsp;Text2Code.header</a></li>
-<li><a class="reference" href="#text2code-text-handler-generator" id="id21" name="id21">2.3.2&nbsp;&nbsp;&nbsp;Text2Code.text_handler_generator</a></li>
-<li><a class="reference" href="#text2code-code-handler-generator" id="id22" name="id22">2.3.3&nbsp;&nbsp;&nbsp;Text2Code.code_handler_generator</a></li>
-<li><a class="reference" href="#txt2code-remove-literal-marker" id="id23" name="id23">2.3.4&nbsp;&nbsp;&nbsp;Txt2Code.remove_literal_marker</a></li>
-<li><a class="reference" href="#text2code-iter-strip" id="id24" name="id24">2.3.5&nbsp;&nbsp;&nbsp;Text2Code.iter_strip</a></li>
+<li><a class="reference" href="#text2code" id="id21" name="id21">3.3&nbsp;&nbsp;&nbsp;Text2Code</a><ul class="auto-toc">
+<li><a class="reference" href="#text2code-header" id="id22" name="id22">3.3.1&nbsp;&nbsp;&nbsp;Text2Code.header</a></li>
+<li><a class="reference" href="#text2code-text-handler-generator" id="id23" name="id23">3.3.2&nbsp;&nbsp;&nbsp;Text2Code.text_handler_generator</a></li>
+<li><a class="reference" href="#text2code-code-handler-generator" id="id24" name="id24">3.3.3&nbsp;&nbsp;&nbsp;Text2Code.code_handler_generator</a></li>
+<li><a class="reference" href="#txt2code-remove-literal-marker" id="id25" name="id25">3.3.4&nbsp;&nbsp;&nbsp;Txt2Code.remove_literal_marker</a></li>
+<li><a class="reference" href="#text2code-iter-strip" id="id26" name="id26">3.3.5&nbsp;&nbsp;&nbsp;Text2Code.iter_strip</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#code2text" id="id25" name="id25">2.4&nbsp;&nbsp;&nbsp;Code2Text</a><ul class="auto-toc">
-<li><a class="reference" href="#code2text-iter" id="id26" name="id26">2.4.1&nbsp;&nbsp;&nbsp;Code2Text.__iter__</a></li>
-<li><a class="reference" href="#header-state" id="id27" name="id27">2.4.2&nbsp;&nbsp;&nbsp;&quot;header&quot; state</a></li>
-<li><a class="reference" href="#code2text-text" id="id28" name="id28">2.4.3&nbsp;&nbsp;&nbsp;Code2Text.text</a></li>
-<li><a class="reference" href="#code2text-code" id="id29" name="id29">2.4.4&nbsp;&nbsp;&nbsp;Code2Text.code</a></li>
-<li><a class="reference" href="#code2text-block-is-text" id="id30" name="id30">2.4.5&nbsp;&nbsp;&nbsp;Code2Text.block_is_text</a></li>
-<li><a class="reference" href="#code2text-strip-literal-marker" id="id31" name="id31">2.4.6&nbsp;&nbsp;&nbsp;Code2Text.strip_literal_marker</a></li>
+<li><a class="reference" href="#code2text" id="id27" name="id27">3.4&nbsp;&nbsp;&nbsp;Code2Text</a><ul class="auto-toc">
+<li><a class="reference" href="#code2text-iter" id="id28" name="id28">3.4.1&nbsp;&nbsp;&nbsp;Code2Text.__iter__</a></li>
+<li><a class="reference" href="#header-state" id="id29" name="id29">3.4.2&nbsp;&nbsp;&nbsp;&quot;header&quot; state</a></li>
+<li><a class="reference" href="#code2text-text" id="id30" name="id30">3.4.3&nbsp;&nbsp;&nbsp;Code2Text.text</a></li>
+<li><a class="reference" href="#code2text-code" id="id31" name="id31">3.4.4&nbsp;&nbsp;&nbsp;Code2Text.code</a></li>
+<li><a class="reference" href="#code2text-block-is-text" id="id32" name="id32">3.4.5&nbsp;&nbsp;&nbsp;Code2Text.block_is_text</a></li>
+<li><a class="reference" href="#code2text-strip-literal-marker" id="id33" name="id33">3.4.6&nbsp;&nbsp;&nbsp;Code2Text.strip_literal_marker</a></li>
 </ul>
 </li>
 </ul>
 </li>
-<li><a class="reference" href="#command-line-use" id="id32" name="id32">3&nbsp;&nbsp;&nbsp;Command line use</a><ul class="auto-toc">
-<li><a class="reference" href="#dual-source-handling" id="id33" name="id33">3.1&nbsp;&nbsp;&nbsp;Dual source handling</a><ul class="auto-toc">
-<li><a class="reference" href="#how-to-determine-which-source-is-up-to-date" id="id34" name="id34">3.1.1&nbsp;&nbsp;&nbsp;How to determine which source is up-to-date?</a></li>
-<li><a class="reference" href="#recognised-filename-extensions" id="id35" name="id35">3.1.2&nbsp;&nbsp;&nbsp;Recognised Filename Extensions</a></li>
+<li><a class="reference" href="#command-line-use" id="id34" name="id34">4&nbsp;&nbsp;&nbsp;Command line use</a><ul class="auto-toc">
+<li><a class="reference" href="#dual-source-handling" id="id35" name="id35">4.1&nbsp;&nbsp;&nbsp;Dual source handling</a><ul class="auto-toc">
+<li><a class="reference" href="#how-to-determine-which-source-is-up-to-date" id="id36" name="id36">4.1.1&nbsp;&nbsp;&nbsp;How to determine which source is up-to-date?</a></li>
+<li><a class="reference" href="#recognised-filename-extensions" id="id37" name="id37">4.1.2&nbsp;&nbsp;&nbsp;Recognised Filename Extensions</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#optionvalues" id="id36" name="id36">3.2&nbsp;&nbsp;&nbsp;OptionValues</a></li>
-<li><a class="reference" href="#pylitoptions" id="id37" name="id37">3.3&nbsp;&nbsp;&nbsp;PylitOptions</a><ul class="auto-toc">
-<li><a class="reference" href="#id5" id="id38" name="id38">3.3.1&nbsp;&nbsp;&nbsp;Instantiation</a></li>
-<li><a class="reference" href="#calling" id="id39" name="id39">3.3.2&nbsp;&nbsp;&nbsp;Calling</a></li>
-<li><a class="reference" href="#pylitoptions-parse-args" id="id40" name="id40">3.3.3&nbsp;&nbsp;&nbsp;PylitOptions.parse_args</a></li>
-<li><a class="reference" href="#pylitoptions-complete-values" id="id41" name="id41">3.3.4&nbsp;&nbsp;&nbsp;PylitOptions.complete_values</a></li>
-<li><a class="reference" href="#pylitoptions-get-outfile-name" id="id42" name="id42">3.3.5&nbsp;&nbsp;&nbsp;PylitOptions.get_outfile_name</a></li>
+<li><a class="reference" href="#optionvalues" id="id38" name="id38">4.2&nbsp;&nbsp;&nbsp;OptionValues</a></li>
+<li><a class="reference" href="#pylitoptions" id="id39" name="id39">4.3&nbsp;&nbsp;&nbsp;PylitOptions</a><ul class="auto-toc">
+<li><a class="reference" href="#id5" id="id40" name="id40">4.3.1&nbsp;&nbsp;&nbsp;Instantiation</a></li>
+<li><a class="reference" href="#calling" id="id41" name="id41">4.3.2&nbsp;&nbsp;&nbsp;Calling</a></li>
+<li><a class="reference" href="#pylitoptions-parse-args" id="id42" name="id42">4.3.3&nbsp;&nbsp;&nbsp;PylitOptions.parse_args</a></li>
+<li><a class="reference" href="#pylitoptions-complete-values" id="id43" name="id43">4.3.4&nbsp;&nbsp;&nbsp;PylitOptions.complete_values</a></li>
+<li><a class="reference" href="#pylitoptions-get-outfile-name" id="id44" name="id44">4.3.5&nbsp;&nbsp;&nbsp;PylitOptions.get_outfile_name</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#helper-functions" id="id43" name="id43">3.4&nbsp;&nbsp;&nbsp;Helper functions</a><ul class="auto-toc">
-<li><a class="reference" href="#open-streams" id="id44" name="id44">3.4.1&nbsp;&nbsp;&nbsp;open_streams</a></li>
-<li><a class="reference" href="#is-newer" id="id45" name="id45">3.4.2&nbsp;&nbsp;&nbsp;is_newer</a></li>
-<li><a class="reference" href="#get-converter" id="id46" name="id46">3.4.3&nbsp;&nbsp;&nbsp;get_converter</a></li>
+<li><a class="reference" href="#helper-functions" id="id45" name="id45">4.4&nbsp;&nbsp;&nbsp;Helper functions</a><ul class="auto-toc">
+<li><a class="reference" href="#open-streams" id="id46" name="id46">4.4.1&nbsp;&nbsp;&nbsp;open_streams</a></li>
+<li><a class="reference" href="#is-newer" id="id47" name="id47">4.4.2&nbsp;&nbsp;&nbsp;is_newer</a></li>
+<li><a class="reference" href="#get-converter" id="id48" name="id48">4.4.3&nbsp;&nbsp;&nbsp;get_converter</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#use-cases" id="id47" name="id47">3.5&nbsp;&nbsp;&nbsp;Use cases</a><ul class="auto-toc">
-<li><a class="reference" href="#run-doctest" id="id48" name="id48">3.5.1&nbsp;&nbsp;&nbsp;run_doctest</a></li>
-<li><a class="reference" href="#diff" id="id49" name="id49">3.5.2&nbsp;&nbsp;&nbsp;diff</a></li>
+<li><a class="reference" href="#use-cases" id="id49" name="id49">4.5&nbsp;&nbsp;&nbsp;Use cases</a><ul class="auto-toc">
+<li><a class="reference" href="#run-doctest" id="id50" name="id50">4.5.1&nbsp;&nbsp;&nbsp;run_doctest</a></li>
+<li><a class="reference" href="#diff" id="id51" name="id51">4.5.2&nbsp;&nbsp;&nbsp;diff</a></li>
 </ul>
 </li>
-<li><a class="reference" href="#main" id="id50" name="id50">3.6&nbsp;&nbsp;&nbsp;main</a><ul class="auto-toc">
-<li><a class="reference" href="#customization" id="id51" name="id51">3.6.1&nbsp;&nbsp;&nbsp;Customization</a></li>
+<li><a class="reference" href="#main" id="id52" name="id52">4.6&nbsp;&nbsp;&nbsp;main</a><ul class="auto-toc">
+<li><a class="reference" href="#id6" id="id53" name="id53">4.6.1&nbsp;&nbsp;&nbsp;Customization</a></li>
 </ul>
 </li>
 </ul>
 </li>
-<li><a class="reference" href="#open-questions" id="id52" name="id52">4&nbsp;&nbsp;&nbsp;Open questions</a><ul class="auto-toc">
-<li><a class="reference" href="#options" id="id53" name="id53">4.1&nbsp;&nbsp;&nbsp;Options</a></li>
-<li><a class="reference" href="#parsing-problems" id="id54" name="id54">4.2&nbsp;&nbsp;&nbsp;Parsing Problems</a></li>
-<li><a class="reference" href="#code-syntax-highlight" id="id55" name="id55">4.3&nbsp;&nbsp;&nbsp;code syntax highlight</a></li>
+<li><a class="reference" href="#open-questions" id="id54" name="id54">5&nbsp;&nbsp;&nbsp;Open questions</a><ul class="auto-toc">
+<li><a class="reference" href="#options" id="id55" name="id55">5.1&nbsp;&nbsp;&nbsp;Options</a></li>
+<li><a class="reference" href="#parsing-problems" id="id56" name="id56">5.2&nbsp;&nbsp;&nbsp;Parsing Problems</a></li>
+<li><a class="reference" href="#code-syntax-highlight" id="id57" name="id57">5.3&nbsp;&nbsp;&nbsp;code syntax highlight</a></li>
 </ul>
 </li>
 </ul>
 </div>
 <div class="section">
-<h1><a class="toc-backref" href="#id7" id="frontmatter" name="frontmatter">1&nbsp;&nbsp;&nbsp;Frontmatter</a></h1>
+<h1><a class="toc-backref" href="#id8" id="frontmatter" name="frontmatter">1&nbsp;&nbsp;&nbsp;Frontmatter</a></h1>
 <div class="section">
-<h2><a class="toc-backref" href="#id8" id="changelog" name="changelog">1.1&nbsp;&nbsp;&nbsp;Changelog</a></h2>
+<h2><a class="toc-backref" href="#id9" id="changelog" name="changelog">1.1&nbsp;&nbsp;&nbsp;Changelog</a></h2>
 <table class="docutils field-list" frame="void" rules="none">
 <col class="field-name" />
 <col class="field-body" />
@@ -426,7 +427,9 @@ new <cite>iter_strip</cite> method replacing a lot of <tt class="docutils litera
 </tr>
 <tr class="field"><th class="field-name">2007-02-22:</th><td class="field-body">0.2.8 set <cite>mtime</cite> of outfile to the one of infile</td>
 </tr>
-<tr class="field"><th class="field-name">2007-02-27:</th><td class="field-body">0.3   new <cite>Code2Text</cite> converter after an idea by Riccardo Murri</td>
+<tr class="field"><th class="field-name">2007-02-27:</th><td class="field-body">0.3   new <cite>Code2Text</cite> converter after an idea by Riccardo Murri
+a new <cite>Text2Code</cite> will follow soon
+explicit <cite>option_defaults</cite> dict for easier customization</td>
 </tr>
 </tbody>
 </table>
@@ -440,10 +443,12 @@ new <cite>iter_strip</cite> method replacing a lot of <tt class="docutils litera
 &quot;&quot;&quot;
 
 __docformat__ = 'restructuredtext'
+
+_version = &quot;0.3&quot;
 </pre>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id9" id="requirements" name="requirements">1.2&nbsp;&nbsp;&nbsp;Requirements</a></h2>
+<h2><a class="toc-backref" href="#id10" id="requirements" name="requirements">1.2&nbsp;&nbsp;&nbsp;Requirements</a></h2>
 <ul class="simple">
 <li>library modules</li>
 </ul>
@@ -462,9 +467,45 @@ from simplestates import SimpleStates  # generic state machine
 </div>
 </div>
 <div class="section">
-<h1><a class="toc-backref" href="#id10" id="classes" name="classes">2&nbsp;&nbsp;&nbsp;Classes</a></h1>
+<h1><a class="toc-backref" href="#id11" id="customization" name="customization">2&nbsp;&nbsp;&nbsp;Customization</a></h1>
+<pre class="literal-block">
+option_defaults = {}
+</pre>
+<p>Default language and language specific defaults:</p>
+<pre class="literal-block">
+option_defaults[&quot;language&quot;] =        &quot;python&quot;
+option_defaults[&quot;comment_strings&quot;] = {&quot;python&quot;: '# ',
+                                      &quot;slang&quot;:  '% ',
+                                      &quot;c++&quot;:    '// ',
+                                      &quot;elisp&quot;:  ';; '}
+</pre>
+<p>Recognized file extensions for text and code versions of the source.
+Used to guess the language from the filename.</p>
+<pre class="literal-block">
+option_defaults[&quot;code_languages&quot;]  = {&quot;.py&quot;: &quot;python&quot;,
+                                      &quot;.sl&quot;: &quot;slang&quot;,
+                                      &quot;.c&quot;: &quot;c++&quot;,
+                                      &quot;.el&quot;:&quot;elisp&quot;}
+option_defaults[&quot;code_extensions&quot;] = option_defaults[&quot;code_languages&quot;].keys()
+option_defaults[&quot;text_extensions&quot;] = [&quot;.txt&quot;]
+</pre>
+<p>Number of spaces to indent code blocks in the code -&gt; text conversion.[#]_</p>
+<table class="docutils footnote" frame="void" id="id1" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id3" name="id1">[2]</a></td><td>For the text -&gt; code conversion, the codeindent is determined by the
+first recognized code line (leading comment or first indented literal
+block of the text source).</td></tr>
+</tbody>
+</table>
+<pre class="literal-block">
+option_defaults[&quot;codeindent&quot;] =  2
+</pre>
+</div>
+<div class="section">
+<h1><a class="toc-backref" href="#id12" id="classes" name="classes">3&nbsp;&nbsp;&nbsp;Classes</a></h1>
 <div class="section">
-<h2><a class="toc-backref" href="#id11" id="pushiterator" name="pushiterator">2.1&nbsp;&nbsp;&nbsp;PushIterator</a></h2>
+<h2><a class="toc-backref" href="#id13" id="pushiterator" name="pushiterator">3.1&nbsp;&nbsp;&nbsp;PushIterator</a></h2>
 <p>The PushIterator is a minimal implementation of an iterator with
 backtracking from the <a class="reference" href="http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf">Effective Python Programming</a> OSCON 2005 tutorial by
 Anthony&nbsp;Baxter. As the definition is small, it is inlined now. For the full
@@ -484,7 +525,7 @@ class PushIterator(object):
 </pre>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id12" id="converter" name="converter">2.2&nbsp;&nbsp;&nbsp;Converter</a></h2>
+<h2><a class="toc-backref" href="#id14" id="converter" name="converter">3.2&nbsp;&nbsp;&nbsp;Converter</a></h2>
 <p>The converter classes implement a simple <cite>state machine</cite> to separate and
 transform text and code blocks. For this task, only a very limited parsing
 is needed.  Using the full blown <a class="reference" href="http://docutils.sourceforge.net/">docutils</a> rst parser would introduce a
@@ -509,7 +550,7 @@ class PyLitConverter(SimpleStates):
     &quot;&quot;&quot;
 </pre>
 <div class="section">
-<h3><a class="toc-backref" href="#id13" id="data-attributes" name="data-attributes">2.2.1&nbsp;&nbsp;&nbsp;Data attributes</a></h3>
+<h3><a class="toc-backref" href="#id15" id="data-attributes" name="data-attributes">3.2.1&nbsp;&nbsp;&nbsp;Data attributes</a></h3>
 <p>The data attributes are class default values. They will be overridden by
 matching keyword arguments during class instantiation.</p>
 <p><a class="reference" href="#get-converter">get_converter</a> and <a class="reference" href="#main">main</a> pass on unused keyword arguments to
@@ -517,22 +558,12 @@ the instantiation of a converter class. This way, keyword arguments
 to these functions can be used to customize the converter.</p>
 <p>Default language and language specific defaults:</p>
 <pre class="literal-block">
-language =        &quot;python&quot;
-comment_strings = {&quot;python&quot;: '# ',
-                   &quot;slang&quot;: '% ',
-                   &quot;c++&quot;: '// '}
+language = option_defaults[&quot;language&quot;]
+comment_strings = option_defaults[&quot;comment_strings&quot;]
 </pre>
-<p>Number of spaces to indent code blocks in the code -&gt; text conversion.[#]_</p>
-<table class="docutils footnote" frame="void" id="id1" rules="none">
-<colgroup><col class="label" /><col /></colgroup>
-<tbody valign="top">
-<tr><td class="label"><a class="fn-backref" href="#id3" name="id1">[2]</a></td><td>For the text -&gt; code conversion, the codeindent is determined by the
-first recognized code line (leading comment or first indented literal
-block of the text source).</td></tr>
-</tbody>
-</table>
+<p>Number of spaces to indent code blocks in the code -&gt; text conversion:</p>
 <pre class="literal-block">
-codeindent =  2
+codeindent =  option_defaults[&quot;codeindent&quot;]
 </pre>
 <p>Marker string for the first code block. (Should be a valid rst directive
 that accepts code on the same line, e.g. <tt class="docutils literal"><span class="pre">'..</span> <span class="pre">admonition::'</span></tt>.)  No
@@ -551,7 +582,7 @@ state = 'header'
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id14" id="instantiation" name="instantiation">2.2.2&nbsp;&nbsp;&nbsp;Instantiation</a></h3>
+<h3><a class="toc-backref" href="#id16" id="instantiation" name="instantiation">3.2.2&nbsp;&nbsp;&nbsp;Instantiation</a></h3>
 <p>Initializing sets up the <cite>data</cite> attribute, an iterable object yielding
 lines of the source to convert.[1]_</p>
 <pre class="literal-block">
@@ -576,7 +607,7 @@ class defaults:</p>
 <pre class="literal-block">
 self.__dict__.update(keyw)
 </pre>
-<p>The comment string is set to the languages comment string if not given in
+<p>The comment string is set to the language's comment string if not given in
 the keyword arguments:</p>
 <pre class="literal-block">
 if not hasattr(self, &quot;comment_string&quot;) or not self.comment_string:
@@ -594,7 +625,7 @@ with the optional <cite>keepends</cite> argument set to True.</p>
 </table>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id15" id="converter-str" name="converter-str">2.2.3&nbsp;&nbsp;&nbsp;Converter.__str__</a></h3>
+<h3><a class="toc-backref" href="#id17" id="converter-str" name="converter-str">3.2.3&nbsp;&nbsp;&nbsp;Converter.__str__</a></h3>
 <p>Return converted data as string:</p>
 <pre class="literal-block">
 def __str__(self):
@@ -603,7 +634,7 @@ def __str__(self):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id16" id="converter-get-indent" name="converter-get-indent">2.2.4&nbsp;&nbsp;&nbsp;Converter.get_indent</a></h3>
+<h3><a class="toc-backref" href="#id18" id="converter-get-indent" name="converter-get-indent">3.2.4&nbsp;&nbsp;&nbsp;Converter.get_indent</a></h3>
 <p>Return the number of leading spaces in <cite>string</cite> after expanding tabs</p>
 <pre class="literal-block">
 def get_indent(self, string):
@@ -614,7 +645,7 @@ def get_indent(self, string):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id17" id="converter-ensure-trailing-blank-line" name="converter-ensure-trailing-blank-line">2.2.5&nbsp;&nbsp;&nbsp;Converter.ensure_trailing_blank_line</a></h3>
+<h3><a class="toc-backref" href="#id19" id="converter-ensure-trailing-blank-line" name="converter-ensure-trailing-blank-line">3.2.5&nbsp;&nbsp;&nbsp;Converter.ensure_trailing_blank_line</a></h3>
 <p>Ensure there is a blank line as last element of the list <cite>lines</cite>:</p>
 <pre class="literal-block">
 def ensure_trailing_blank_line(self, lines, next_line):
@@ -627,7 +658,7 @@ def ensure_trailing_blank_line(self, lines, next_line):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id18" id="converter-collect-blocks" name="converter-collect-blocks">2.2.6&nbsp;&nbsp;&nbsp;Converter.collect_blocks</a></h3>
+<h3><a class="toc-backref" href="#id20" id="converter-collect-blocks" name="converter-collect-blocks">3.2.6&nbsp;&nbsp;&nbsp;Converter.collect_blocks</a></h3>
 <pre class="literal-block">
 def collect_blocks(self):
     &quot;&quot;&quot;collect lines in a list
@@ -646,7 +677,7 @@ def collect_blocks(self):
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id19" id="text2code" name="text2code">2.3&nbsp;&nbsp;&nbsp;Text2Code</a></h2>
+<h2><a class="toc-backref" href="#id21" id="text2code" name="text2code">3.3&nbsp;&nbsp;&nbsp;Text2Code</a></h2>
 <p>The <cite>Text2Code</cite> class separates code blocks (indented literal blocks) from
 reStructured text. Code blocks are unindented, text is commented (or
 filtered, if the <tt class="docutils literal"><span class="pre">strip</span></tt> option is True.</p>
@@ -684,7 +715,7 @@ def __init__(self, data, **keyw):
         self.__iter__ = self.iter_strip
 </pre>
 <div class="section">
-<h3><a class="toc-backref" href="#id20" id="text2code-header" name="text2code-header">2.3.1&nbsp;&nbsp;&nbsp;Text2Code.header</a></h3>
+<h3><a class="toc-backref" href="#id22" id="text2code-header" name="text2code-header">3.3.1&nbsp;&nbsp;&nbsp;Text2Code.header</a></h3>
 <p>Convert the header (leading rst comment block) to code:</p>
 <pre class="literal-block">
 def header(self):
@@ -724,7 +755,7 @@ return []
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id21" id="text2code-text-handler-generator" name="text2code-text-handler-generator">2.3.2&nbsp;&nbsp;&nbsp;Text2Code.text_handler_generator</a></h3>
+<h3><a class="toc-backref" href="#id23" id="text2code-text-handler-generator" name="text2code-text-handler-generator">3.3.2&nbsp;&nbsp;&nbsp;Text2Code.text_handler_generator</a></h3>
 <p>The 'text' handler processes everything that is not an indented literal
 comment. Text is quoted with <cite>self.comment_string</cite> or filtered (with
 strip=True).</p>
@@ -788,7 +819,7 @@ yield lines
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id22" id="text2code-code-handler-generator" name="text2code-code-handler-generator">2.3.3&nbsp;&nbsp;&nbsp;Text2Code.code_handler_generator</a></h3>
+<h3><a class="toc-backref" href="#id24" id="text2code-code-handler-generator" name="text2code-code-handler-generator">3.3.3&nbsp;&nbsp;&nbsp;Text2Code.code_handler_generator</a></h3>
 <p>The <cite>code</cite> handler is called when a literal block marker is encounterd. It
 returns a code block (indented literal block), removing leading whitespace
 up to the indentation of the first code line in the file (this deviation
@@ -858,7 +889,7 @@ yield lines
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id23" id="txt2code-remove-literal-marker" name="txt2code-remove-literal-marker">2.3.4&nbsp;&nbsp;&nbsp;Txt2Code.remove_literal_marker</a></h3>
+<h3><a class="toc-backref" href="#id25" id="txt2code-remove-literal-marker" name="txt2code-remove-literal-marker">3.3.4&nbsp;&nbsp;&nbsp;Txt2Code.remove_literal_marker</a></h3>
 <p>Remove literal marker (::) in &quot;expanded form&quot; i.e. in a paragraph on its own.</p>
 <p>While cleaning up the code source, it leads to confusion for doctest and
 searches (e.g. grep) as line-numbers between text and code source will
@@ -875,7 +906,7 @@ def remove_literal_marker(list):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id24" id="text2code-iter-strip" name="text2code-iter-strip">2.3.5&nbsp;&nbsp;&nbsp;Text2Code.iter_strip</a></h3>
+<h3><a class="toc-backref" href="#id26" id="text2code-iter-strip" name="text2code-iter-strip">3.3.5&nbsp;&nbsp;&nbsp;Text2Code.iter_strip</a></h3>
 <p>Modification of the <cite>simplestates.__iter__</cite> method that will replace it when
 the <cite>strip</cite> keyword argument is <cite>True</cite> during class instantiation:</p>
 <p>Iterate over class instances dropping text blocks:</p>
@@ -892,21 +923,19 @@ def iter_strip(self):
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id25" id="code2text" name="code2text">2.4&nbsp;&nbsp;&nbsp;Code2Text</a></h2>
+<h2><a class="toc-backref" href="#id27" id="code2text" name="code2text">3.4&nbsp;&nbsp;&nbsp;Code2Text</a></h2>
 <p>The <cite>Code2Text</cite> class does the opposite of <a class="reference" href="#text2code">Text2Code</a> -- it processes
 valid source code, extracts comments, and puts non-commented code in literal
 blocks.</p>
-<p>Only lines starting with a comment string matching the one in the
-<cite>comment_string</cite> data attribute are considered text lines.</p>
-<p>The class is derived from the PyLitConverter state machine and adds handlers
-for the three states &quot;header&quot;, &quot;text&quot;, and &quot;code&quot;.</p>
+<p>The class is derived from the PyLitConverter state machine and adds  an
+<cite>__iter__</cite> method as well as handlers for &quot;text&quot;, and &quot;code&quot; states.</p>
 <pre class="literal-block">
 class Code2Text(PyLitConverter):
     &quot;&quot;&quot;Convert code source to text source
     &quot;&quot;&quot;
 </pre>
 <div class="section">
-<h3><a class="toc-backref" href="#id26" id="code2text-iter" name="code2text-iter">2.4.1&nbsp;&nbsp;&nbsp;Code2Text.__iter__</a></h3>
+<h3><a class="toc-backref" href="#id28" id="code2text-iter" name="code2text-iter">3.4.1&nbsp;&nbsp;&nbsp;Code2Text.__iter__</a></h3>
 <pre class="literal-block">
 def __iter__(self):
 </pre>
@@ -936,7 +965,7 @@ yield getattr(self, self.state)(block)
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id27" id="header-state" name="header-state">2.4.2&nbsp;&nbsp;&nbsp;&quot;header&quot; state</a></h3>
+<h3><a class="toc-backref" href="#id29" id="header-state" name="header-state">3.4.2&nbsp;&nbsp;&nbsp;&quot;header&quot; state</a></h3>
 <p>Sometimes code needs to remain on the first line(s) of the document to be
 valid. The most common example is the &quot;shebang&quot; line that tells a POSIX
 shell how to process an executable file:</p>
@@ -948,7 +977,7 @@ other comment or code.</p>
 <p>If we want to keep the line numbers in sync for text and code source, the
 reStructured Text markup for these header lines must start at the same line
 as the first header line. Therfore, header lines could not be marked as
-literal block (this would require the &quot;::&quot; and an empty line above the code).</p>
+literal block (this would require the <tt class="docutils literal"><span class="pre">::</span></tt> and an empty line above the code).</p>
 <p>OTOH, a comment may start at the same line as the comment marker and it
 includes subsequent indented lines. Comments are visible in the reStructured
 Text source but hidden in the pretty-printed output.</p>
@@ -984,7 +1013,7 @@ after) the first text block, e.g. with a <cite>line block</cite> in a <cite>bloc
 will overwrite this setting.</p>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id28" id="code2text-text" name="code2text-text">2.4.3&nbsp;&nbsp;&nbsp;Code2Text.text</a></h3>
+<h3><a class="toc-backref" href="#id30" id="code2text-text" name="code2text-text">3.4.3&nbsp;&nbsp;&nbsp;Code2Text.text</a></h3>
 <p>The <em>text state handler</em> converts a comment to a text block by stripping
 the leading <cite>comment string</cite> from every line:</p>
 <pre class="literal-block">
@@ -1021,7 +1050,7 @@ return lines
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id29" id="code2text-code" name="code2text-code">2.4.4&nbsp;&nbsp;&nbsp;Code2Text.code</a></h3>
+<h3><a class="toc-backref" href="#id31" id="code2text-code" name="code2text-code">3.4.4&nbsp;&nbsp;&nbsp;Code2Text.code</a></h3>
 <p>The <cite>code</cite> method is called on non-commented code. Code is returned as
 indented literal block (or filtered, if <tt class="docutils literal"><span class="pre">self.strip</span> <span class="pre">==</span> <span class="pre">True</span></tt>). The amount
 of the code indentation is controled by <cite>self.codeindent</cite> (default 2).</p>
@@ -1036,7 +1065,7 @@ def code(self, lines):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id30" id="code2text-block-is-text" name="code2text-block-is-text">2.4.5&nbsp;&nbsp;&nbsp;Code2Text.block_is_text</a></h3>
+<h3><a class="toc-backref" href="#id32" id="code2text-block-is-text" name="code2text-block-is-text">3.4.5&nbsp;&nbsp;&nbsp;Code2Text.block_is_text</a></h3>
 <p>A paragraph is a text block, if every non-blank line starts with a matching
 comment string  (test includes whitespace except for commented blank lines!)</p>
 <pre class="literal-block">
@@ -1050,7 +1079,7 @@ def block_is_text(self, block):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id31" id="code2text-strip-literal-marker" name="code2text-strip-literal-marker">2.4.6&nbsp;&nbsp;&nbsp;Code2Text.strip_literal_marker</a></h3>
+<h3><a class="toc-backref" href="#id33" id="code2text-strip-literal-marker" name="code2text-strip-literal-marker">3.4.6&nbsp;&nbsp;&nbsp;Code2Text.strip_literal_marker</a></h3>
 <p>Replace the literal marker with the equivalent of docutils replace rules</p>
 <ul class="simple">
 <li>strip <cite>::</cite>-line (and preceding blank line) if on a line on its own</li>
@@ -1090,13 +1119,13 @@ def strip_literal_marker(self, lines):
 </div>
 </div>
 <div class="section">
-<h1><a class="toc-backref" href="#id32" id="command-line-use" name="command-line-use">3&nbsp;&nbsp;&nbsp;Command line use</a></h1>
+<h1><a class="toc-backref" href="#id34" id="command-line-use" name="command-line-use">4&nbsp;&nbsp;&nbsp;Command line use</a></h1>
 <p>Using this script from the command line will convert a file according to its
 extension. This default can be overridden by a couple of options.</p>
 <div class="section">
-<h2><a class="toc-backref" href="#id33" id="dual-source-handling" name="dual-source-handling">3.1&nbsp;&nbsp;&nbsp;Dual source handling</a></h2>
+<h2><a class="toc-backref" href="#id35" id="dual-source-handling" name="dual-source-handling">4.1&nbsp;&nbsp;&nbsp;Dual source handling</a></h2>
 <div class="section">
-<h3><a class="toc-backref" href="#id34" id="how-to-determine-which-source-is-up-to-date" name="how-to-determine-which-source-is-up-to-date">3.1.1&nbsp;&nbsp;&nbsp;How to determine which source is up-to-date?</a></h3>
+<h3><a class="toc-backref" href="#id36" id="how-to-determine-which-source-is-up-to-date" name="how-to-determine-which-source-is-up-to-date">4.1.1&nbsp;&nbsp;&nbsp;How to determine which source is up-to-date?</a></h3>
 <ul>
 <li><p class="first">set modification date of <cite>oufile</cite> to the one of <cite>infile</cite></p>
 <p>Points out that the source files are 'synchronized'.</p>
@@ -1121,7 +1150,7 @@ function <cite>pylit_check()</cite> in <a class="reference" href="http://jedmode
 </ul>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id35" id="recognised-filename-extensions" name="recognised-filename-extensions">3.1.2&nbsp;&nbsp;&nbsp;Recognised Filename Extensions</a></h3>
+<h3><a class="toc-backref" href="#id37" id="recognised-filename-extensions" name="recognised-filename-extensions">4.1.2&nbsp;&nbsp;&nbsp;Recognised Filename Extensions</a></h3>
 <p>Finding an easy to remember, unused filename extension is not easy.</p>
 <dl class="docutils">
 <dt>.py.txt</dt>
@@ -1154,7 +1183,7 @@ the conversion to executable code. i.e. for a program foo:</p>
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id36" id="optionvalues" name="optionvalues">3.2&nbsp;&nbsp;&nbsp;OptionValues</a></h2>
+<h2><a class="toc-backref" href="#id38" id="optionvalues" name="optionvalues">4.2&nbsp;&nbsp;&nbsp;OptionValues</a></h2>
 <p>For use as keyword arguments, it is handy to have the options
 in a dictionary. The following class adds an <cite>as_dict</cite> method
 to  <cite>optparse.Values</cite>:</p>
@@ -1169,7 +1198,7 @@ class OptionValues(optparse.Values):
 </pre>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id37" id="pylitoptions" name="pylitoptions">3.3&nbsp;&nbsp;&nbsp;PylitOptions</a></h2>
+<h2><a class="toc-backref" href="#id39" id="pylitoptions" name="pylitoptions">4.3&nbsp;&nbsp;&nbsp;PylitOptions</a></h2>
 <p>Options are stored in the values attribute of the <cite>PylitOptions</cite> class.
 It is initialized with default values and parsed command line options (and
 arguments)  This scheme allows easy customization by code importing the
@@ -1179,26 +1208,20 @@ class PylitOptions(object):
     &quot;&quot;&quot;Storage and handling of program options
     &quot;&quot;&quot;
 </pre>
-<p>Recognized file extensions for text and code versions of the source:</p>
-<pre class="literal-block">
-code_languages  = {&quot;.py&quot;: &quot;python&quot;,
-                   &quot;.sl&quot;: &quot;slang&quot;,
-                   &quot;.c&quot;: &quot;c++&quot;}
-code_extensions = code_languages.keys()
-text_extensions = [&quot;.txt&quot;]
-</pre>
 <div class="section">
-<h3><a class="toc-backref" href="#id38" id="id5" name="id5">3.3.1&nbsp;&nbsp;&nbsp;Instantiation</a></h3>
+<h3><a class="toc-backref" href="#id40" id="id5" name="id5">4.3.1&nbsp;&nbsp;&nbsp;Instantiation</a></h3>
 <p>Instantiation sets up an OptionParser and initializes it with pylit's
 command line options and <cite>default_values</cite>. It then updates the values based
 on command line options and sensible defaults:</p>
 <pre class="literal-block">
-def __init__(self, args=sys.argv[1:], **default_values):
+def __init__(self, args=sys.argv[1:], **keyw):
     &quot;&quot;&quot;Set up an `OptionParser` instance and parse and complete arguments
     &quot;&quot;&quot;
-    p = optparse.OptionParser(usage=main.__doc__, version=&quot;0.2&quot;)
-    # set defaults
-    p.set_defaults(**default_values)
+    p = optparse.OptionParser(usage=main.__doc__, version=_version)
+    # set defaults (from modules option_defaults dict and keyword args)
+    defaults = dict(option_defaults) # copy module-level defaults
+    defaults.update(keyw)
+    p.set_defaults(**defaults)
     # add the options
     p.add_option(&quot;-c&quot;, &quot;--code2txt&quot;, dest=&quot;txt2code&quot;, action=&quot;store_false&quot;,
                  help=&quot;convert code to reStructured text&quot;)
@@ -1212,6 +1235,9 @@ def __init__(self, args=sys.argv[1:], **default_values):
                  help=&quot;execute code (Python only)&quot;)
     p.add_option(&quot;-f&quot;, &quot;--infile&quot;,
                  help=&quot;input file name ('-' for stdout)&quot; )
+    p.add_option(&quot;--language&quot;, action=&quot;store&quot;,
+                 choices = option_defaults[&quot;code_languages&quot;].values(),
+                 help=&quot;use LANGUAGE native comment style&quot;)
     p.add_option(&quot;--overwrite&quot;, action=&quot;store&quot;,
                  choices = [&quot;yes&quot;, &quot;update&quot;, &quot;no&quot;],
                  help=&quot;overwrite output file (default 'update')&quot;)
@@ -1232,7 +1258,7 @@ def __init__(self, args=sys.argv[1:], **default_values):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id39" id="calling" name="calling">3.3.2&nbsp;&nbsp;&nbsp;Calling</a></h3>
+<h3><a class="toc-backref" href="#id41" id="calling" name="calling">4.3.2&nbsp;&nbsp;&nbsp;Calling</a></h3>
 <p>&quot;Calling&quot; an instance updates the option values based on command line
 arguments and default values and does a completion of the options based on
 &quot;context-sensitive defaults&quot;:</p>
@@ -1245,7 +1271,7 @@ def __call__(self, args=sys.argv[1:], **default_values):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id40" id="pylitoptions-parse-args" name="pylitoptions-parse-args">3.3.3&nbsp;&nbsp;&nbsp;PylitOptions.parse_args</a></h3>
+<h3><a class="toc-backref" href="#id42" id="pylitoptions-parse-args" name="pylitoptions-parse-args">4.3.3&nbsp;&nbsp;&nbsp;PylitOptions.parse_args</a></h3>
 <p>The <cite>parse_args</cite> method calls the <cite>optparse.OptionParser</cite> on command
 line or provided args and returns the result as <cite>PylitOptions.Values</cite>
 instance.  Defaults can be provided as keyword arguments:</p>
@@ -1272,7 +1298,7 @@ def parse_args(self, args=sys.argv[1:], **default_values):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id41" id="pylitoptions-complete-values" name="pylitoptions-complete-values">3.3.4&nbsp;&nbsp;&nbsp;PylitOptions.complete_values</a></h3>
+<h3><a class="toc-backref" href="#id43" id="pylitoptions-complete-values" name="pylitoptions-complete-values">4.3.4&nbsp;&nbsp;&nbsp;PylitOptions.complete_values</a></h3>
 <p>The <cite>complete</cite> method uses context information to set missing option values
 to sensible defaults (if possible).</p>
 <pre class="literal-block">
@@ -1283,9 +1309,9 @@ def complete_values(self, values):
     # Guess conversion direction from infile filename
     if values.ensure_value(&quot;txt2code&quot;, None) is None:
         in_extension = os.path.splitext(values.infile)[1]
-        if in_extension in self.text_extensions:
+        if in_extension in self.values.text_extensions:
             values.txt2code = True
-        elif in_extension in self.code_extensions:
+        elif in_extension in self.values.code_extensions:
             values.txt2code = False
     # Auto-determine the output file name
     values.ensure_value(&quot;outfile&quot;, self.get_outfile_name(values.infile,
@@ -1293,7 +1319,7 @@ def complete_values(self, values):
     # Guess conversion direction from outfile filename or set to default
     if values.txt2code is None:
         out_extension = os.path.splitext(values.outfile)[1]
-        values.txt2code = not (out_extension in self.text_extensions)
+        values.txt2code = not (out_extension in self.values.text_extensions)
 
     # Set the language of the code (default &quot;python&quot;)
     if values.txt2code is True:
@@ -1301,8 +1327,7 @@ def complete_values(self, values):
     elif values.txt2code is False:
         code_extension = os.path.splitext(values.infile)[1]
     values.ensure_value(&quot;language&quot;,
-                        self.code_languages.get(code_extension, &quot;python&quot;))
-
+                        self.values.code_languages.get(code_extension, &quot;python&quot;))
     # Set the default overwrite mode
     values.ensure_value(&quot;overwrite&quot;, 'update')
 
@@ -1310,7 +1335,7 @@ def complete_values(self, values):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id42" id="pylitoptions-get-outfile-name" name="pylitoptions-get-outfile-name">3.3.5&nbsp;&nbsp;&nbsp;PylitOptions.get_outfile_name</a></h3>
+<h3><a class="toc-backref" href="#id44" id="pylitoptions-get-outfile-name" name="pylitoptions-get-outfile-name">4.3.5&nbsp;&nbsp;&nbsp;PylitOptions.get_outfile_name</a></h3>
 <p>Construct a matching filename for the output file. The output filename is
 constructed from <cite>infile</cite> by the following rules:</p>
 <ul class="simple">
@@ -1332,20 +1357,20 @@ def get_outfile_name(self, infile, txt2code=None):
     #       if it exists?
 
     # strip text extension
-    if ext in self.text_extensions:
+    if ext in self.values.text_extensions:
         return base
     # add (first) text extension for code files
-    if ext in self.code_extensions or txt2code == False:
-        return infile + self.text_extensions[0]
+    if ext in self.values.code_extensions or txt2code == False:
+        return infile + self.values.text_extensions[0]
     # give up
     return infile + &quot;.out&quot;
 </pre>
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id43" id="helper-functions" name="helper-functions">3.4&nbsp;&nbsp;&nbsp;Helper functions</a></h2>
+<h2><a class="toc-backref" href="#id45" id="helper-functions" name="helper-functions">4.4&nbsp;&nbsp;&nbsp;Helper functions</a></h2>
 <div class="section">
-<h3><a class="toc-backref" href="#id44" id="open-streams" name="open-streams">3.4.1&nbsp;&nbsp;&nbsp;open_streams</a></h3>
+<h3><a class="toc-backref" href="#id46" id="open-streams" name="open-streams">4.4.1&nbsp;&nbsp;&nbsp;open_streams</a></h3>
 <p>Return file objects for in- and output. If the input path is missing,
 write usage and abort. (An alternative would be to use stdin as default.
 However,  this leaves the uninitiated user with a non-responding application
@@ -1382,7 +1407,7 @@ def open_streams(infile = '-', outfile = '-', overwrite='update', **keyw):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id45" id="is-newer" name="is-newer">3.4.2&nbsp;&nbsp;&nbsp;is_newer</a></h3>
+<h3><a class="toc-backref" href="#id47" id="is-newer" name="is-newer">4.4.2&nbsp;&nbsp;&nbsp;is_newer</a></h3>
 <pre class="literal-block">
 def is_newer(path1, path2):
     &quot;&quot;&quot;Check if `path1` is newer than `path2` (using mtime)
@@ -1412,7 +1437,7 @@ def is_newer(path1, path2):
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id46" id="get-converter" name="get-converter">3.4.3&nbsp;&nbsp;&nbsp;get_converter</a></h3>
+<h3><a class="toc-backref" href="#id48" id="get-converter" name="get-converter">4.4.3&nbsp;&nbsp;&nbsp;get_converter</a></h3>
 <p>Get an instance of the converter state machine:</p>
 <pre class="literal-block">
 def get_converter(data, txt2code=True, **keyw):
@@ -1424,9 +1449,9 @@ def get_converter(data, txt2code=True, **keyw):
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id47" id="use-cases" name="use-cases">3.5&nbsp;&nbsp;&nbsp;Use cases</a></h2>
+<h2><a class="toc-backref" href="#id49" id="use-cases" name="use-cases">4.5&nbsp;&nbsp;&nbsp;Use cases</a></h2>
 <div class="section">
-<h3><a class="toc-backref" href="#id48" id="run-doctest" name="run-doctest">3.5.1&nbsp;&nbsp;&nbsp;run_doctest</a></h3>
+<h3><a class="toc-backref" href="#id50" id="run-doctest" name="run-doctest">4.5.1&nbsp;&nbsp;&nbsp;run_doctest</a></h3>
 <pre class="literal-block">
 def run_doctest(infile=&quot;-&quot;, txt2code=True,
                 globs={}, verbose=False, optionflags=0, **keyw):
@@ -1457,7 +1482,7 @@ return runner.failures, runner.tries
 </pre>
 </div>
 <div class="section">
-<h3><a class="toc-backref" href="#id49" id="diff" name="diff">3.5.2&nbsp;&nbsp;&nbsp;diff</a></h3>
+<h3><a class="toc-backref" href="#id51" id="diff" name="diff">4.5.2&nbsp;&nbsp;&nbsp;diff</a></h3>
 <pre class="literal-block">
 def diff(infile='-', outfile='-', txt2code=True, **keyw):
     &quot;&quot;&quot;Report differences between converted infile and existing outfile
@@ -1501,11 +1526,11 @@ def diff(infile='-', outfile='-', txt2code=True, **keyw):
 </div>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id50" id="main" name="main">3.6&nbsp;&nbsp;&nbsp;main</a></h2>
+<h2><a class="toc-backref" href="#id52" id="main" name="main">4.6&nbsp;&nbsp;&nbsp;main</a></h2>
 <p>If this script is called from the command line, the <cite>main</cite> function will
 convert the input (file or stdin) between text and code formats.</p>
 <div class="section">
-<h3><a class="toc-backref" href="#id51" id="customization" name="customization">3.6.1&nbsp;&nbsp;&nbsp;Customization</a></h3>
+<h3><a class="toc-backref" href="#id53" id="id6" name="id6">4.6.1&nbsp;&nbsp;&nbsp;Customization</a></h3>
 <p>Option defaults for the conversion can be as keyword arguments to <a class="reference" href="#main">main</a>.
 The option defaults will be updated by command line options and extended
 with &quot;intelligent guesses&quot; by <cite>PylitOptions</cite> and passed on to helper
@@ -1594,10 +1619,10 @@ else:
 ## print &quot;mtime&quot;, os.path.getmtime(options.infile),  options.infile
 ## print &quot;mtime&quot;, os.path.getmtime(options.outfile), options.outfile
 </pre>
-<table class="docutils footnote" frame="void" id="id6" rules="none">
+<table class="docutils footnote" frame="void" id="id7" rules="none">
 <colgroup><col class="label" /><col /></colgroup>
 <tbody valign="top">
-<tr><td class="label"><a name="id6">[4]</a></td><td>Make sure the corresponding file object (here <cite>out_stream</cite>) is
+<tr><td class="label"><a name="id7">[4]</a></td><td>Make sure the corresponding file object (here <cite>out_stream</cite>) is
 closed, as otherwise the change will be overwritten when <cite>close</cite> is
 called afterwards (either explicitely or at program exit).</td></tr>
 </tbody>
@@ -1611,10 +1636,10 @@ if __name__ == '__main__':
 </div>
 </div>
 <div class="section">
-<h1><a class="toc-backref" href="#id52" id="open-questions" name="open-questions">4&nbsp;&nbsp;&nbsp;Open questions</a></h1>
+<h1><a class="toc-backref" href="#id54" id="open-questions" name="open-questions">5&nbsp;&nbsp;&nbsp;Open questions</a></h1>
 <p>Open questions and ideas for further development</p>
 <div class="section">
-<h2><a class="toc-backref" href="#id53" id="options" name="options">4.1&nbsp;&nbsp;&nbsp;Options</a></h2>
+<h2><a class="toc-backref" href="#id55" id="options" name="options">5.1&nbsp;&nbsp;&nbsp;Options</a></h2>
 <ul>
 <li><p class="first">Collect option defaults in a dictionary (on module level)</p>
 <p>Facilitates the setting of options in programmatic use</p>
@@ -1629,7 +1654,7 @@ option?</p>
 </ul>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id54" id="parsing-problems" name="parsing-problems">4.2&nbsp;&nbsp;&nbsp;Parsing Problems</a></h2>
+<h2><a class="toc-backref" href="#id56" id="parsing-problems" name="parsing-problems">5.2&nbsp;&nbsp;&nbsp;Parsing Problems</a></h2>
 <ul>
 <li><p class="first">How can I include a literal block that should not be in the
 executable code (e.g. an example, an earlier version or variant)?</p>
@@ -1656,7 +1681,7 @@ supports multi-line literal strings (C++, PHP, Python)</p>
 </ul>
 </div>
 <div class="section">
-<h2><a class="toc-backref" href="#id55" id="code-syntax-highlight" name="code-syntax-highlight">4.3&nbsp;&nbsp;&nbsp;code syntax highlight</a></h2>
+<h2><a class="toc-backref" href="#id57" id="code-syntax-highlight" name="code-syntax-highlight">5.3&nbsp;&nbsp;&nbsp;code syntax highlight</a></h2>
 <p>use <cite>listing</cite> package in LaTeX-&gt;PDF</p>
 <p>in html, see</p>
 <ul class="simple">
@@ -1679,7 +1704,7 @@ to rst-text if <tt class="docutils literal"><span class="pre">__docformat__</spa
 </div>
 <div class="footer">
 <hr class="footer" />
-Generated on: 2007-03-01.
+Generated on: 2007-03-02.
 
 </div>
 </body>
diff --git a/rstdocs/examples/pylit.py.txt b/rstdocs/examples/pylit.py.txt
index b05685d..b27eaef 100644
--- a/rstdocs/examples/pylit.py.txt
+++ b/rstdocs/examples/pylit.py.txt
@@ -38,7 +38,9 @@ Changelog
                    new `iter_strip` method replacing a lot of ``if``-s
 :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile
 :2007-02-27: 0.3   new `Code2Text` converter after an idea by Riccardo Murri
-
+                   a new `Text2Code` will follow soon
+                   explicit `option_defaults` dict for easier customization
+:2007-03-02: 0.3.1 expand hard-tabs to prevent errors in indentation.
 ::
 
   """pylit: Literate programming with Python and reStructuredText
@@ -51,6 +53,8 @@ Changelog
   
   __docformat__ = 'restructuredtext'
   
+  _version = "0.3"
+  
 
 Requirements
 ------------
@@ -70,6 +74,42 @@ Requirements
 
   from simplestates import SimpleStates  # generic state machine
   
+Customization
+=============
+
+Collect option defaults in a dictionary (on module level). This facilitates
+the setting of options in programmatic use ::
+
+  option_defaults = {}
+  
+Default language and language specific defaults::
+
+  option_defaults["language"] =        "python"        
+  option_defaults["comment_strings"] = {"python": '# ',
+                                        "slang":  '% ', 
+                                        "c++":    '// ',
+                                        "elisp":  ';; '}  
+  
+Recognized file extensions for text and code versions of the source.
+Used to guess the language from the filename. :: 
+
+  option_defaults["code_languages"]  = {".py": "python", 
+                                        ".sl": "slang", 
+                                        ".c": "c++",
+                                        ".el":"elisp"}
+  option_defaults["code_extensions"] = option_defaults["code_languages"].keys()
+  option_defaults["text_extensions"] = [".txt"]
+  
+Number of spaces to indent code blocks in the code -> text conversion. [#]_
+
+.. [#] For the text -> code conversion, the codeindent is determined by the
+       first recognized code line (leading comment or first indented literal
+       block of the text source).
+
+::
+
+  option_defaults["codeindent"] =  2
+  
  
 Classes
 =======
@@ -144,20 +184,12 @@ to these functions can be used to customize the converter.
 
 Default language and language specific defaults::
 
-      language =        "python"        
-      comment_strings = {"python": '# ',
-                         "slang": '% ', 
-                         "c++": '// '}  
-  
-Number of spaces to indent code blocks in the code -> text conversion.[#]_
-
-.. [#] For the text -> code conversion, the codeindent is determined by the
-       first recognized code line (leading comment or first indented literal
-       block of the text source).
-
-::
+      language = option_defaults["language"]
+      comment_strings = option_defaults["comment_strings"]
+      
+Number of spaces to indent code blocks in the code -> text conversion::
 
-      codeindent =  2
+      codeindent =  option_defaults["codeindent"]
   
 Marker string for the first code block. (Should be a valid rst directive
 that accepts code on the same line, e.g. ``'.. admonition::'``.)  No
@@ -202,7 +234,7 @@ class defaults::
 
           self.__dict__.update(keyw)
               
-The comment string is set to the languages comment string if not given in
+The comment string is set to the language's comment string if not given in
 the keyword arguments::
 
           if not hasattr(self, "comment_string") or not self.comment_string:
@@ -226,12 +258,12 @@ Return converted data as string::
 Converter.get_indent
 ~~~~~~~~~~~~~~~~~~~~
 
-Return the number of leading spaces in `string` after expanding tabs ::
+Return the number of leading spaces in `line` after expanding tabs ::
 
-      def get_indent(self, string):
+      def get_indent(self, line):
           """Return the indentation of `string`.
           """
-          line = string.expandtabs()
+          # line = line.expandtabs()
           return len(line) - len(line.lstrip())
   
 Converter.ensure_trailing_blank_line
@@ -257,11 +289,13 @@ Converter.collect_blocks
           """collect lines in a list 
           
           return list for each block of lines (paragraph) seperated by a 
-          blank line (whitespace only)
+          blank line (whitespace only).
+          
+          Also expand hard-tabs as these will lead to errors in indentation.
           """
           block = []
           for line in self.data:
-              block.append(line)
+              block.append(line.expandtabs())
               if not line.rstrip():
                   yield block
                   block = []
@@ -532,11 +566,8 @@ The `Code2Text` class does the opposite of `Text2Code`_ -- it processes
 valid source code, extracts comments, and puts non-commented code in literal
 blocks. 
 
-Only lines starting with a comment string matching the one in the
-`comment_string` data attribute are considered text lines.
-
-The class is derived from the PyLitConverter state machine and adds handlers
-for the three states "header", "text", and "code". ::
+The class is derived from the PyLitConverter state machine and adds an
+`__iter__` method as well as handlers for the "text" and "code" states. ::
 
   class Code2Text(PyLitConverter):
       """Convert code source to text source
@@ -572,7 +603,7 @@ processed with the matching handler::
                       yield self.code_marker
                   self.state = "code"
               yield getattr(self, self.state)(block)
-
+  
 "header" state
 ~~~~~~~~~~~~~~~~
 
@@ -588,7 +619,7 @@ other comment or code.
 If we want to keep the line numbers in sync for text and code source, the
 reStructured Text markup for these header lines must start at the same line
 as the first header line. Therfore, header lines could not be marked as
-literal block (this would require the "::" and an empty line above the code).
+literal block (this would require the ``::`` and an empty line above the code).
 
 OTOH, a comment may start at the same line as the comment marker and it
 includes subsequent indented lines. Comments are visible in the reStructured
@@ -639,7 +670,7 @@ the leading `comment string` from every line::
   
           lines = [re.sub("^"+self.comment_string.rstrip(), "", line)
                    for line in lines]
-
+  
 If the code block is stripped, the literal marker would lead to an error
 when the text is converted with docutils. Replace it with
 `Code2Text.strip_literal_marker`_::
@@ -662,7 +693,7 @@ Return the text block to the calling function::
 
           return lines
                        
-    
+  
 Code2Text.code
 ~~~~~~~~~~~~~~
 
@@ -678,7 +709,7 @@ of the code indentation is controled by `self.codeindent` (default 2).
               return []
   
           return [" "*self.codeindent + line for line in lines]
-
+  
 Code2Text.block_is_text
 ~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -835,14 +866,6 @@ arguments)  This scheme allows easy customization by code importing the
       """Storage and handling of program options
       """
   
-Recognized file extensions for text and code versions of the source:: 
-
-      code_languages  = {".py": "python", 
-                         ".sl": "slang", 
-                         ".c": "c++"}
-      code_extensions = code_languages.keys()
-      text_extensions = [".txt"]
-  
 Instantiation       
 ~~~~~~~~~~~~~
 
@@ -850,12 +873,14 @@ Instantiation sets up an OptionParser and initializes it with pylit's
 command line options and `default_values`. It then updates the values based
 on command line options and sensible defaults::
 
-      def __init__(self, args=sys.argv[1:], **default_values):
+      def __init__(self, args=sys.argv[1:], **keyw):
           """Set up an `OptionParser` instance and parse and complete arguments
           """
-          p = optparse.OptionParser(usage=main.__doc__, version="0.2")
-          # set defaults
-          p.set_defaults(**default_values)
+          p = optparse.OptionParser(usage=main.__doc__, version=_version)
+          # set defaults (from modules option_defaults dict and keyword args)
+          defaults = dict(option_defaults) # copy module-level defaults
+          defaults.update(keyw)
+          p.set_defaults(**defaults)
           # add the options
           p.add_option("-c", "--code2txt", dest="txt2code", action="store_false",
                        help="convert code to reStructured text")
@@ -869,6 +894,9 @@ on command line options and sensible defaults::
                        help="execute code (Python only)")
           p.add_option("-f", "--infile",
                        help="input file name ('-' for stdout)" )
+          p.add_option("--language", action="store", 
+                       choices = option_defaults["code_languages"].values(),
+                       help="use LANGUAGE native comment style")
           p.add_option("--overwrite", action="store", 
                        choices = ["yes", "update", "no"],
                        help="overwrite output file (default 'update')")
@@ -943,9 +971,9 @@ to sensible defaults (if possible).
           # Guess conversion direction from infile filename
           if values.ensure_value("txt2code", None) is None:
               in_extension = os.path.splitext(values.infile)[1]
-              if in_extension in self.text_extensions:
+              if in_extension in self.values.text_extensions:
                   values.txt2code = True
-              elif in_extension in self.code_extensions:
+              elif in_extension in self.values.code_extensions:
                   values.txt2code = False
           # Auto-determine the output file name
           values.ensure_value("outfile", self.get_outfile_name(values.infile, 
@@ -953,7 +981,7 @@ to sensible defaults (if possible).
           # Guess conversion direction from outfile filename or set to default
           if values.txt2code is None:
               out_extension = os.path.splitext(values.outfile)[1]
-              values.txt2code = not (out_extension in self.text_extensions)
+              values.txt2code = not (out_extension in self.values.text_extensions)
           
           # Set the language of the code (default "python")
           if values.txt2code is True:
@@ -961,8 +989,7 @@ to sensible defaults (if possible).
           elif values.txt2code is False:
               code_extension = os.path.splitext(values.infile)[1]
           values.ensure_value("language", 
-                              self.code_languages.get(code_extension, "python"))
-          
+                              self.values.code_languages.get(code_extension, "python"))
           # Set the default overwrite mode
           values.ensure_value("overwrite", 'update')
   
@@ -993,11 +1020,11 @@ constructed from `infile` by the following rules:
           #       if it exists?
           
           # strip text extension
-          if ext in self.text_extensions: 
+          if ext in self.values.text_extensions: 
               return base
           # add (first) text extension for code files
-          if ext in self.code_extensions or txt2code == False:
-              return infile + self.text_extensions[0]
+          if ext in self.values.code_extensions or txt2code == False:
+              return infile + self.values.text_extensions[0]
           # give up
           return infile + ".out"
   
@@ -1252,7 +1279,7 @@ Rename the infile to a backup copy if ``--replace`` is set::
           
 If not (and input and output are from files), set the modification time
 (`mtime`) of the output file to the one of the input file to indicate that
-the contained information is equal.[#]_ ::
+the contained information is equal. [#]_ ::
 
       else:
           try:
@@ -1284,11 +1311,7 @@ Open questions and ideas for further development
 Options
 -------
 
-* Collect option defaults in a dictionary (on module level)
-
-  Facilitates the setting of options in programmatic use
-  
-  Use templates for the "intelligent guesses" (with Python syntax for string
+* Use templates for the "intelligent guesses" (with Python syntax for string
   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
 
 * Is it sensible to offer the `header_string` option also as command line
diff --git a/src/pylit.py b/src/pylit.py
index 5a9d894..fa03d54 100755
--- a/src/pylit.py
+++ b/src/pylit.py
@@ -38,13 +38,12 @@
 #                    new `iter_strip` method replacing a lot of ``if``-s
 # :2007-02-22: 0.2.8 set `mtime` of outfile to the one of infile
 # :2007-02-27: 0.3   new `Code2Text` converter after an idea by Riccardo Murri
-#                    a new `Text2Code` will follow soon
 #                    explicite `option_defaults` dict for easier customization
-# 
+# :2007-03-02: 0.3.1 expand hard-tabs to prevent errors in indentation.
+#                    `Text2Code` now also works on blocks
+#                    removed dependency on SimpleStates module
 # ::
 
-_version = "0.3"
-
 """pylit: Literate programming with Python and reStructuredText
    
    PyLit is a bidirectional converter between
@@ -55,6 +54,8 @@ _version = "0.3"
 
 __docformat__ = 'restructuredtext'
 
+_version = "0.3"
+
 
 # Requirements
 # ------------
@@ -68,14 +69,11 @@ import os
 import sys
 import optparse
 
-# * non-standard extensions
-# 
-# ::
-
-from simplestates import SimpleStates  # generic state machine
-
 # Customization
 # =============
+# 
+# Collect option defaults in a dictionary (on module level). This facilitates
+# the setting of options in programmatic use ::
 
 option_defaults = {}
 
@@ -97,7 +95,7 @@ option_defaults["code_languages"]  = {".py": "python",
 option_defaults["code_extensions"] = option_defaults["code_languages"].keys()
 option_defaults["text_extensions"] = [".txt"]
 
-# Number of spaces to indent code blocks in the code -> text conversion.[#]_
+# Number of spaces to indent code blocks in the code -> text conversion. [#]_
 # 
 # .. [#] For the text -> code conversion, the codeindent is determined by the
 #        first recognized code line (leading comment or first indented literal
@@ -107,37 +105,9 @@ option_defaults["text_extensions"] = [".txt"]
 
 option_defaults["codeindent"] =  2
 
-
  
 # Classes
 # =======
-# 
-# PushIterator
-# ------------
-# 
-# The PushIterator is a minimal implementation of an iterator with
-# backtracking from the `Effective Python Programming`_ OSCON 2005 tutorial by
-# Anthony Baxter. As the definition is small, it is inlined now. For the full
-# reasoning and documentation see `iterqueue.py`_.
-# 
-# .. _`Effective Python Programming`: 
-#    http://www.interlink.com.au/anthony/tech/talks/OSCON2005/effective_r27.pdf
-# 
-# .. _iterqueue.py: iterqueue.py.html
-# 
-# ::
-
-class PushIterator(object):
-    def __init__(self, iterable):
-        self.it = iter(iterable)
-        self.cache = []
-    def __iter__(self):
-        """Return `self`, as this is already an iterator"""
-        return self
-    def next(self):
-        return (self.cache and self.cache.pop()) or self.it.next()
-    def push(self, value):
-        self.cache.append(value)
 
 # Converter
 # ---------
@@ -165,7 +135,7 @@ class PushIterator(object):
 # overrides the ``__init__`` method, and adds auxiliary methods and
 # configuration attributes (options). ::
 
-class PyLitConverter(SimpleStates):
+class PyLitConverter(object):
     """parent class for `Text2Code` and `Code2Text`, the state machines
     converting between text source and code source of a literal program.
     """
@@ -205,11 +175,11 @@ class PyLitConverter(SimpleStates):
     state = 'header' 
 
 
-# Instantiation
-# ~~~~~~~~~~~~~
+# Converter.__init__
+# ~~~~~~~~~~~~~~~~~~
 # 
 # Initializing sets up the `data` attribute, an iterable object yielding
-# lines of the source to convert.[1]_   ::
+# lines of the source to convert. [1]_ ::
 
     def __init__(self, data, **keyw):
         """data   --  iterable data object 
@@ -217,15 +187,7 @@ class PyLitConverter(SimpleStates):
            **keyw --  all remaining keyword arguments are 
                       stored as class attributes 
         """
-
-# As the state handlers need backtracking, the data is wrapped in a
-# `PushIterator`_ if it doesnot already have a `push` method::
-
-        if hasattr(data, 'push'):
-            self.data = data
-        else:
-            self.data = PushIterator(data)
-        self._textindent = 0
+        self.data = data
 
 # Additional keyword arguments are stored as data attributes, overwriting the
 # class defaults::
@@ -243,7 +205,19 @@ class PyLitConverter(SimpleStates):
 # 
 #        To convert a string into a suitable object, use its splitlines method
 #        with the optional `keepends` argument set to True.
-# 
+
+# Converter.__call__
+# ~~~~~~~~~~~~~~~~~~
+#
+# The special `__call__` method allows use of class instances as callable
+# objects. It returns the converted data as list
+# TODO: return a list of lines
+# ::
+
+    def __call__(self):
+        """Iterate over state-machine and return results as a list"""
+        return [token for token in self]
+
 # Converter.__str__
 # ~~~~~~~~~~~~~~~~~
 # 
@@ -256,12 +230,12 @@ class PyLitConverter(SimpleStates):
 # Converter.get_indent
 # ~~~~~~~~~~~~~~~~~~~~
 # 
-# Return the number of leading spaces in `string` after expanding tabs ::
+# Return the number of leading spaces in `line` after expanding tabs ::
 
-    def get_indent(self, string):
+    def get_indent(self, line):
         """Return the indentation of `string`.
         """
-        line = string.expandtabs()
+        # line = line.expandtabs()
         return len(line) - len(line.lstrip())
 
 # Converter.ensure_trailing_blank_line
@@ -287,11 +261,13 @@ class PyLitConverter(SimpleStates):
         """collect lines in a list 
         
         return list for each block of lines (paragraph) seperated by a 
-        blank line (whitespace only)
+        blank line (whitespace only).
+        
+        Also expand hard-tabs as these will lead to errors in indentation.
         """
         block = []
         for line in self.data:
-            block.append(line)
+            block.append(line.expandtabs())
             if not line.rstrip():
                 yield block
                 block = []
@@ -325,24 +301,48 @@ class Text2Code(PyLitConverter):
     """Convert a (reStructured) text source to code source
     """
 
-# INIT: call the parent classes init method. 
+# Text2Code.__iter__
+# ~~~~~~~~~~~~~~~~~~
 # 
-# If the `strip` argument is true, replace the `__iter_` method
-# with a special one that drops "spurious" blocks::
+# Data is collected into "blocks" separated by blank lines. The state is set
+# by the `set_state` method based on markers or indentation in the block.
+# ::
 
-    def __init__(self, data, **keyw):
-        PyLitConverter.__init__(self, data, **keyw)
-        if getattr(self, "strip", False):
-            self.__iter__ = self.iter_strip
+    def __iter__(self):
+        """Iterate over text source and return lists of code-source lines"""
 
-# Text2Code.header
-# ~~~~~~~~~~~~~~~~
-# 
-# Convert the header (leading rst comment block) to code::
+# At start, the check for "text" or "code" needs to check for the 
+# `header_string`::
+#
+        self.set_state = self.header_test
 
-    def header(self):
-        """Convert header (comment) to code"""
-        line = self.data_iterator.next()
+# indent of first non-blank code line, set in `code` method::
+
+        self.codeindent = None  
+
+# text indent level (needed by the code handler to find the
+# end of code block)::
+
+        self._textindent = 0
+
+# The "code" to "text" state transition is detected in the  first non-code
+# block. `header_test` will set `set_state` to `code_test` which checks the
+# indentation.
+#
+# The "text" to "code" state transition is codified in the preceding "text"
+# block. This is why the "end-of-text" test is performed inside the `text`
+# state handler. ::
+        
+        for block in self.collect_blocks():
+            if self.state != "text":
+                self.state = self.set_state(block)
+            yield getattr(self, self.state)(block)
+            
+
+
+# Text2Code.header_test
+# ~~~~~~~~~~~~~~~~~~~~~
+# 
 
 # Test first line for rst comment: (We need to do this explicitely here, as
 # the code handler will only recognize the start of a text block if a line
@@ -358,165 +358,118 @@ class Text2Code(PyLitConverter):
 
 # 2. Convert any leading comment to code::
 
-        if line.startswith(self.header_string):
-            
-# Strip leading comment string (typically added by `Code2Text.header`) and
-# return the result of processing the data with the code handler::
-
-            self.data_iterator.push(line.replace(self.header_string, "", 1))
-            return self.code()
+    def header_test(self, lines):
+        """Return whether the header block is "text" or "code".
+        
+        Strip `self.header_string` if present."""
+        
+        # from now, do the normal code-block test
+        self.set_state = self.code_test
         
-# No header code found: Push back first non-header line and set state to
-# "text"::
+        if lines[0].startswith(self.header_string):
+            lines[0] = lines[0].replace(self.header_string, "", 1)
+            return "code"
+        return "text"
 
-        self.data_iterator.push(line)
-        self.state = 'text'
-        return []
+# Text2Code.code_test
 
-# Text2Code.text_handler_generator
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Test for end of code block, return next state. Also check if there are
+# lines less indented than `codeindent` -- which would lead to data loss by the
+# unindent done by the `code` method.
 # 
-# The 'text' handler processes everything that is not an indented literal
-# comment. Text is quoted with `self.comment_string` or filtered (with
-# strip=True). 
+# A literal block ends with the first less indented, nonblank line.
+# `self._textindent` is set by the text handler to the indent of the
+# preceding paragraph. 
 # 
-# It is implemented as a generator function that acts on the `data` iterator
-# and yields text blocks.
+# ::
+
+    def code_test(self, block):
+        """test code block for end of "code" state, return next state
+        """
+        indents = [self.get_indent(line) for line in block]
+        if min(indents) <= self._textindent:
+            return 'text'
+        return 'code'
+
+# TODO: insert blank line before the first line with too-small codeindent?
+# self.ensure_trailing_blank_line(lines, line)
+
+
+# Text2Code.text
+# ~~~~~~~~~~~~~~
 # 
-# Declaration and initialization::
+# The 'text' handler processes everything that is not an indented literal
+# comment. Text is quoted with `self.comment_string` or filtered (with
+# strip=True). ::
 
-    def text_handler_generator(self):
+    def text(self, lines):
         """Convert text blocks from rst to comment
         """
-        lines = []
         
-# Iterate over the data_iterator (which yields the data lines)::
-          
-        for line in self.data_iterator:
-            # print "Text: '%s'"%line
-            
-# Default action: add comment string and collect in `lines` list::
-
-            lines.append(self.comment_string + line)
+        lines = [self.comment_string + line for line in lines]
                 
-# Test for the end of the text block: a line that ends with `::` but is neither
-# a comment nor a directive::
+# Test for the end of the text block: does the second last line end with
+# `::` but is neither a comment nor a directive?
+# TODO: allow different code marking directives (for syntax color etc)
+# ::
 
-            if (line.rstrip().endswith("::")
-                and not line.lstrip().startswith("..")):
-                
-# End of text block is detected, now:
-# 
+        try:
+            line = lines[-2]
+        except IndexError:  # len(lines) < 2
+            line = ""
+        if (line.rstrip().endswith("::") 
+            and not line.lstrip().startswith("..")):
+            self.state = "code"
+ 
 # set the current text indent level (needed by the code handler to find the
-# end of code block) and set the state to "code" (i.e. the next call of
-# `self.next` goes to the code handler)::
+# end of code block)::
 
-                self._textindent = self.get_indent(line)
-                self.state = 'code'
-                
-# Ensure a trailing blank line (which is the paragraph separator in
-# reStructured Text. Look at the next line, if it is blank -- OK, if it is
-# not blank, push it back (it should be code) and add a line by calling the
-# `ensure_trailing_blank_line` method (which also issues a warning)::
+            self._textindent = self.get_indent(line)
 
-                line = self.data_iterator.next()
-                if line.lstrip():
-                    self.data_iterator.push(line) # push back
-                    self.ensure_trailing_blank_line(lines, line)
-                else:
-                    lines.append(line)
-
-# Now yield and reset the lines. (There was a function call to remove a
-# literal marker (if on a line on itself) to shorten the comment. However,
-# this behaviour was removed as the resulting difference in line numbers leads
-# to misleading error messages in doctests)::
-
-                #remove_literal_marker(lines)
-                yield lines
-                lines = []
-                
-# End of data: if we "fall of" the iteration loop, just join and return the
-# lines::
+# remove the comment from the last line again (it's a separator between text
+# and code blocks). ::
 
-        yield lines
+            lines[-1] = lines[-1].replace(self.comment_string, "", 1)
 
+        if self.strip:
+            return []
+        return lines
+    
+# TODO: Ensure a trailing blank line? Would need to test all
+# text lines for end-of-text marker and add a line by calling the
+# `ensure_trailing_blank_line` method (which also issues a warning).
 
-# Text2Code.code_handler_generator
-# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-# 
-# The `code` handler is called when a literal block marker is encounterd. It
-# returns a code block (indented literal block), removing leading whitespace
-# up to the indentation of the first code line in the file (this deviation
-# from docutils behaviour allows indented blocks of Python code).
+
+
+# Text2Code.code
+# ~~~~~~~~~~~~~~
 # 
-# As the code handler detects the switch to "text" state by looking at
-# the line indents, it needs to push back the last probed data token. I.e.
-# the  data_iterator must support a `push` method. (This is the
-# reason for the use of the PushIterator class in `__init__`.) ::
+# The `code` handler is called with an indented literal block. It removes
+# leading whitespace up to the indentation of the first code line in the file
+# (this deviation from docutils behaviour allows indented blocks of Python
+# code). ::
 
-    def code_handler_generator(self):
+    def code(self, block): 
         """Convert indented literal blocks to source code
         """
-        lines = []
-        codeindent = None  # indent of first non-blank code line, set below
-        indent_string = "" # leading whitespace chars ...
         
-# Iterate over the lines in the input data::
-
-        for line in self.data_iterator:
-            # print "Code: '%s'"%line
-            
-# Pass on blank lines (no test for end of code block needed|possible)::
+# If still unset, determine the code indentation from first non-blank code
+# line::
 
-            if not line.rstrip():
-                lines.append(line.replace(indent_string, "", 1))
-                continue
+        if self.codeindent is None:
+            self.codeindent = self.get_indent(block[0])
 
-# Test for end of code block:
-# 
-# A literal block ends with the first less indented, nonblank line.
-# `self._textindent` is set by the text handler to the indent of the
-# preceding paragraph. 
-# 
-# To prevent problems with different tabulator settings, hard tabs in code
-# lines  are expanded with the `expandtabs` string method when calculating the
-# indentation (i.e. replaced by 8 spaces, by default).
-# 
-# ::
-
-            if self.get_indent(line) <= self._textindent:
-                # push back line
-                self.data_iterator.push(line) 
-                self.state = 'text'
-                # append blank line (if not already present)
-                self.ensure_trailing_blank_line(lines, line)
-                yield lines
-                # reset list of lines
-                lines = []
-                continue
+# Check if we can safely unindent the code block::
 
-# OK, we are sure now that the current line is neither blank nor a text line.
-# 
-# If still unset, determine the code indentation from first non-blank code
-# line::
+        for line in block:
+            if line.lstrip() and self.get_indent(line) < self.codeindent:
+                raise ValueError, "code block contains line less indented " \
+                                "than %d spaces \n%r"%(self.codeindent, block)
 
-            if codeindent is None and line.lstrip():
-                codeindent = self.get_indent(line)
-                indent_string = line[:codeindent]
-            
-# Append unindented line to lines cache (but check if we can safely unindent
-# first)::
+# return unindented block::
 
-            if not line.startswith(indent_string):
-                raise ValueError, "cannot unindent line %r,\n"%line \
-                + "  doesnot start with code indent string %r"%indent_string
-            
-            lines.append(line[codeindent:])
+        return [line.replace(" "*self.codeindent, "", 1) for line in block]
 
-# No more lines in the input data: just return what we have::
-            
-        yield lines
-                        
 
 # Txt2Code.remove_literal_marker
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -525,7 +478,9 @@ class Text2Code(PyLitConverter):
 # 
 # While cleaning up the code source, it leads to confusion for doctest and
 # searches (e.g. grep) as line-numbers between text and code source will
-# differ. ::
+# differ. 
+# The code is left here, as it can be used for conversion of
+# a literal marker to a different code-marker::
 
     def remove_literal_marker(list):
         try:
@@ -536,24 +491,6 @@ class Text2Code(PyLitConverter):
         except IndexError:
             pass
 
-# Text2Code.iter_strip
-# ~~~~~~~~~~~~~~~~~~~~
-# 
-# Modification of the `simplestates.__iter__` method that will replace it when
-# the `strip` keyword argument is `True` during class instantiation: 
-# 
-# Iterate over class instances dropping text blocks::
-
-    def iter_strip(self):
-        """Generate and return an iterator dropping text blocks
-        """
-        self.data_iterator = self.data
-        self._initialize_state_generators()
-        while True:
-            yield getattr(self, self.state)()
-            getattr(self, self.state)() # drop text block
-
-
 
 # Code2Text
 # ---------
@@ -689,7 +626,7 @@ class Code2Text(PyLitConverter):
 
         return lines
                      
-  
+
 # Code2Text.code
 # ~~~~~~~~~~~~~~
 # 
@@ -1275,7 +1212,7 @@ def main(args=sys.argv[1:], **option_defaults):
         
 # If not (and input and output are from files), set the modification time
 # (`mtime`) of the output file to the one of the input file to indicate that
-# the contained information is equal.[#]_ ::
+# the contained information is equal. [#]_ ::
 
     else:
         try:
@@ -1307,11 +1244,7 @@ if __name__ == '__main__':
 # Options
 # -------
 # 
-# * Collect option defaults in a dictionary (on module level)
-# 
-#   Facilitates the setting of options in programmatic use
-#   
-#   Use templates for the "intelligent guesses" (with Python syntax for string
+# * Use templates for the "intelligent guesses" (with Python syntax for string
 #   replacement with dicts: ``"hello %(what)s" % {'what': 'world'}``)
 # 
 # * Is it sensible to offer the `header_string` option also as command line
diff --git a/src/simplestates.py b/src/simplestates.py
index 719fc67..8fd652f 100644
--- a/src/simplestates.py
+++ b/src/simplestates.py
@@ -115,7 +115,7 @@ class SimpleStates:
 # * remaining keyword arguments are stored as class attributes (or methods, if
 #   they are function objects) overwriting class defaults (a neat little trick
 #   I found somewhere on the net)
-#
+# 
 #   ..note: This is the same as `self.__dict__.update(keyw)`. However,
 #           the "Tutorial" advises to confine the direct use of `__dict__`
 #           to post-mortem analysis or the like...
@@ -133,8 +133,8 @@ class SimpleStates:
             setattr(self, key, value)
 
 
-#
-#
+
+
 
 # Iteration over class instances
 # ------------------------------
diff --git a/test/pylit_test.py b/test/pylit_test.py
index 33ebd9d..b37587e 100644
--- a/test/pylit_test.py
+++ b/test/pylit_test.py
@@ -179,7 +179,7 @@ def test_Text2Code_malindented_code_line():
     data1 = ["..    #!/usr/bin/env python\n", # indent == 4 * " "
             "\n",
             "  print 'hello world'"]          # indent == 2 * " "
-    data2 = ["..\t#!/usr/bin/env python\n",   # indent == 4 * " "
+    data2 = ["..\t#!/usr/bin/env python\n",   # indent == 8 * " "
             "\n",
             "  print 'hello world'"]          # indent == 2 * " "
     for data in (data1, data2):
@@ -204,14 +204,15 @@ def test_Text2Code_malindented_code_line():
 ## Assuming that no double colon at end of line occures accidentially,
 ## pylit will fix this and issue a warning::
 
-textsamples["ensure blank line after text"] = (
-"""text followed by a literal block::
-  block1 = 'first block'
-""",
-"""# text followed by a literal block::
-
-block1 = 'first block'
-""")
+# Do we need this feature? (Complicates code a lot)
+# textsamples["ensure blank line after text"] = (
+# """text followed by a literal block::
+#   block1 = 'first block'
+# """,
+# """# text followed by a literal block::
+# 
+# block1 = 'first block'
+# """)
 
 ## Text follows code block without blank line
 ## ''''''''''''''''''''''''''''''''''''''''''
@@ -225,18 +226,19 @@ block1 = 'first block'
 ## Assuming that the unindent is not accidential, pylit fixes this and issues a
 ## warning::
 
-textsamples["ensure blank line after code"] = (
-"""::
-
-  block1 = 'first block'
-more text
-""",
-"""# ::
-
-block1 = 'first block'
+# Do we need this feature? (Complicates code a lot)
+# textsamples["ensure blank line after code"] = (
+# """::
+# 
+#   block1 = 'first block'
+# more text
+# """,
+# """# ::
+# 
+# block1 = 'first block'
 
 # more text
-""")
+# """)
 
 ## A double colon on a line on its own
 ## '''''''''''''''''''''''''''''''''''
-- 
2.11.4.GIT


2007-02-22:	0.2.8 set mtime of outfile to the one of infile
2007-02-27:	0.3 new Code2Text converter after an idea by Riccardo Murri
2007-02-27:	0.3 new Code2Text converter after an idea by Riccardo Murri +a new Text2Code will follow soon +explicite option_defaults dict for easier customization
[4]	Make sure the corresponding file object (here out_stream) is +
[4]	Make sure the corresponding file object (here out_stream) is closed, as otherwise the change will be overwritten when close is called afterwards (either explicitely or at program exit).