r21325: delete children in reverse order since the array is manipulated during the...
[Samba/gbeck.git] / webapps / qooxdoo-0.6.3-sdk / frontend / framework / tool / modules / textile.py
bloba54e89272eb4b4f88ac20899f27a92c7a386cb47
1 #!/usr/bin/env python
2 # _*_ coding: latin1 _*_
4 """This is Textile
5 A Humane Web Text Generator
7 TODO:
8 * Make it work with Python 2.1.
9 * Make it work with Python 1.5.2? Or that's too optimistic?
11 ---
12 To get an overview of all PyTextile's features, simply
13 type 'tell me about textile.' in a single line.
14 """
16 __authors__ = ["Roberto A. F. De Almeida (roberto@dealmeida.net)",
17 "Mark Pilgrim (f8dy@diveintomark.org)"]
18 __version__ = "2.0.10"
19 __date__ = "2004/10/06"
20 __copyright__ = """
21 Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
22 Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
23 All rights reserved.
25 Original PHP version:
26 Version 1.0
27 21 Feb, 2003
29 Copyright (c) 2003, Dean Allen, www.textism.com
30 All rights reserved.
32 Parts of the documentation and some of the regular expressions are (c) Brad
33 Choate, http://bradchoate.com/. Thanks, Brad!
34 """
35 __license__ = """
36 Redistribution and use in source and binary forms, with or without
37 modification, are permitted provided that the following conditions are met:
39 * Redistributions of source code must retain the above copyright notice,
40 this list of conditions and the following disclaimer.
42 * Redistributions in binary form must reproduce the above copyright notice,
43 this list of conditions and the following disclaimer in the documentation
44 and/or other materials provided with the distribution.
46 * Neither the name Textile nor the names of its contributors may be used to
47 endorse or promote products derived from this software without specific
48 prior written permission.
50 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 POSSIBILITY OF SUCH DAMAGE.
61 """
62 __history__ = """
63 1.0 - 2003/03/19 - MAP - initial release
64 1.01 - 2003/03/19 - MAP - don't strip whitespace within <pre> tags;
65 map high-bit ASCII to HTML numeric entities
66 1.02 - 2003/03/19 - MAP - changed hyperlink qtag expression to only
67 match valid URL characters (per RFC 2396); fixed preg_replace to
68 not match across line breaks (solves lots of problems with
69 mistakenly matching overlapping inline markup); fixed whitespace
70 stripping to only strip whitespace from beginning and end of lines,
71 not immediately before and after HTML tags.
72 1.03 - 2003/03/20 - MAP - changed hyperlink qtag again to more
73 closely match original Textile (fixes problems with links
74 immediately followed by punctuation -- somewhere Dean is
75 grinning right now); handle curly apostrophe with "ve"
76 contraction; clean up empty titles at end.
77 1.04 - 2003/03/23 - MAP - lstrip input to deal with extra spaces at
78 beginning of first line; tweaked list loop to handle consecutive lists
79 1.1 - 2003/06/06 - MAP - created initial test suite for links and images,
80 and fixed a bunch of related bugs to pass them
81 1.11 - 2003/07/20 - CL - don't demoronise unicode strings; handle
82 "they're" properly
83 1.12 - 2003/07/23 - GW - print debug messages to stderr; handle bq(cite).
84 1.13 - 2003/07/23 - MAP - wrap bq. text in <p>...</p>
85 2 - 2004/03/26 - RAFA - rewritten from (almost) scratch to include
86 all features from Textile 2 and a little bit more.
87 2.0.1 - 2004/04/02 - RAFA - Fixed validating function that uses uTidyLib.
88 2.0.2 - 2004/04/02 - RAFA - Fixed problem with caps letters in URLs.
89 2.0.3 - 2004/04/19 - RAFA - Multiple classes are allowed, thanks to Dave
90 Anderson. The "lang" attribute is now removed from <code>, to be valid
91 XHTML. Fixed <span class="caps">UCAS</span> problem.
92 2.0.4 - 2004/05/20 - RAFA, CLB - Added inline formatting to table cells.
93 Curt Bergmann fixed a bug with the colspan formatting. Added Amazon
94 Associated id.
95 2.0.5 - 2004/06/01 - CL - Applied patch from Chris Lawrence to (1) fix
96 that Amazon associates ID was being added to all search URIs, (2)
97 customize the Amazon site used with the AMAZON variable, and (3) added
98 an "isbn" URI type that links directly to an Amazon product by ISBN or
99 Amazon ASIN.
100 2.0.6 - 2004/06/02 - RAFA - Fixed CAPS problem, again. I hope this is
101 the last time.
102 2.0.7 - 2004/06/04 - RAFA, MW - Fixed bullet macro, thanks to Adam
103 Messinger. Added patch from Michal Wallace changing {}.pop() for
104 compatibility with Python 2.2.x.
105 2.0.8 - 2004/06/25 - RAFA - Strip tags when adding the content from a
106 footnote to the reference link. Escaped '<' and '>' in the self-
107 generated documentation.
108 2.0.9 - 2004/10/04 - RAFA - In images, if ALT is not defined, add an
109 empty attribute. Added "LaTeX" style open/close quotes. Fixed a bug
110 where the acronym definition was being formatted with inline rules.
111 Handle "broken" lines correctly, removing the <br /> from inside
112 split HTML tags.
113 2.0.10 - 2004/10/06 - RAFA, LO - Escape all non-escaped ampersands.
114 Applied "trivial patch" from Ludvig Omholt to remove newline right
115 after the <pre> tag.
# Module-level configuration. These names are the defaults that the rest
# of the module reads; edit them here to change global behaviour.

# Set your encoding here.
ENCODING = 'utf-8'

# Output? Non-ASCII characters will be automatically
# converted to XML entities if you choose ASCII.
OUTPUT = 'utf-8'

# PyTextile can optionally validate the generated
# XHTML code. We can use either mxTidy or uTidyLib.
# You can change the default behaviour here.
VALIDATE = 0

# If you want h1. to be translated to something other
# than <h1>, change this offset. You can also pass it
# as an argument to textile().
HEAD_OFFSET = 0

# If you want to use itex2mml, specify the full path
# to the binary here. You can download it from here:
# http://golem.ph.utexas.edu/~distler/blog/files/itexToMML.tar.gz
itex2mml = None
#itex2mml = '/usr/local/bin/itex2MML'
#itex2mml = '/usr/people/almeida/bin/itex2MML'

# PyTextile can optionally sanitize the generated XHTML,
# which is good for weblog comments or if you don't trust
# yourself.
SANITIZE = 1

# Turn debug on?
DEBUGLEVEL = 0

# Amazon associate for links: "keywords":amazon
# If you don't have one, please consider leaving mine here as
# a small compensation for writing PyTextile. It's commented
# off as default.
#amazon_associate_id = 'bomtempo-21'
amazon_associate_id = None

#AMAZON = 'www.amazon.co.uk'
AMAZON = 'www.amazon.com'
160 import re
161 import sys
162 import os
163 import sgmllib
164 import unicodedata
167 def _in_tag(text, tag):
168 """Extracts text from inside a tag.
170 This function extracts the text from inside a given tag.
171 It's useful to get the text between <body></body> or
172 <pre></pre> when using the validators or the colorizer.
174 if text.count('<%s' % tag):
175 text = text.split('<%s' % tag, 1)[1]
176 if text.count('>'):
177 text = text.split('>', 1)[1]
178 if text.count('</%s' % tag):
179 text = text.split('</%s' % tag, 1)[0]
181 text = text.strip().replace('\r\n', '\n')
183 return text
# If you want PyTextile to automatically colorize
# your Python code, you need the htmlizer module
# from Twisted. (You can just grab this file from
# the distribution, it has no other dependencies.)
# When the import fails, `htmlizer` is set to None and
# `_color` is left undefined.
try:
    #from twisted.python import htmlizer
    import htmlizer
    from StringIO import StringIO

    def _color(code):
        """Colorize Python code.

        This function wraps a text string in a StringIO,
        and passes it to the htmlizer function from
        Twisted. Returns the markup found inside the
        <pre></pre> element written by htmlizer.
        """
        # Fix line continuations.
        code = preg_replace(r' \\\n', ' \\\\\n', code)

        code_in = StringIO(code)
        code_out = StringIO()

        htmlizer.filter(code_in, code_out)

        # Remove the <pre></pre> wrapper from the output.
        code = _in_tag(code_out.getvalue(), 'pre')

        # Fix newlines: move each newline out of its span.
        code = code.replace('<span class="py-src-newline">\n</span>', '<span class="py-src-newline"></span>\n')

        return code

except ImportError:
    htmlizer = None
222 # PyTextile can optionally validate the generated
223 # XHTML code using either mxTidy or uTidyLib.
224 try:
225 # This is mxTidy.
226 from mx.Tidy import Tidy
228 def _tidy1(text):
229 """mxTidy's XHTML validator.
231 This function is a wrapper to mxTidy's validator.
233 nerrors, nwarnings, text, errortext = Tidy.tidy(text, output_xhtml=1, numeric_entities=1, wrap=0)
234 return _in_tag(text, 'body')
236 _tidy = _tidy1
238 except ImportError:
239 try:
240 # This is uTidyLib.
241 import tidy
243 def _tidy2(text):
244 """uTidyLib's XHTML validator.
246 This function is a wrapper to uTidyLib's validator.
248 text = tidy.parseString(text, output_xhtml=1, add_xml_decl=0, indent=0, tidy_mark=0)
249 return _in_tag(str(text), 'body')
251 _tidy = _tidy2
253 except ImportError:
254 _tidy = None
# This is good for debugging.
def _debug(s, level=1):
    """Outputs debug information to sys.stderr.

    This function outputs debug information if DEBUGLEVEL is
    higher than or equal to a given threshold (`level`).
    """
    if DEBUGLEVEL >= level:
        # Plain write() instead of the `print >>` statement so the
        # call behaves the same on every Python version; the one-item
        # tuple keeps tuple arguments from being unpacked by '%'.
        sys.stderr.write('%s\n' % (s,))
#############################
# Useful regular expressions.
#
# Fragments for the attribute syntax. They are written for re.VERBOSE
# and are interpolated (with '%') into the larger patterns built from
# this dictionary, so each one ends with a newline to terminate its
# trailing comment.
parameters = {
    # Horizontal alignment.
    'align':    r'''(?:(?:<>|[<>=])                # Either '<>', '<', '>' or '='
                    (?![^\s]*(?:<>|[<>=])))        # Look-ahead to ensure it happens once
                 ''',

    # Horizontal padding.
    'padding':  r'''(?:[\(\)]+)                    # Any number of '(' and/or ')'
                 ''',

    # Class and/or id.
    'classid':  r'''(                              #
                        (?:\(\#[\w]+\))            # (#id)
                        |                          #
                        (?:\((?:[\w]+(?:\s[\w]+)*) #
                            (?:\#[\w]+)?\))        # (class1 class2 ... classn#id) or (class1 class2 ... classn)
                    )                              #
                    (?![^\s]*(?:\([\w#]+\)))       # must happen once
                 ''',

    # Language.
    'lang':     r'''(?:\[[\w-]+\])                 # [lang]
                    (?![^\s]*(?:\[.*?\]))          # must happen once
                 ''',

    # Style.
    'style':    r'''(?:{[^\}]+})                   # {style}
                    (?![^\s]*(?:{.*?}))            # must happen once
                 ''',
}
# Larger patterns assembled from `parameters`. Every entry that is
# %-formatted is meant to be compiled with re.VERBOSE; 'punct', 'url'
# and 'resize' are not %-formatted (the last contains literal '%').
res = {
    # Punctuation.
    'punct': r'''[\!"#\$%&'()\*\+,\-\./:;<=>\?@\[\\\]\^_`{\|}\~]''',

    # URL regular expression.
    'url': r'''(?=[a-zA-Z0-9./#])                          # Must start correctly
               (?:                                         # Match the leading part (proto://hostname, or just hostname)
                   (?:ftp|https?|telnet|nntp)              #     protocol
                   ://                                     #     ://
                   (?:                                     #     Optional 'username:password@'
                       \w+                                 #         username
                       (?::\w+)?                           #         optional :password
                       @                                   #         @
                   )?                                      #
                   [-\w]+(?:\.\w[-\w]*)+                   #     hostname (sub.example.com)
               |                                           #
                   (?:mailto:)?                            #     Optional mailto:
                   [-\+\w]+                                #     username
                   \@                                      #     at
                   [-\w]+(?:\.\w[-\w]*)+                   #     hostname
               |                                           #
                   (?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+  #     domain without protocol
                   (?:com\b                                #     TLD
                   |   edu\b                               #
                   |   biz\b                               #
                   |   gov\b                               #
                   |   in(?:t|fo)\b                        #     .int or .info
                   |   mil\b                               #
                   |   net\b                               #
                   |   org\b                               #
                   |   museum\b                            #
                   |   aero\b                              #
                   |   coop\b                              #
                   |   name\b                              #
                   |   pro\b                               #
                   |   [a-z][a-z]\b                        #     two-letter country codes
                   )                                       #
               )?                                          #
               (?::\d+)?                                   # Optional port number
               (?:                                         # Rest of the URL, optional
                   /?                                      #     Start with '/'
                   [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]*       #     Can't start with these
                   (?:                                     #
                       [.!,?;:]+                           #     One or more of these
                       [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+   #     Can't finish with these
                       #'"                                 #     # or ' or "
                   )*                                      #
               )?                                          #
            ''',

    # Block attributes.
    'battr': r'''(?P<parameters>                           #
                     (?: %(align)s                         # alignment
                     |   %(classid)s                       # class and/or id
                     |   %(padding)s                       # padding tags
                     |   %(lang)s                          # [lang]
                     |   %(style)s                         # {style}
                     )+                                    #
                 )?                                        #
              ''' % parameters,

    # (Un)ordered list attributes.
    'olattr': r'''(?P<olparameters>                        #
                      (?: %(align)s                        # alignment
                      | ((?:\(\#[\w]+\))                   # (#id)
                          |                                #
                          (?:\((?:[\w]+(?:\s[\w]+)*)       #
                            (?:\#[\w]+)?\))                # (class1 class2 ... classn#id) or (class1 class2 ... classn)
                        )                                  #
                      | %(padding)s                        # padding tags
                      | %(lang)s                           # [lang]
                      | %(style)s                          # {style}
                      )+                                   #
                  )?                                       #
               ''' % parameters,

    # List item attributes.
    'liattr': r'''(?P<liparameters>                        #
                      (?: %(align)s                        # alignment
                      |   %(classid)s                      # class and/or id
                      |   %(padding)s                      # padding tags
                      |   %(lang)s                         # [lang]
                      |   %(style)s                        # {style}
                      )+                                   #
                  )?                                       #
               ''' % parameters,

    # Qtag attributes.
    'qattr': r'''(?P<parameters>                           #
                     (?: %(classid)s                       # class and/or id
                     |   %(lang)s                          # [lang]
                     |   %(style)s                         # {style}
                     )+                                    #
                 )?                                        #
              ''' % parameters,

    # Link attributes.
    'lattr': r'''(?P<parameters>                           # Links attributes
                     (?: %(align)s                         # alignment
                     |   %(classid)s                       # class and/or id
                     |   %(lang)s                          # [lang]
                     |   %(style)s                         # {style}
                     )+                                    #
                 )?                                        #
              ''' % parameters,

    # Image attributes.
    'iattr': r'''(?P<parameters>                           #
                     (?:                                   #
                     (?: [<>]+                             # horizontal alignment tags
                         (?![^\s]*(?:[<>])))               #     (must happen once)
                     |                                     #
                     (?: [\-\^~]+                          # vertical alignment tags
                         (?![^\s]*(?:[\-\^~])))            #     (must happen once)
                     | %(classid)s                         # class and/or id
                     | %(padding)s                         # padding tags
                     | %(style)s                           # {style}
                     )+                                    #
                 )?                                        #
              ''' % parameters,

    # Resize attributes.
    'resize': r'''(?:                                      #
                      (?:([\d]+%?)x([\d]+%?))              # 20x10
                      |                                    #
                      (?:                                  # or
                          (?:([\d]+)%?w\s([\d]+)%?h)       #     10h 20w
                          |                                #     or
                          (?:([\d]+)%?h\s([\d]+)%?w)       #     20w 10h
                      )                                    #
                  )?                                       #
               ''',

    # Table attributes.
    'tattr': r'''(?P<parameters>                           #
                     (?:                                   #
                     (?: [\^~]                             # vertical alignment
                         (?![^\s]*(?:[\^~])))              #     (must happen once)
                     |   %(align)s                         # alignment
                     |   %(lang)s                          # [lang]
                     |   %(style)s                         # {style}
                     |   %(classid)s                       # class and/or id
                     |   %(padding)s                       # padding
                     |   _                                 # is this a header row/cell?
                     |   \\\d+                             # colspan
                     |   /\d+                              # rowspan
                     )+                                    #
                 )?                                        #
              ''' % parameters,
}
def preg_replace(pattern, replacement, text):
    """Alternative re.sub that handles empty groups.

    This acts like re.sub, except it replaces empty groups with ''
    instead of raising an exception.

    `replacement` may contain numbered back-references (\\1, \\2, ...)
    which are filled in by plain string replacement; no other escape
    processing is applied to it. Returns the substituted text.
    """

    def replacement_func(matchobj):
        groups = matchobj.groups()
        _debug(groups)

        rc = replacement
        # Substitute the highest-numbered reference first; going upwards
        # would let '\1' eat the front of '\10', '\11', ...
        counter = len(groups)
        while counter > 0:
            # Groups that did not take part in the match (None), or
            # matched nothing, are replaced by the empty string.
            rc = rc.replace(r'\%s' % counter, groups[counter - 1] or '')
            counter -= 1

        return rc

    p = re.compile(pattern)
    _debug(pattern)

    return p.sub(replacement_func, text)
def html_replace(pattern, replacement, text):
    """Replacement outside HTML tags.

    Does a preg_replace only outside HTML tags: the text is cut at
    every '<...>' run, the pieces that are not tags are passed through
    preg_replace, and everything is joined back in the original order.
    """
    # If there is no html, do a simple search and replace.
    if not re.search(r'''<.*>''', text):
        return preg_replace(pattern, replacement, text)

    # Else split the text into an array at <>. The capturing group
    # makes re.split keep the tags themselves in the result.
    lines = []
    for line in re.split('(<.*?>)', text):
        if not re.match('<.*?>', line):
            line = preg_replace(pattern, replacement, line)

        lines.append(line)

    return ''.join(lines)
# PyTextile can optionally sanitize the generated XHTML,
# which is good for weblog comments. This code is from
# Mark Pilgrim's feedparser.
class _BaseHTMLProcessor(sgmllib.SGMLParser):
    """Pass-through SGML parser that rebuilds the markup it is fed.

    Every handler appends a textual reconstruction of the event to
    `self.pieces`; `output()` joins them. Subclasses override handlers
    to drop or rewrite parts of the document.
    """

    # Elements serialised as self-closing ('<br />'); their end tags
    # are swallowed by unknown_endtag.
    elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
      'img', 'input', 'isindex', 'link', 'meta', 'param']

    def __init__(self):
        sgmllib.SGMLParser.__init__(self)

    def reset(self):
        self.pieces = []
        sgmllib.SGMLParser.reset(self)

    def normalize_attrs(self, attrs):
        # utility method to be called by descendants
        # Lower-cases attribute names, decodes numeric character
        # references in the values and strips surrounding whitespace.
        attrs = [(k.lower(), sgmllib.charref.sub(lambda m: unichr(int(m.groups()[0])), v).strip()) for k, v in attrs]
        # 'rel' and 'type' values are lower-cased as well.
        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
        return attrs

    def unknown_starttag(self, tag, attrs):
        # called for each start tag
        # attrs is a list of (attr, value) tuples
        # e.g. for <pre class="screen">, tag="pre", attrs=[("class", "screen")]
        #
        # Values may contain a literal double quote (normalize_attrs
        # decodes '&#34;'), which would end the attribute early and let
        # the rest of the value be read as further attributes. Escape it.
        strattrs = "".join([' %s="%s"' % (key, value.replace('"', '&quot;')) for key, value in attrs])
        if tag in self.elements_no_end_tag:
            self.pieces.append("<%(tag)s%(strattrs)s />" % locals())
        else:
            self.pieces.append("<%(tag)s%(strattrs)s>" % locals())

    def unknown_endtag(self, tag):
        # called for each end tag, e.g. for </pre>, tag will be "pre"
        # Reconstruct the original end tag.
        if tag not in self.elements_no_end_tag:
            self.pieces.append("</%(tag)s>" % locals())

    def handle_charref(self, ref):
        # called for each character reference, e.g. for "&#160;", ref will be "160"
        # Reconstruct the original character reference.
        self.pieces.append("&#%(ref)s;" % locals())

    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for "&copy;", ref will be "copy"
        # Reconstruct the original entity reference.
        self.pieces.append("&%(ref)s;" % locals())

    def handle_data(self, text):
        # called for each block of plain text, i.e. outside of any tag and
        # not containing any character or entity references
        # Store the original text verbatim.
        self.pieces.append(text)

    def handle_comment(self, text):
        # called for each HTML comment, e.g. <!-- insert Javascript code here -->
        # Reconstruct the original comment.
        self.pieces.append("<!--%(text)s-->" % locals())

    def handle_pi(self, text):
        # called for each processing instruction, e.g. <?instruction>
        # Reconstruct original processing instruction.
        self.pieces.append("<?%(text)s>" % locals())

    def handle_decl(self, text):
        # called for the DOCTYPE, if present, e.g.
        # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        #     "http://www.w3.org/TR/html4/loose.dtd">
        # Reconstruct original DOCTYPE
        self.pieces.append("<!%(text)s>" % locals())

    def output(self):
        """Return processed HTML as a single string"""
        return "".join(self.pieces)
class _HTMLSanitizer(_BaseHTMLProcessor):
    """Whitelist filter for the generated XHTML.

    Tags outside `acceptable_elements` are dropped (their text is
    kept), attributes outside `acceptable_attributes` are dropped, and
    the text inside `unacceptable_elements_with_end_tag` is dropped too.
    Processing instructions and declarations are removed.

    NOTE(review): attribute *values* are not inspected, so e.g. an
    href using a 'javascript:' URL passes through -- confirm whether
    callers rely on this class for untrusted input.
    """

    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
      'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
      'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
      'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
      'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
      'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
      'thead', 'tr', 'tt', 'u', 'ul', 'var']

    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
      'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
      'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
      'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
      'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
      'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
      'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
      'usemap', 'valign', 'value', 'vspace', 'width']

    # Elements whose *content* is discarded along with the tags.
    unacceptable_elements_with_end_tag = ['script', 'applet']

    # This is for MathML.
    mathml_elements = ['math', 'mi', 'mn', 'mo', 'mrow', 'msup']
    mathml_attributes = ['mode', 'xmlns']

    acceptable_elements = acceptable_elements + mathml_elements
    acceptable_attributes = acceptable_attributes + mathml_attributes

    def reset(self):
        _BaseHTMLProcessor.reset(self)
        # Nesting depth of currently open content-discarding elements.
        self.unacceptablestack = 0

    def unknown_starttag(self, tag, attrs):
        if not tag in self.acceptable_elements:
            if tag in self.unacceptable_elements_with_end_tag:
                self.unacceptablestack += 1
            return
        attrs = self.normalize_attrs(attrs)
        attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes]
        _BaseHTMLProcessor.unknown_starttag(self, tag, attrs)

    def unknown_endtag(self, tag):
        if not tag in self.acceptable_elements:
            # Never go below zero: an unmatched '</script>' would leave a
            # negative (truthy) depth and handle_data would then discard
            # all the text that follows.
            if tag in self.unacceptable_elements_with_end_tag and self.unacceptablestack > 0:
                self.unacceptablestack -= 1
            return
        _BaseHTMLProcessor.unknown_endtag(self, tag)

    def handle_pi(self, text):
        # Processing instructions are dropped.
        pass

    def handle_decl(self, text):
        # Declarations (DOCTYPE) are dropped.
        pass

    def handle_data(self, text):
        # Keep text only while outside every content-discarding element.
        if not self.unacceptablestack:
            _BaseHTMLProcessor.handle_data(self, text)
635 class Textiler:
636 """Textile formatter.
638 This is the base class for the PyTextile text processor.
def __init__(self, text=''):
    """Instantiate the class, passing the text to be formatted.

    Stores the text, the shared regular expression table, the
    "smart search" URL templates and the list of block signatures.
    Each signature is a (verbose regular expression, handler) pair;
    split_text() tries them in order against every block.
    """
    self.text = text

    # Basic regular expressions.
    self.res = res

    # Smart searches.
    self.searches = {}
    self.searches['imdb'] = 'http://www.imdb.com/Find?for=%s'
    self.searches['google'] = 'http://www.google.com/search?q=%s'
    self.searches['python'] = 'http://www.python.org/doc/current/lib/module-%s.html'
    if amazon_associate_id:
        self.searches['isbn'] = ''.join(['http://', AMAZON, '/exec/obidos/ASIN/%s/', amazon_associate_id])
        self.searches['amazon'] = ''.join(['http://', AMAZON, '/exec/obidos/external-search?mode=blended&keyword=%s&tag=', amazon_associate_id])
    else:
        self.searches['isbn'] = ''.join(['http://', AMAZON, '/exec/obidos/ASIN/%s'])
        self.searches['amazon'] = ''.join(['http://', AMAZON, '/exec/obidos/external-search?mode=blended&keyword=%s'])

    # These are the blocks we know.
    self.signatures = [
        # Paragraph.
        (r'''^p                                # Paragraph signature
             %(battr)s                         # Paragraph attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended paragraph denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.paragraph),

        # Pre-formatted text.
        (r'''^pre                              # Pre signature
             %(battr)s                         # Pre attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended pre denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.pre),

        # Block code.
        (r'''^bc                               # Blockcode signature
             %(battr)s                         # Blockcode attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended blockcode denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.bc),

        # Blockquote.
        (r'''^bq                               # Blockquote signature
             %(battr)s                         # Blockquote attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended blockquote denoted by a second dot
             (:(?P<cite>                       # Optional cite attribute
             (                                 #
                 %(url)s                       #     URL
             |   "[\w]+(?:\s[\w]+)*"           #     "Name inside quotes"
             ))                                #
             )?                                #
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.blockquote),

        # Header.
        (r'''^h                                # Header signature
             (?P<header>\d)                    # Header number
             %(battr)s                         # Header attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended header denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.header),

        # Footnote.
        (r'''^fn                               # Footnote signature
             (?P<footnote>[\d]+)               # Footnote number
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended footnote denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''', self.footnote),

        # Definition list.
        (r'''^dl                               # Definition list signature
             %(battr)s                         # Definition list attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended definition list denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.dl),

        # Ordered list (attributes to first <li>).
        (r'''^%(olattr)s                       # Ordered list attributes
             \#                                # Ordered list signature
             %(liattr)s                        # List item attributes
             (?P<dot>\.)?                      # .
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.ol),

        # Unordered list (attributes to first <li>).
        (r'''^%(olattr)s                       # Unrdered list attributes
             \*                                # Unordered list signature
             %(liattr)s                        # Unordered list attributes
             (?P<dot>\.)?                      # .
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.ul),

        # Escaped text.
        (r'''^==?(?P<text>.*?)(==)?$           # Escaped text
          ''', self.escape),

        (r'''^(?P<text><.*)$                   # XHTML tag
          ''', self.escape),

        # itex code.
        (r'''^(?P<text>                        # itex code
             \\\[                              # starts with \[
             .*?                               # complicated mathematical equations go here
             \\\])                             # ends with \]
          ''', self.itex),

        # Tables.
        (r'''^table                            # Table signature
             %(tattr)s                         # Table attributes
             (?P<dot>\.)                       # .
             (?P<extend>\.)?                   # Extended blockcode denoted by a second dot
             \s                                # whitespace
             (?P<text>.*)                      # text
          ''' % self.res, self.table),

        # Simple tables.
        (r'''^(?P<text>
             \|
             .*)
          ''', self.table),

        # About.
        (r'''^(?P<text>tell\sme\sabout\stextile\.)$''', self.about),
    ]
def preprocess(self):
    """Pre-processing of the text.

    Remove whitespace, fix carriage returns.

    Works in place on self.text: surrounding whitespace is stripped,
    '\\r\\n' and lone '\\r' become '\\n', and the result is passed
    through self.sanitize().
    """
    # Remove whitespace.
    text = self.text.strip()

    # Zap carriage returns ("\r\n" first so it yields a single "\n").
    text = text.replace("\r\n", "\n").replace("\r", "\n")

    # Minor sanitizing.
    self.text = self.sanitize(text)
def grab_links(self):
    """Grab link lookups.

    Check the text for link lookups, store them in a
    dictionary, and clean them up.

    A lookup is a whole line of the form '[id]example.com'. Returns
    the {id: url} dictionary and removes the lookup lines from
    self.text.
    """
    # Grab links like this: '[id]example.com'
    #
    # The end of the line is only *looked at*, not consumed: it is the
    # newline that introduces the next line, so consuming it would make
    # a lookup on the very next line unmatchable and would glue the
    # neighbouring lines together when the lookup is removed.
    links = {}
    p = re.compile(r'''(?:^|\n)\[([\w]+?)\](%(url)s)(?=$|\n)''' % self.res, re.VERBOSE)
    for key, link in p.findall(self.text):
        links[key] = link

    # And clear them from the text.
    self.text = p.sub('', self.text)

    return links
def process(self, head_offset=HEAD_OFFSET, validate=VALIDATE, sanitize=SANITIZE, output=OUTPUT, encoding=ENCODING):
    """Process the text.

    Here we actually process the text, splitting the text in
    blocks and applying the corresponding function to each
    one of them.

    head_offset -- number added to every header level.
    validate    -- run the result through _tidy when one is available.
    sanitize    -- filter the result through _HTMLSanitizer.
    output      -- encoding of the returned string; characters it
                   cannot represent become XML character references.
    encoding    -- encoding of the generated text when it is a byte string.

    Returns the generated XHTML.
    """
    # Basic global changes.
    self.preprocess()

    # Grab lookup links and clean them from the text.
    self._links = self.grab_links()

    # Offset for the headers.
    self.head_offset = head_offset

    # Process each block.
    self.blocks = self.split_text()

    text = []
    for function, captures in self.blocks:
        text.append(function(**captures))

    text = '\n\n'.join(text)

    # Add titles to footnotes.
    text = self.footnotes(text)

    # Convert to desired output. Decoding is skipped for text that is
    # already unicode, since unicode() refuses to decode it again.
    if not isinstance(text, unicode):
        text = unicode(text, encoding)
    text = text.encode(output, 'xmlcharrefreplace')

    # Sanitize?
    if sanitize:
        p = _HTMLSanitizer()
        p.feed(text)
        text = p.output()

    # Validate output.
    if _tidy and validate:
        text = _tidy(text)

    return text
def sanitize(self, text):
    """Fix single tags.

    Fix tags like <img />, <br /> and <hr />.

    ---
    h1. Sanitizing

    Textile can help you generate valid XHTML(eXtensible HyperText Markup Language).
    It will fix any single tags that are not properly closed, like
    @<img />@, @<br />@ and @<hr />@.

    If you have "mx.Tidy":http://www.egenix.com/files/python/mxTidy.html
    and/or "&micro;TidyLib":http://utidylib.sourceforge.net/ installed,
    it also can optionally validade the generated code with these wrappers
    to ensure 100% valid XHTML(eXtensible HyperText Markup Language).
    """
    # Fix single tags like <img /> and <br />: whatever closing the
    # tag had ('>', '/>', ' />') is normalised to ' />'.
    text = preg_replace(r'''<(img|br|hr)(.*?)(?:\s*/?\s*)?>''', r'''<\1\2 />''', text)

    # Escape ampersands that do not already start an entity or a
    # character reference.
    text = preg_replace(r'''&(?!#?[xX]?(?:[0-9a-fA-F]+|\w{1,8});)''', r'''&amp;''', text)

    return text
def split_text(self):
    """Process the blocks from the text.

    Split the blocks according to the signatures, join extended
    blocks and associate each one of them with a function to
    process them.

    ---
    h1. Blocks

    Textile process your text by dividing it in blocks. Each block
    is identified by a signature and separated from other blocks by
    an empty line.

    All signatures should end with a period followed by a space. A
    header @<h1></h1>@ can be done this way:

    pre. h1. This is a header 1.

    Blocks may continue for multiple paragraphs of text. If you want
    a block signature to stay "active", use two periods after the
    signature instead of one. For example:

    pre.. bq.. This is paragraph one of a block quote.

    This is paragraph two of a block quote.

    =p. Now we're back to a regular paragraph.

    p. Becomes:

    pre.. <blockquote>
    <p>This is paragraph one of a block quote.</p>

    <p>This is paragraph two of a block quote.</p>
    </blockquote>

    <p>Now we&#8217;re back to a regular paragraph.</p>

    p. The blocks can be customised by adding parameters between the
    signature and the period. These include:

    dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
    [ll]:A language identifier (for a "lang" attribute).
    (class) or (#id) or (class#id):For CSS(Cascading Style Sheets) class and id attributes.
    &gt;, &lt;, =, &lt;&gt;:Modifier characters for alignment. Right-justification, left-justification, centered, and full-justification. The paragraph will also receive the class names "right", "left", "center" and "justify", respectively.
    ( (one or more):Adds padding on the left. 1em per "(" character is applied. When combined with the align-left or align-right modifier, it makes the block float.
    ) (one or more):Adds padding on the right. 1em per ")" character is applied. When combined with the align-left or align-right modifier, it makes the block float.

    Here's an overloaded example:

    pre. p(())>(class#id)[en]{color:red}. A simple paragraph.

    Becomes:

    pre. <p lang="en" style="color:red;padding-left:2em;padding-right:2em;float:right;" class="class right" id="id">A simple paragraph.</p>
    """
    # Clear signature.
    clear_sig = r'''^clear(?P<alignment>[<>])?\.$'''
    clear = None

    extending = 0

    # The signatures do not change while we loop, so compile them once.
    signatures = [(re.compile(regexp, (re.VERBOSE | re.DOTALL)), function)
                  for regexp, function in self.signatures]

    # We capture the \n's because they are important inside "pre..".
    blocks = re.split(r'''((\n\s*){2,})''', self.text)
    output = []
    for block in blocks:
        # Check for the clear signature.
        m = re.match(clear_sig, block)
        if m:
            # Remember the style; it is attached to the next block
            # that matches a signature.
            clear = m.group('alignment')
            if clear:
                clear = {'<': 'clear:left;', '>': 'clear:right;'}[clear]
            else:
                clear = 'clear:both;'

        else:
            # Check each of the code signatures.
            for p, function in signatures:
                m = p.match(block)
                if m:
                    # Put everything in a dictionary.
                    captures = m.groupdict()

                    # If we are extending a block, we require a dot to
                    # break it, so we can start lines with '#' inside
                    # an extended <pre> without matching an ordered list.
                    if extending and not captures.get('dot', None):
                        output[-1][1]['text'] += block
                        break
                    elif 'dot' in captures:
                        del captures['dot']

                    # If a signature matches, we are not extending a block.
                    extending = 0

                    # Check if we should extend this block.
                    if 'extend' in captures:
                        extending = captures['extend']
                        del captures['extend']

                    # Apply head_offset.
                    if 'header' in captures:
                        captures['header'] = int(captures['header']) + self.head_offset

                    # Apply clear.
                    if clear:
                        captures['clear'] = clear
                        clear = None

                    # Save the block to be processed later.
                    output.append([function, captures])

                    break

            else:
                # No signature matched this block.
                if extending:
                    # Append the text to the last block.
                    output[-1][1]['text'] += block
                elif block.strip():
                    # Anything else that is not blank is a plain paragraph.
                    output.append([self.paragraph, {'text': block}])

    return output
def parse_params(self, parameters, clear=None, align_type='block'):
    """Parse the parameters from a block signature.

    Splits a Textile parameter string into class, id, language and
    style. The positioning (indentation and alignment) is folded into
    the style, and for blocks and images a class name describing the
    alignment is appended to the class.

    A paragraph like:

        p>(class#id){color:red}[en]. Paragraph.

    or:

        p{color:red}[en](class#id)>. Paragraph.

    will have its parameters parsed to:

        output = {'lang' : 'en',
                  'class': 'class right',
                  'id'   : 'id',
                  'style': 'color:red;text-align:right;'}

    Note that order is not important.

    Arguments:
        parameters -- the raw parameter string. When it is empty or
            None the result is {} (or {'style': clear} if clear is set).
        clear -- optional CSS declaration appended to the style.
        align_type -- 'image' (horizontal float plus vertical align),
            'table' (sets 'align', 'valign', 'colspan', 'rowspan') or
            anything else for regular blocks.

    Returns a dictionary of attributes. For a non-empty parameter
    string the 'style' key is always present, possibly empty.
    """
    if not parameters:
        if clear:
            return {'style': clear}
        return {}

    output = {}

    # Match class from (class) or (class#id).
    m = re.search(r'''\((?P<class>[\w]+(\s[\w]+)*)(\#[\w]+)?\)''', parameters)
    if m:
        output['class'] = m.group('class')

    # Match id from (#id) or (class#id).
    m = re.search(r'''\([\w]*(\s[\w]+)*\#(?P<id>[\w]+)\)''', parameters)
    if m:
        output['id'] = m.group('id')

    # Match [language].
    m = re.search(r'''\[(?P<lang>[\w-]+)\]''', parameters)
    if m:
        output['lang'] = m.group('lang')

    # Match {style}.
    m = re.search(r'''\{(?P<style>[^}]+)\}''', parameters)
    if m:
        output['style'] = m.group('style').replace('\n', '')

        # If necessary, append a semicolon to the style.
        if not output['style'].endswith(';'):
            output['style'] += ';'

    # Clear the block?
    if clear:
        output['style'] = output.get('style', '') + clear

    # Remove classes, ids, langs and styles so that only the positioning
    # characters are left. The replacement is empty, so plain re.sub is
    # enough here. The style pattern mirrors the one used for capturing
    # above; otherwise a style such as {background:url(a.png)} would
    # survive and its parentheses would be counted as indentation.
    parameters = re.sub(r'''\([\#\w\s]+\)''', '', parameters)
    parameters = re.sub(r'''\[[\w-]+\]''', '', parameters)
    parameters = re.sub(r'''\{[^}]+\}''', '', parameters)

    style = []

    # Count the left indentation.
    l_indent = parameters.count('(')
    if l_indent:
        style.append('padding-left:%dem;' % l_indent)

    # Count the right indentation.
    r_indent = parameters.count(')')
    if r_indent:
        style.append('padding-right:%dem;' % r_indent)

    # Add alignment.
    if align_type == 'image':
        align = [('<', 'float:left;', ' left'),
                 ('>', 'float:right;', ' right')]

        valign = [('^', 'vertical-align:text-top;', ' top'),
                  ('-', 'vertical-align:middle;', ' middle'),
                  ('~', 'vertical-align:text-bottom;', ' bottom')]

        # Images can have both a vertical and a horizontal alignment;
        # within each group only the first match is used.
        for alignments in [align, valign]:
            for _align, _style, _class in alignments:
                if parameters.count(_align):
                    style.append(_style)

                    # Append a class name related to the alignment.
                    output['class'] = output.get('class', '') + _class
                    break

    elif align_type == 'table':
        align = [('<', 'left'),
                 ('>', 'right'),
                 ('=', 'center'),
                 ('<>', 'justify')]

        valign = [('^', 'top'),
                  ('~', 'bottom')]

        # Horizontal alignment. No break: a later entry overrides an
        # earlier one, which is how '<>' wins over '<' and '>'.
        for _align, _value in align:
            if parameters.count(_align):
                output['align'] = _value

        # Vertical alignment.
        for _align, _value in valign:
            if parameters.count(_align):
                output['valign'] = _value

        # Colspan and rowspan.
        m = re.search(r'''\\(\d+)''', parameters)
        if m:
            output['colspan'] = int(m.group(1))

        m = re.search(r'''/(\d+)''', parameters)
        if m:
            output['rowspan'] = int(m.group(1))

    else:
        # Indented blocks float; unindented blocks only align their text.
        if l_indent or r_indent:
            alignments = [('<>', 'text-align:justify;', ' justify'),
                          ('=', 'text-align:center;', ' center'),
                          ('<', 'float:left;', ' left'),
                          ('>', 'float:right;', ' right')]
        else:
            alignments = [('<>', 'text-align:justify;', ' justify'),
                          ('=', 'text-align:center;', ' center'),
                          ('<', 'text-align:left;', ' left'),
                          ('>', 'text-align:right;', ' right')]

        for _align, _style, _class in alignments:
            if parameters.count(_align):
                style.append(_style)

                # Append a class name related to the alignment.
                output['class'] = output.get('class', '') + _class
                break

    # Join all the styles.
    output['style'] = output.get('style', '') + ''.join(style)

    # Remove excess whitespace.
    if 'class' in output:
        output['class'] = output['class'].strip()

    return output
def build_open_tag(self, tag, attributes=None, single=0):
    """Build the open tag with specified attributes.

    This function is used by all block builders to generate the
    opening tags with the attributes of the block.

    Arguments:
        tag -- the element name, e.g. 'p'.
        attributes -- optional mapping of attribute names to values.
            Attributes whose value is false are omitted, except for
            'alt', which is emitted even when empty.
        single -- when true the tag is self-closed (' />').

    Returns the tag as a string. Values are inserted as given; no
    escaping is applied here.
    """
    # A None default avoids sharing one dictionary between all calls.
    if attributes is None:
        attributes = {}

    pieces = ['<%s' % tag]
    for k, v in attributes.items():
        # The ALT attribute can be empty.
        if k == 'alt' or v:
            pieces.append(' %s="%s"' % (k, v))

    if single:
        pieces.append(' /')

    # Close tag.
    pieces.append('>')

    return ''.join(pieces)
def paragraph(self, text, parameters=None, attributes=None, clear=None):
    """Process a paragraph.

    The text is split on runs of two or more newlines; each non-empty
    piece is wrapped in a <p> tag, has its line breaks turned into
    <br />, and is run through the inline rules.

    Arguments:
        text -- the block text.
        parameters -- raw parameter string, parsed only when no
            attributes are supplied.
        attributes -- ready-made attribute dictionary. Its 'id' entry
            is removed after the first paragraph is built, so the
            dictionary passed in is modified.
        clear -- optional CSS 'clear' declaration passed on to
            parse_params.

    Returns the paragraphs joined by a blank line.

    ---
    h1. Paragraph

    This is how you write a paragraph:

    pre. p. This is a paragraph, although a short one.

    Since the paragraph is the default block, you can safely omit its
    signature ([@p@]). Simply write:

    pre. This is a paragraph, although a short one.

    Text in a paragraph block is wrapped in @<p></p>@ tags, and
    newlines receive a <br /> tag. In both cases Textile will process
    the text to:

    pre. <p>This is a paragraph, although a short one.</p>

    Text in a paragraph block is processed with all the inline rules.
    """
    # Split the lines.
    lines = re.split('\n{2,}', text)

    # Get the attributes.
    attributes = attributes or self.parse_params(parameters, clear)

    close_tag = '</p>'

    output = []
    for line in lines:
        if not line:
            continue

        # Clean the line.
        line = line.strip()

        # Build the tag.
        open_tag = self.build_open_tag('p', attributes)

        # Drop the id after its first use because it must be unique.
        if 'id' in attributes:
            del attributes['id']

        # Break lines.
        line = preg_replace(r'(<br />|\n)+', '<br />\n', line)

        # Remove <br /> from inside broken HTML tags.
        line = preg_replace(r'(<[^>]*)<br />\n(.*?>)', r'\1 \2', line)

        # Inline formatting.
        line = self.inline(line)

        output.append(open_tag + line + close_tag)

    return '\n\n'.join(output)
def pre(self, text, parameters=None, clear=None):
    """Process pre-formatted text.

    Trailing whitespace is removed, @<@ and @>@ are replaced by their
    HTML entities, and the result is wrapped in a <pre> element whose
    attributes come from the parsed parameters. No inline rule is
    applied.

    ---
    h1. Pre-formatted text

    Pre-formatted text can be specified using the @pre@ signature.
    Inside a "pre" block, whitespace is preserved and @<@ and @>@ are
    translated into HTML(HyperText Markup Language) entities
    automatically.

    Text in a "pre" block is _not processed_ with any inline rule.

    Here's a simple example:

    pre. pre. This text is pre-formatted.
    Nothing interesting happens inside here...

    Will become:

    pre. <pre>
    This text is pre-formatted.
    Nothing interesting happens inside here...
    </pre>
    """
    # Only trailing whitespace goes; leading whitespace is significant.
    body = text.rstrip()

    opening = self.build_open_tag('pre', self.parse_params(parameters, clear))

    # Neutralise markup characters.
    for raw, entity in (('<', '&lt;'), ('>', '&gt;')):
        body = body.replace(raw, entity)

    return opening + body + '\n</pre>'
def bc(self, text, parameters=None, clear=None):
    """Process block code.

    The text is wrapped in a <code> element inside a <pre>. The
    language from the parameters is removed from the attributes; when
    it is 'python' and the htmlizer module is available the text is
    colorized, otherwise @<@ and @>@ are replaced by HTML entities.

    ---
    h1. Block code

    A block code, specified by the @bc@ signature, is a block of
    pre-formatted text which also receives a @<code></code>@ tag. As
    with "pre", whitespace is preserved and @<@ and @>@ are translated
    into HTML(HyperText Markup Language) entities automatically.

    Text in a "bc" code is _not processed_ with the inline rules.

    If you have "Twisted":http://www.twistedmatrix.com/ installed,
    Textile can automatically colorize your Python code if you
    specify its language as "Python":

    pre. bc[python]. from twisted.python import htmlizer

    This will become:

    pre. <pre>
    <code lang="python">
    <span class="py-src-keyword">from</span> <span class="py-src-variable">twisted</span><span class="py-src-op">.</span><span class="py-src-variable">python</span> <span class="py-src-keyword">import</span> <span class="py-src-variable">htmlizer</span>
    </code>
    </pre>

    The colors can be specified in your CSS(Cascading Style Sheets)
    file. If you don't want to install Twisted, you can download just
    the @htmlizer@ module "independently":http://dealmeida.net/code/htmlizer.py.txt.
    """
    # Get the attributes.
    attributes = self.parse_params(parameters, clear)

    # XHTML <code> can't have the attribute lang, so take it out of the
    # attributes and keep it only to decide about colorizing.
    lang = None
    if 'lang' in attributes:
        lang = attributes['lang']
        del attributes['lang']

    # Build the tag.
    open_tag = '<pre>\n' + self.build_open_tag('code', attributes) + '\n'
    close_tag = '\n</code>\n</pre>'

    # Colorize Python code?
    if htmlizer and lang == 'python':
        text = _color(text)
    else:
        # Replace < and >.
        text = text.replace('<', '&lt;')
        text = text.replace('>', '&gt;')

    return open_tag + text + close_tag
def dl(self, text, parameters=None, clear=None):
    """Process definition list.

    Each line of the text is split at its first colon into a term and
    a definition; a line without a colon becomes a term with an empty
    definition, and lines with an empty term are dropped. The joined
    <dt>/<dd> pairs are run through the inline rules and wrapped in a
    <dl> element.

    ---
    h1. Definition list

    A definition list starts with the signature @dl@, and has
    its items separated by a @:@. Here's a simple example:

    pre. dl. name:Sir Lancelot of Camelot.
    quest:To seek the Holy Grail.
    color:Blue.

    Becomes:

    pre. <dl>
    <dt>name</dt>
    <dd>Sir Lancelot of Camelot.</dd>
    <dt>quest</dt>
    <dd>To seek the Holy Grail.</dd>
    <dt>color</dt>
    <dd>Blue.</dd>
    </dl>
    """
    opening = self.build_open_tag('dl', self.parse_params(parameters, clear)) + '\n'

    entries = []
    for row in text.split('\n'):
        if ':' in row:
            # Only the first colon separates term from definition.
            term, definition = row.split(':', 1)
        else:
            term, definition = row, ''

        if term:
            entries.append('<dt>%s</dt>\n<dd>%s</dd>' % (term, definition))

    body = self.inline('\n'.join(entries))

    return opening + body + '\n</dl>'
def blockquote(self, text, parameters=None, cite=None, clear=None):
    """Process block quote.

    The text is processed as a paragraph and wrapped in a <blockquote>
    element. The cite attribute comes from the cite argument (with
    surrounding double quotes removed) or, when that is not given,
    from a last line that starts with two dashes and a space.

    ---
    h1. Blockquote

    A blockquote is denoted by the signature @bq@. The text in this
    block will be enclosed in @<blockquote></blockquote>@ and @<p></p>@,
    receiving the same formatting as a paragraph. For example:

    pre. bq. This is a blockquote.

    Becomes:

    pre. <blockquote>
    <p>This is a blockquote.</p>
    </blockquote>

    You can optionally specify the @cite@ attribute of the blockquote,
    using the following syntax:

    pre. bq.:http://example.com Some text.

    pre. bq.:"John Doe" Some other text.

    Becomes:

    pre. <blockquote cite="http://example.com">
    <p>Some text.</p>
    </blockquote>

    pre. <blockquote cite="John Doe">
    <p>Some other text.</p>
    </blockquote>

    You can also specify the @cite@ using a pair of dashes on the
    last line of the blockquote:

    pre. bq. Some text.
    -- http://example.com
    """
    attributes = self.parse_params(parameters, clear)

    if cite:
        # An explicit citation may arrive wrapped in double quotes.
        attributes['cite'] = cite.strip('"')
    else:
        # Otherwise look for a '-- source' line closing the quote.
        rows = text.split('\n')
        if rows[-1].startswith('-- '):
            attributes['cite'] = rows.pop()[3:]
        text = '\n'.join(rows)

    opening = self.build_open_tag('blockquote', attributes) + '\n'

    # The attributes stay on the <blockquote>; the inner paragraph is
    # built without any.
    inner = self.paragraph(text)

    return opening + inner + '\n</blockquote>'
def header(self, text, parameters=None, header=1, clear=None):
    """Process a header.

    The header level arrives in the header argument and is clamped to
    the range 1..6. The text is run through the inline rules and
    wrapped in the matching <hN> element.

    ---
    h1. Header

    A header is produced by the signature @hn@, where @n@ goes
    from 1 to 6. You can adjust the relative output of the headers
    passing a @head_offset@ attribute when calling @textile()@.

    To make a header:

    pre. h1. This is a header.

    Becomes:

    pre. <h1>This is a header.</h1>
    """
    attributes = self.parse_params(parameters, clear)

    # Keep the level inside what HTML offers (h1 to h6).
    level = max(min(header, 6), 1)

    opening = self.build_open_tag('h%d' % level, attributes)
    content = self.inline(text)

    return opening + content + '</h%d>' % level
def footnote(self, text, parameters=None, footnote=1, clear=None):
    """Process a footnote.

    A footnote is a paragraph with class 'footnote' and id 'fnN' whose
    text starts with the footnote number in a <sup> tag. The class and
    id replace whatever the parsed parameters contained; the finished
    attributes are handed to self.paragraph().

    ---
    h1. Footnote

    A footnote is produced by the signature @fn@ followed by
    a number. Footnotes are paragraphs of a special CSS(Cascading Style Sheets)
    class. An example:

    pre. fn1. This is footnote number one.

    Will produce this:

    pre. <p class="footnote" id="fn1"><sup>1</sup> This is footnote number one.</p>

    This footnote can be referenced anywhere on the text by the
    following way:

    pre. This is a reference[1] to footnote number one.

    Which becomes:

    pre. <p>This is a reference<sup class="footnote"><a href="#fn1" title="This is footnote number one.">1</a></sup> to footnote number 1.</p>

    Note that the text from the footnote appears in the @title@ of the
    link pointing to it.
    """
    number = int(footnote)

    # Start from the parsed parameters and force the footnote markers.
    attributes = self.parse_params(parameters, clear)
    attributes.update({'class': 'footnote', 'id': 'fn%d' % number})

    # The number leads the paragraph as a superscript.
    body = ('<sup>%d</sup> ' % number) + text

    return self.paragraph(text=body, attributes=attributes)
def build_li(self, items, liattributes):
    """Build the list items of an (un)ordered list.

    Every entry of items becomes one <li> element. After taking an
    item, the next one is inspected: if it starts (after optional list
    parameters) with '#' or '*', it and the following items starting
    with the same marker are handed to self.ol() or self.ul() and the
    resulting nested list is placed inside the current <li>.

    Arguments:
        items -- list of raw item strings; it is consumed (emptied)
            by this call.
        liattributes -- attributes for the first <li> only; later
            items use the attributes parsed from their own parameters.

    Returns the <li> elements joined by newlines.
    """
    # Both patterns are the same for every item, so compile them once.
    li_pattern = re.compile(r'''^%(liattr)s\s''' % self.res, re.VERBOSE)
    ol_pattern = re.compile(r'''^%(olattr)s''' % self.res, re.VERBOSE)

    lines = []
    while items:
        item = items.pop(0)

        # Clean the line.
        item = item.lstrip()
        item = item.replace('\n', '<br />\n')

        # Get list item attributes.
        m = li_pattern.match(item)
        if m:
            liparameters = m.groupdict('')['liparameters']
            item = li_pattern.sub('', item)
        else:
            liparameters = ''

        liattributes = liattributes or self.parse_params(liparameters)

        # Build the item tag.
        open_tag_li = self.build_open_tag('li', liattributes)

        # Reset the attributes, which should be applied
        # only to the first <li>.
        liattributes = {}

        # Multi-list recursive routine.
        # Here we check the _next_ item for a multi-list. If we
        # find one, we extract all items of the multi-list and
        # process them recursively.
        if items:
            n_item = items.pop(0)

            # Grab the nested list parameters.
            m = ol_pattern.match(n_item)
            if m:
                olparameters = m.groupdict('')['olparameters']
                tmp = ol_pattern.sub('', n_item)
            else:
                olparameters = ''
                # Without this the marker checks below would read an
                # unbound (or stale) name when the pattern does not match.
                tmp = n_item

            for marker, build_list in (('#', self.ol), ('*', self.ul)):
                if tmp.startswith(marker):
                    # Collect this item and every following one that
                    # carries the same marker.
                    inlist = [tmp]
                    while items and items[0].startswith(marker):
                        inlist.append(items.pop(0))

                    sublist = build_list('\n'.join(inlist), olparameters=olparameters)
                    item = item + '\n' + sublist + '\n'
                    break
            else:
                # Not a nested list: put the item back for the next round.
                items.insert(0, n_item)

        item = self.inline(item)

        lines.append(open_tag_li + item + '</li>')

    return '\n'.join(lines)
def ol(self, text, liparameters=None, olparameters=None, clear=None):
    """Build an ordered list.

    The leading '#' is removed, the text is split into items at every
    newline followed by '#', and the items are handed to build_li,
    which also takes care of nested lists. The result is wrapped in an
    <ol> element carrying the attributes parsed from olparameters.

    ---
    h1. Ordered lists

    Ordered lists can be constructed this way:

    pre. # Item number 1.
    # Item number 2.
    # Item number 3.

    And you get:

    pre. <ol>
    <li>Item number 1.</li>
    <li>Item number 2.</li>
    <li>Item number 3.</li>
    </ol>

    If you want a list to "break" an extended block, you should
    add a period after the hash. This is useful for writing
    Python code:

    pre.. bc[python].. #!/usr/bin/env python

    # This is a comment, not an ordered list!
    # So this won't break the extended "bc".

    p. Lists can be nested:

    pre. # Item number 1.
    ## Item number 1a.
    ## Item number 1b.
    # Item number 2.
    ## Item number 2a.

    Textile will transform this to:

    pre. <ol>
    <li>Item number 1.
    <ol>
    <li>Item number 1a.</li>
    <li>Item number 1b.</li>
    </ol>
    </li>
    <li>Item number 2.
    <ol>
    <li>Item number 2a.</li>
    </ol>
    </li>
    </ol>

    You can also mix ordered and unordered lists:

    pre. * To write well you need:
    *# to read every day
    *# to write every day
    *# and X

    You'll get this:

    pre. <ul>
    <li>To write well you need:
    <ol>
    <li>to read every day</li>
    <li>to write every day</li>
    <li>and X</li>
    </ol>
    </li>
    </ul>

    To style a list, the parameters should go before the hash if you want
    to set the attributes on the @<ol>@ tag:

    pre. (class#id)# one
    # two
    # three

    If you want to customize the firsr @<li>@ tag, apply the parameters
    after the hash:

    pre. #(class#id) one
    # two
    # three
    """
    marker = '#'

    list_attributes = self.parse_params(olparameters, clear)
    item_attributes = self.parse_params(liparameters)

    # Strip one level of list depth from the first item; the split
    # below strips it from all the others.
    if text.startswith(marker):
        text = text[1:]
    entries = text.split('\n' + marker)

    opening = self.build_open_tag('ol', list_attributes) + '\n'
    body = self.build_li(entries, item_attributes)

    return opening + body + '\n</ol>'
def ul(self, text, liparameters=None, olparameters=None, clear=None):
    """Build an unordered list.

    The leading '*' is removed, the text is split into items at every
    newline followed by '*', and the items are handed to build_li,
    which also takes care of nested lists. The result is wrapped in a
    <ul> element carrying the attributes parsed from olparameters.

    ---
    h1. Unordered lists

    Unordered lists behave exactly like the ordered lists, and are
    defined using a star:

    pre. * Python
    * Perl
    * PHP

    Becomes:

    pre. <ul>
    <li>Python</li>
    <li>Perl</li>
    <li><span class="caps">PHP</span></li>
    </ul>
    """
    marker = '*'

    list_attributes = self.parse_params(olparameters, clear)
    item_attributes = self.parse_params(liparameters)

    # Strip one level of list depth from the first item; the split
    # below strips it from all the others.
    if text.startswith(marker):
        text = text[1:]
    entries = text.split('\n' + marker)

    opening = self.build_open_tag('ul', list_attributes) + '\n'
    body = self.build_li(entries, item_attributes)

    return opening + body + '\n</ul>'
def table(self, text, parameters=None, clear=None):
    """Build a table.

    The text is split in lines to get the rows, and each row is split
    at '|' to get the cells. Whatever precedes the first '|' of a row
    holds the row parameters; the piece after the last '|' is ignored.
    A '_' in the row parameters makes every cell of the row a <th>; a
    '_' in a cell's own parameters makes just that cell a <th>. The
    alignment of a header cell becomes the default alignment of the
    cells below it, across its whole colspan.

    Arguments:
        text -- the table rows, one per line.
        parameters -- raw parameter string for the <table> tag.
        clear -- optional CSS 'clear' declaration for the <table> tag.

    Returns the complete <table> markup.

    ---
    h1. Tables

    Making a simple table is as easy as possible:

    pre. |a|b|c|
    |1|2|3|

    Will be processed into:

    pre. <table>
    <tr>
    <td>a</td>
    <td>b</td>
    <td>c</td>
    </tr>
    <tr>
    <td>1</td>
    <td>2</td>
    <td>3</td>
    </tr>
    </table>

    If you want to customize the @<table>@ tag, you must use the
    @table@ signature:

    pre. table(class#id)[en]. |a|b|c|
    |1|2|3|

    To customize a row, apply the modifier _before_ the first @|@:

    pre. table. (class)<>|a|b|c|
    |1|2|3|

    Individual cells can by customized by adding the parameters _after_
    the @|@, proceded by a period and a space:

    pre. |(#id). a|b|c|
    |1|2|3|

    The allowed modifiers are:

    dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
    (class) or (#id) or (class#id):A CSS(Cascading Style Sheets) class and/or id attribute.
    ( (one or more):Adds 1em of padding to the left for each '(' character.
    ) (one or more):Adds 1em of padding to the right for each ')' character.
    &lt;:Aligns to the left (floats to left for tables if combined with the ')' modifier).
    &gt;:Aligns to the right (floats to right for tables if combined with the '(' modifier).
    =:Aligns to center (sets left, right margins to 'auto' for tables).
    &lt;&gt;:For cells only. Justifies text.
    ^:For rows and cells only. Aligns to the top.
    ~ (tilde):For rows and cells only. Aligns to the bottom.
    _ (underscore):Can be applied to a table row or cell to indicate a header row or cell.
    \\2 or \\3 or \\4, etc.:Used within cells to indicate a colspan of 2, 3, 4, etc. columns. When you see "\\", think "push forward".
    /2 or /3 or /4, etc.:Used within cells to indicate a rowspan of 2, 3, 4, etc. rows. When you see "/", think "push downward".

    When a cell is identified as a header cell and an alignment is
    specified, that becomes the default alignment for cells below it.
    You can always override this behavior by specifying an alignment
    for one of the lower cells.
    """
    attributes = self.parse_params(parameters, clear, align_type='table')

    # Build the <table>.
    open_tag = self.build_open_tag('table', attributes) + '\n'
    close_tag = '</table>'

    # The cell pattern does not depend on the row or cell, so it is
    # compiled once instead of once per cell.
    cell_pattern = re.compile(r'''(?:%(tattr)s\.\s)?(?P<text>.*)''' % self.res, re.VERBOSE)

    output = []
    default_align = {}
    for row in re.split(r'''\n+''', text):
        # Get the columns.
        columns = row.split('|')

        # Build the <tr>.
        row_parameters = columns.pop(0)

        rowattr = self.parse_params(row_parameters, align_type='table')
        output.append(self.build_open_tag('tr', rowattr) + '\n')

        # Does the row define headers?
        if row_parameters.count('_'):
            row_tag = 'th'
        else:
            row_tag = 'td'

        col = 0
        for cell in columns[:-1]:
            m = cell_pattern.match(cell)
            if not m:
                continue

            c = m.groupdict('')
            cellattr = self.parse_params(c['parameters'], align_type='table')

            # Get the width of this cell.
            width = cellattr.get('colspan', 1)

            # Is this a header? Decide per cell, starting from the row
            # setting, so that one header cell does not turn the rest
            # of the row into headers as well.
            if c['parameters'].count('_'):
                td_tag = 'th'
            else:
                td_tag = row_tag

            if td_tag == 'th':
                # Set the default alignment for all cells below this one.
                # This is a little tricky because this header can have
                # a colspan set.
                for i in range(col, col + width):
                    default_align[i] = cellattr.get('align', None)
            else:
                # Apply the default align, if any.
                cellattr['align'] = cellattr.get('align', default_align.get(col, None))

            open_td = self.build_open_tag(td_tag, cellattr)
            close_td = '</%s>\n' % td_tag

            output.append(open_td + self.inline(c['text'].strip()) + close_td)

            col += width

        output.append('</tr>\n')

    return open_tag + ''.join(output) + close_tag
def escape(self, text):
    """Return the text untouched.

    This is the handler for escaped text: whatever it receives is
    handed back without any processing.

    ---
    h1. Escaping

    If you don't want Textile processing a block, you can simply
    enclose it inside @==@:

    pre. p. Regular paragraph

    pre. ==
    Escaped portion -- will not be formatted
    by Textile at all
    ==

    pre. p. Back to normal.

    This can also be used inline, disabling the formatting temporarily:

    pre. p. This is ==*a test*== of escaping.
    """
    return text
def itex(self, text):
    """Convert itex to MathML.

    If the itex2mml binary is set, the text is piped through it with
    a shell command and replaced by the command's output. Otherwise,
    or when running the command raises an exception, the text is
    returned as is.

    ---
    h1. itex

    Textile can automatically convert itex code to MathML(Mathematical Markup Language)
    for you, if you have the itex2MML binary (you can download it
    from the "Movable Type plugin":http://golem.ph.utexas.edu/~distler/blog/files/itexToMML.tar.gz).

    Block equations should be enclosed inbetween @\\[@ and @\\]@:

    pre. \\[ e^{i\\pi} + 1 = 0 \\]

    Will be translated to:

    pre. <math xmlns='http://www.w3.org/1998/Math/MathML' mode='display'>
    <msup><mi>e</mi> <mrow><mi>i</mi>
    <mi>&amp;pi;</mi></mrow></msup>
    <mo>+</mo><mn>1</mn><mo>=</mo><mn>0</mn>
    </math>

    Equations can also be displayed inline:

    pre. Euler's formula, $e^{i\\pi}+1=0$, ...

    (Note that if you want to display MathML(Mathematical Markup Language)
    your content must be served as @application/xhtml+xml@, which is not
    accepted by all browsers.)
    """
    if itex2mml:
        # The text ends up inside a shell command. Within single quotes
        # the only character the shell treats specially is the single
        # quote itself, so close the quoting, emit an escaped quote and
        # reopen it. Without this a quote in the text breaks the command
        # and lets the text run arbitrary shell code.
        quoted = "'" + text.replace("'", "'\\''") + "'"

        try:
            pipe = os.popen("echo %s | %s" % (quoted, itex2mml))
            try:
                text = pipe.read()
            finally:
                # Always release the pipe, even if reading fails.
                pipe.close()
        except Exception:
            # The conversion is best effort: on failure keep the
            # original text.
            pass

    return text
def about(self, text=None):
    """Show PyTextile's functionalities.

    Builds an introduction to PyTextile out of the module docstring
    and the docstrings of the processing methods. For each docstring
    only the part after its first run of three dashes is used; it is
    rendered with textile() at a header offset that reflects its place
    in the outline. The text argument is not used.

    Can be called when running the main script or if you write the
    following line:

        'tell me about textile.'

    But keep it a secret!
    """
    sections = [
        textile('h1. This is Textile', head_offset=self.head_offset),
        textile(__doc__.split('---', 1)[1], head_offset=self.head_offset),
    ]

    # Each entry pairs a documented method with its header offset.
    documented = [(self.split_text, 1),
                  (self.paragraph, 2),
                  (self.pre, 2),
                  (self.bc, 2),
                  (self.blockquote, 2),
                  (self.dl, 2),
                  (self.header, 2),
                  (self.footnote, 2),
                  (self.escape, 2),
                  (self.itex, 2),
                  (self.ol, 2),
                  (self.ul, 2),
                  (self.table, 2),
                  (self.inline, 1),
                  (self.qtags, 2),
                  (self.glyphs, 2),
                  (self.macros, 2),
                  (self.acronym, 2),
                  (self.images, 1),
                  (self.links, 1),
                  (self.sanitize, 1),
                  ]

    for function, offset in documented:
        manual = function.__doc__.split('---', 1)[1]

        # Docstrings are indented in the source; remove that per line.
        manual = '\n'.join([row.strip() for row in manual.split('\n')])

        sections.append(textile(manual, head_offset=self.head_offset + offset))

    return '\n'.join(sections).replace('<br />', '')
def acronym(self, text):
    """Process acronyms.

    Every 'WORD(definition)' whose upper-case letters and digits are
    the same, in order, in the word and in the definition is turned
    into an <acronym> element with the definition as its title. After
    that, html_replace is used to wrap words of three or more capital
    letters that are not followed by '(' in a span of class "caps".

    Acronyms can have letters in upper and lower caps, or even numbers,
    provided that the numbers and upper caps are the same in the
    abbreviation and in the description. For example:

        XHTML(eXtensible HyperText Markup Language)
        OPeNDAP(Open source Project for a Network Data Access Protocol)
        L94(Levitus 94)

    are all valid acronyms.

    ---
    h1. Acronyms

    You can define acronyms in your text the following way:

    pre. This is XHTML(eXtensible HyperText Markup Language).

    The resulting code is:

    pre. <p><acronym title="eXtensible HyperText Markup Language"><span class="caps">XHTML</span></acronym></p>

    Acronyms can have letters in upper and lower caps, or even numbers,
    provided that the numbers and upper caps are the same in the
    abbreviation and in the description. For example:

    pre. XHTML(eXtensible HyperText Markup Language)
    OPeNDAP(Open source Project for a Network Data Access Protocol)
    L94(Levitus 94)

    are all valid acronyms.
    """
    # Find the acronyms.
    acronyms = r'''(?P<acronym>[\w]+)\((?P<definition>[^\(\)]+?)\)'''

    # Raw string: '\d' must reach the regex engine as written.
    caps = re.compile(r'[A-Z\d]+')

    # Check all acronyms.
    for abbreviation, definition in re.findall(acronyms, text):
        caps_acronym = ''.join(caps.findall(abbreviation))
        caps_definition = ''.join(caps.findall(definition))
        if caps_acronym and caps_acronym == caps_definition:
            text = text.replace('%s(%s)' % (abbreviation, definition), '<acronym title="%s">%s</acronym>' % (definition, abbreviation))

    text = html_replace(r'''(^|\s)([A-Z]{3,})\b(?!\()''', r'''\1<span class="caps">\2</span>''', text)

    return text
def footnotes(self, text):
    """Add titles to footnotes references.

    Looks for rendered footnote paragraphs
    (<p class="footnote" id="fnN"><sup>N</sup>...</p> on a single
    line) and adds the footnote text, stripped of HTML tags, as the
    title of every link pointing to '#fnN'.
    """
    footnote_re = re.compile(r'''<p class="footnote" id="fn(?P<n>\d+)"><sup>(?P=n)</sup>(?P<note>.*)</p>''')

    for found in footnote_re.finditer(text):
        number = found.group('n')

        # The title is plain text: trim it and strip the HTML from it.
        title = re.sub('<.*?>', '', found.group('note').strip())

        bare_link = '<a href="#fn%s">' % number
        titled_link = '<a href="#fn%s" title="%s">' % (number, title)
        text = text.replace(bare_link, titled_link)

    return text
def macros(self, m):
    """Quick macros.

    This function replaces macros inside brackets using a built-in
    dictionary, and also unicode names if the key doesn't exist.

    `m` is a regular expression match whose group 1 holds the text found
    between the curly braces. The return value is the replacement string:
    a numeric character reference when the macro or Unicode character name
    is known, otherwise the original `{...}` text unchanged.

    ---
    h1. Macros

    Textile has support for character macros, which should be enclosed
    in curly braces. A few useful ones are:

    pre. {C=} or {=C}: euro sign
    {+-} or {-+}: plus-minus sign
    {L-} or {-L}: pound sign.

    You can also make accented characters:

    pre. Expos{e'}

    Becomes:

    pre. <p>Expos&amp;#233;</p>

    You can also specify Unicode names like:

    pre. {umbrella}
    {white smiling face}
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.
    name = m.group(1)

    table = {'c|': '&#162;',      # cent sign
             '|c': '&#162;',      # cent sign
             'L-': '&#163;',      # pound sign
             '-L': '&#163;',      # pound sign
             'Y=': '&#165;',      # yen sign
             '=Y': '&#165;',      # yen sign
             '(c)': '&#169;',     # copyright sign
             '<<': '&#171;',      # left-pointing double angle quotation
             '(r)': '&#174;',     # registered sign
             '+_': '&#177;',      # plus-minus sign
             '_+': '&#177;',      # plus-minus sign
             '+-': '&#177;',      # plus-minus sign (spelling used in the help text)
             '-+': '&#177;',      # plus-minus sign (spelling used in the help text)
             '>>': '&#187;',      # right-pointing double angle quotation
             '1/4': '&#188;',     # vulgar fraction one quarter
             '1/2': '&#189;',     # vulgar fraction one half
             '3/4': '&#190;',     # vulgar fraction three quarters
             'A`': '&#192;',      # latin capital letter a with grave
             '`A': '&#192;',      # latin capital letter a with grave
             'A\'': '&#193;',     # latin capital letter a with acute
             '\'A': '&#193;',     # latin capital letter a with acute
             'A^': '&#194;',      # latin capital letter a with circumflex
             '^A': '&#194;',      # latin capital letter a with circumflex
             'A~': '&#195;',      # latin capital letter a with tilde
             '~A': '&#195;',      # latin capital letter a with tilde
             'A"': '&#196;',      # latin capital letter a with diaeresis
             '"A': '&#196;',      # latin capital letter a with diaeresis
             'Ao': '&#197;',      # latin capital letter a with ring above
             'oA': '&#197;',      # latin capital letter a with ring above
             'AE': '&#198;',      # latin capital letter ae
             'C,': '&#199;',      # latin capital letter c with cedilla
             ',C': '&#199;',      # latin capital letter c with cedilla
             'E`': '&#200;',      # latin capital letter e with grave
             '`E': '&#200;',      # latin capital letter e with grave
             'E\'': '&#201;',     # latin capital letter e with acute
             '\'E': '&#201;',     # latin capital letter e with acute
             'E^': '&#202;',      # latin capital letter e with circumflex
             '^E': '&#202;',      # latin capital letter e with circumflex
             'E"': '&#203;',      # latin capital letter e with diaeresis
             '"E': '&#203;',      # latin capital letter e with diaeresis
             'I`': '&#204;',      # latin capital letter i with grave
             '`I': '&#204;',      # latin capital letter i with grave
             'I\'': '&#205;',     # latin capital letter i with acute
             '\'I': '&#205;',     # latin capital letter i with acute
             'I^': '&#206;',      # latin capital letter i with circumflex
             '^I': '&#206;',      # latin capital letter i with circumflex
             'I"': '&#207;',      # latin capital letter i with diaeresis
             '"I': '&#207;',      # latin capital letter i with diaeresis
             'D-': '&#208;',      # latin capital letter eth
             '-D': '&#208;',      # latin capital letter eth
             'N~': '&#209;',      # latin capital letter n with tilde
             '~N': '&#209;',      # latin capital letter n with tilde
             'O`': '&#210;',      # latin capital letter o with grave
             '`O': '&#210;',      # latin capital letter o with grave
             'O\'': '&#211;',     # latin capital letter o with acute
             '\'O': '&#211;',     # latin capital letter o with acute
             'O^': '&#212;',      # latin capital letter o with circumflex
             '^O': '&#212;',      # latin capital letter o with circumflex
             'O~': '&#213;',      # latin capital letter o with tilde
             '~O': '&#213;',      # latin capital letter o with tilde
             'O"': '&#214;',      # latin capital letter o with diaeresis
             '"O': '&#214;',      # latin capital letter o with diaeresis
             'O/': '&#216;',      # latin capital letter o with stroke
             '/O': '&#216;',      # latin capital letter o with stroke
             'U`': '&#217;',      # latin capital letter u with grave
             '`U': '&#217;',      # latin capital letter u with grave
             'U\'': '&#218;',     # latin capital letter u with acute
             '\'U': '&#218;',     # latin capital letter u with acute
             'U^': '&#219;',      # latin capital letter u with circumflex
             '^U': '&#219;',      # latin capital letter u with circumflex
             'U"': '&#220;',      # latin capital letter u with diaeresis
             '"U': '&#220;',      # latin capital letter u with diaeresis
             'Y\'': '&#221;',     # latin capital letter y with acute
             '\'Y': '&#221;',     # latin capital letter y with acute
             'a`': '&#224;',      # latin small letter a with grave
             '`a': '&#224;',      # latin small letter a with grave
             'a\'': '&#225;',     # latin small letter a with acute
             '\'a': '&#225;',     # latin small letter a with acute
             'a^': '&#226;',      # latin small letter a with circumflex
             '^a': '&#226;',      # latin small letter a with circumflex
             'a~': '&#227;',      # latin small letter a with tilde
             '~a': '&#227;',      # latin small letter a with tilde
             'a"': '&#228;',      # latin small letter a with diaeresis
             '"a': '&#228;',      # latin small letter a with diaeresis
             'ao': '&#229;',      # latin small letter a with ring above
             'oa': '&#229;',      # latin small letter a with ring above
             'ae': '&#230;',      # latin small letter ae
             'c,': '&#231;',      # latin small letter c with cedilla
             ',c': '&#231;',      # latin small letter c with cedilla
             'e`': '&#232;',      # latin small letter e with grave
             '`e': '&#232;',      # latin small letter e with grave
             'e\'': '&#233;',     # latin small letter e with acute
             '\'e': '&#233;',     # latin small letter e with acute
             'e^': '&#234;',      # latin small letter e with circumflex
             '^e': '&#234;',      # latin small letter e with circumflex
             'e"': '&#235;',      # latin small letter e with diaeresis
             '"e': '&#235;',      # latin small letter e with diaeresis
             'i`': '&#236;',      # latin small letter i with grave
             '`i': '&#236;',      # latin small letter i with grave
             'i\'': '&#237;',     # latin small letter i with acute
             '\'i': '&#237;',     # latin small letter i with acute
             'i^': '&#238;',      # latin small letter i with circumflex
             '^i': '&#238;',      # latin small letter i with circumflex
             'i"': '&#239;',      # latin small letter i with diaeresis
             '"i': '&#239;',      # latin small letter i with diaeresis
             'n~': '&#241;',      # latin small letter n with tilde
             '~n': '&#241;',      # latin small letter n with tilde
             'o`': '&#242;',      # latin small letter o with grave
             '`o': '&#242;',      # latin small letter o with grave
             'o\'': '&#243;',     # latin small letter o with acute
             '\'o': '&#243;',     # latin small letter o with acute
             'o^': '&#244;',      # latin small letter o with circumflex
             '^o': '&#244;',      # latin small letter o with circumflex
             'o~': '&#245;',      # latin small letter o with tilde
             '~o': '&#245;',      # latin small letter o with tilde
             'o"': '&#246;',      # latin small letter o with diaeresis
             '"o': '&#246;',      # latin small letter o with diaeresis
             ':-': '&#247;',      # division sign
             '-:': '&#247;',      # division sign
             'o/': '&#248;',      # latin small letter o with stroke
             '/o': '&#248;',      # latin small letter o with stroke
             'u`': '&#249;',      # latin small letter u with grave
             '`u': '&#249;',      # latin small letter u with grave
             'u\'': '&#250;',     # latin small letter u with acute
             '\'u': '&#250;',     # latin small letter u with acute
             'u^': '&#251;',      # latin small letter u with circumflex
             '^u': '&#251;',      # latin small letter u with circumflex
             'u"': '&#252;',      # latin small letter u with diaeresis
             '"u': '&#252;',      # latin small letter u with diaeresis
             'y\'': '&#253;',     # latin small letter y with acute
             '\'y': '&#253;',     # latin small letter y with acute
             'y"': '&#255;',      # latin small letter y with diaeresis
             '"y': '&#255;',      # latin small letter y with diaeresis
             'OE': '&#338;',      # latin capital ligature oe
             'oe': '&#339;',      # latin small ligature oe
             '*': '&#8226;',      # bullet
             'Fr': '&#8355;',     # french franc sign
             'L=': '&#8356;',     # lira sign
             '=L': '&#8356;',     # lira sign
             'Rs': '&#8360;',     # rupee sign
             'C=': '&#8364;',     # euro sign
             '=C': '&#8364;',     # euro sign
             'tm': '&#8482;',     # trade mark sign
             '<-': '&#8592;',     # leftwards arrow
             '->': '&#8594;',     # rightwards arrow
             '<=': '&#8656;',     # leftwards double arrow
             '=>': '&#8658;',     # rightwards double arrow
             '=/': '&#8800;',     # not equal to
             '/=': '&#8800;',     # not equal to
             '<_': '&#8804;',     # less-than or equal to
             '_<': '&#8804;',     # less-than or equal to
             '>_': '&#8805;',     # greater-than or equal to
             '_>': '&#8805;',     # greater-than or equal to
             ':(': '&#9785;',     # white frowning face
             ':)': '&#9786;',     # white smiling face
             'spade': '&#9824;',  # black spade suit
             'club': '&#9827;',   # black club suit
             'heart': '&#9829;',  # black heart suit
             'diamond': '&#9830;',  # black diamond suit
            }

    # Try the key.
    if name in table:
        return table[name]

    try:
        # Try a unicode entity.
        reference = unicodedata.lookup(name).encode('ascii', 'xmlcharrefreplace')
    except (KeyError, TypeError, ValueError):
        # Unknown character name (or a name that cannot be looked up at
        # all): return the unmodified entity, braces included.
        return '{%s}' % name

    # On Python 3 encode() yields bytes; hand back text so the result can be
    # spliced into the surrounding string by re.sub().
    if not isinstance(reference, str):
        reference = reference.decode('ascii')

    return reference
def glyphs(self, text):
    """Glyph formatting.

    This function replaces quotation marks, dashes and a few other
    symbols with numerical entities. The em/en dash definitions
    come from http://alistapart.com/articles/emen/.

    `text` is processed in this order: curly-brace macros, LaTeX style
    quotes, then (outside of HTML tags only) the glyph substitutions and
    the linkification of bare URLs and e-mail addresses. The converted
    text is returned.

    ---
    h1. Glyphs

    Textile replaces some of the characters in your text with their
    equivalent numerical entities. These include:

    * Replace single and double primes used as quotation marks with HTML(HyperText Markup Language) entities for opening and closing quotation marks in readable text, while leaving untouched the primes required within HTML(HyperText Markup Language) tags.
    * Replace double hyphens (==--==) with an em-dash (&#8212;) entity.
    * Replace triple hyphens (==---==) with two em-dash (&#8212;&#8212;) entities.
    * Replace single hyphens surrounded by spaces with an en-dash (&#8211;) entity.
    * Replace triplets of periods (==...==) with an ellipsis (&#8230;) entity.
    * Convert many nonstandard characters to browser-safe entities corresponding to keyboard input.
    * Convert ==(TM)==, ==(R)==, and ==(C)== to &#8482;, &#174;, and &#169;.
    * Convert the letter x to a dimension sign: 2==x==4 to 2x4 and 8 ==x== 10 to 8x10.
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.

    # (search, replace) pairs; they are applied in this order, so the more
    # specific patterns have to stay ahead of the catch-all ones.
    glyph_rules = [(r'''"(?<!\w)\b''', r'''&#8220;'''),            # double quotes
                   (r'''"''', r'''&#8221;'''),                     # double quotes
                   (r"""\b'""", r'''&#8217;'''),                   # single quotes
                   (r"""'(?<!\w)\b""", r'''&#8216;'''),            # single quotes
                   (r"""'""", r'''&#8217;'''),                     # single single quote
                   (r'''(\b|^)( )?\.{3}''', r'''\1&#8230;'''),     # ellipsis
                   (r'''\b---\b''', r'''&#8212;&#8212;'''),        # double em dash
                   (r'''\s?--\s?''', r'''&#8212;'''),              # em dash
                   (r'''(\d+)-(\d+)''', r'''\1&#8211;\2'''),       # en dash (1954-1999)
                   (r'''(\d+)-(\W)''', r'''\1&#8212;\2'''),        # em dash (1954--)
                   (r'''\s-\s''', r''' &#8211; '''),               # en dash
                   (r'''(\d+) ?x ?(\d+)''', r'''\1&#215;\2'''),    # dimension sign
                   (r'''\b ?(\((tm|TM)\))''', r'''&#8482;'''),     # trademark
                   (r'''\b ?(\([rR]\))''', r'''&#174;'''),         # registered
                   (r'''\b ?(\([cC]\))''', r'''&#169;'''),         # copyright
                   (r'''([^\s])\[(\d+)\]''',
                    r'''\1<sup class="footnote"><a href="#fn\2">\2</a></sup>'''),  # footnote
                  ]

    # Apply macros.
    text = re.sub(r'''{([^}]+)}''', self.macros, text)

    # LaTeX style quotes.
    text = text.replace('\x60\x60', '&#8220;')
    text = text.replace('\xb4\xb4', '&#8221;')

    # Linkify URL and emails.
    url = r'''(?=[a-zA-Z0-9./#])                          # Must start correctly
              ((?:                                        # Match the leading part (proto://hostname, or just hostname)
                  (?:ftp|https?|telnet|nntp)              #     protocol
                  ://                                     #     ://
                  (?:                                     #     Optional 'username:password@'
                      \w+                                 #         username
                      (?::\w+)?                           #         optional :password
                      @                                   #         @
                  )?                                      #
                  [-\w]+(?:\.\w[-\w]*)+                   #     hostname (sub.example.com)
              )                                           #
              (?::\d+)?                                   # Optional port number
              (?:                                         # Rest of the URL, optional
                  /?                                      #     Start with '/'
                  [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]*       #     Can't start with these
                  (?:                                     #
                      [.!,?;:]+                           #     One or more of these
                      [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+   #     Can't finish with these
                      #'"                                 #     # or ' or "
                  )*                                      #
              )?)                                         #
           '''

    email = r'''(?:mailto:)?            # Optional mailto:
                ([-\+\w]+               # username
                \@                      # at
                [-\w]+(?:\.\w[-\w]*)+)  # hostname
             '''

    # Compile once: the same two patterns are applied to every segment.
    url_re = re.compile(url, re.VERBOSE)
    email_re = re.compile(email, re.VERBOSE)

    # Split the text into an array at <>. The capturing group keeps the
    # tags in the result; a text without any tag comes back as one single
    # segment, so this loop also covers the "no HTML" case.
    lines = []
    for line in re.split('(<.*?>)', text):
        # Tags are passed through untouched; only the text between them
        # gets glyphs and links.
        if not re.match('<.*?>', line):
            for glyph_search, glyph_replace in glyph_rules:
                line = preg_replace(glyph_search, glyph_replace, line)

            # Linkify.
            line = url_re.sub(r'''<a href="\1">\1</a>''', line)
            line = email_re.sub(r'''<a href="mailto:\1">\1</a>''', line)

        lines.append(line)

    return ''.join(lines)
def qtags(self, text):
    """Quick tags formatting.

    This function does the inline formatting of text, like
    bold, italic, strong and also itex code.

    `text` is first scanned for `$...$` spans, which are handed to
    `self.itex`, then for `^superscript^`, and finally for every quick tag
    in the table below, in table order. Each tag's patterns are merged into
    `self.res` before its regular expression is built. The formatted text
    is returned.

    ---
    h1. Quick tags

    Quick tags allow you to format your text, making it bold,
    emphasized or small, for example. The quick tags operators
    include:

    dl. ==*strong*==:Translates into @<strong>strong</strong>@.
    ==_emphasis_==:Translates into @<em>emphasis</em>@.
    ==**bold**==:Translates into @<b>bold</b>@.
    ==__italics__==:Translates into @<i>italics</i>@.
    ==++bigger++==:Translates into @<big>bigger</big>@.
    ==--smaller--==:Translates into: @<small>smaller</small>@.
    ==-deleted text-==:Translates into @<del>deleted text</del>@.
    ==+inserted text+==:Translates into @<ins>inserted text</ins>@.
    ==^superscript^==:Translates into @<sup>superscript</sup>@.
    ==~subscript~==:Translates into @<sub>subscript</sub>@.
    ==%span%==:Translates into @<span>span</span>@.
    ==@code@==:Translates into @<code>code</code>@.

    Note that within a "==@==...==@==" section, @<@ and @>@ are
    translated into HTML entities automatically.

    Inline formatting operators accept the following modifiers:

    dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
    [ll]:A language identifier (for a "lang" attribute).
    (class) or (#id) or (class#id):For CSS(Cascading Style Sheets) class and id attributes.
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.

    # itex2mml.
    text = re.sub(r'\$(.*?)\$', lambda m: self.itex(m.group()), text)

    # Quick tags: (marker, HTML tag, regex fragments). 'qf' matches the
    # marker itself, 'cls' is the character the tagged text must not start
    # with. Raw strings keep the backslashes for the regex engine instead
    # of relying on Python leaving unknown escapes alone.
    qtags = [('**', 'b',      {'qf': r'(?<!\*)\*\*(?!\*)',    'cls': r'\*'}),
             ('__', 'i',      {'qf': r'(?<!_)__(?!_)',        'cls': r'_'}),
             ('??', 'cite',   {'qf': r'\?\?(?!\?)',           'cls': r'\?'}),
             ('-',  'del',    {'qf': r'(?<!\-)\-(?!\-)',      'cls': r'-'}),
             ('+',  'ins',    {'qf': r'(?<!\+)\+(?!\+)',      'cls': r'\+'}),
             ('*',  'strong', {'qf': r'(?<!\*)\*(?!\*)',      'cls': r'\*'}),
             ('_',  'em',     {'qf': r'(?<!_)_(?!_)',         'cls': r'_'}),
             ('++', 'big',    {'qf': r'(?<!\+)\+\+(?!\+)',    'cls': r'\+\+'}),
             ('--', 'small',  {'qf': r'(?<!\-)\-\-(?!\-)',    'cls': r'\-\-'}),
             ('~',  'sub',    {'qf': r'(?<!\~)\~(?!(\\/~))',  'cls': r'\~'}),
             ('@',  'code',   {'qf': r'(?<!@)@(?!@)',         'cls': r'@'}),
             ('%',  'span',   {'qf': r'(?<!%)%(?!%)',         'cls': r'%'}),
            ]

    # Superscript.
    text = re.sub(r'''(?<!\^)\^(?!\^)(.+?)(?<!\^)\^(?!\^)''', r'''<sup>\1</sup>''', text)

    # This is from the perl version of Textile.
    for _marker, htmltag, redict in qtags:
        # The pattern below is filled in from self.res, so the fragments of
        # the current tag have to be merged into it first.
        self.res.update(redict)
        p = re.compile(r'''(?:                          #
                               ^                        # Start of string
                               |                        #
                               (?<=[\s>'"])             # Whitespace, end of tag, quotes
                               |                        #
                               (?P<pre>[{[])            # Surrounded by [ or {
                               |                        #
                               (?<=%(punct)s)           # Punctuation
                           )                            #
                           %(qf)s                       # opening tag
                           %(qattr)s                    # attributes
                           (?P<text>[^%(cls)s\s].*?)    # text
                           (?<=\S)                      # non-whitespace
                           %(qf)s                       #
                           (?:                          #
                               $                        # End of string
                               |                        #
                               (?P<post>[\]}])          # Surrounded by ] or }
                               |                        #
                               (?=%(punct)s{1,2}|\s)    # punctuation
                           )                            #
                        ''' % self.res, re.VERBOSE)

        def _replace(m):
            # Unmatched optional groups come back as '' instead of None.
            c = m.groupdict('')

            attributes = self.parse_params(c['parameters'])
            open_tag = self.build_open_tag(htmltag, attributes)
            close_tag = '</%s>' % htmltag

            # Replace < and > inside <code></code>.
            if htmltag == 'code':
                c['text'] = c['text'].replace('<', '&lt;')
                c['text'] = c['text'].replace('>', '&gt;')

            return open_tag + c['text'] + close_tag

        text = p.sub(_replace, text)

    return text
def images(self, text):
    """Process images.

    This function processes image tags, with or without links. Images
    can have vertical and/or horizontal alignment, and can be resized
    (inefficiently) using width and height tags.

    Every image markup found in `text` is replaced by whatever
    `self.image` returns for it; the resulting text is returned.

    ---
    h1. Images

    An image is generated by enclosing the image source in @!@:

    pre. !/path/to/image!

    You may optionally specify an alternative text for the image, which
    will also be used as its title:

    pre. !image.jpg (Nice picture)!

    Becomes:

    pre. <p><img src="image.jpg" alt="Nice picture" title="Nice picture" /></p>

    If you want to make the image point to a link, simply append a
    comma and the URL(Universal Republic of Love) to the image:

    pre. !image.jpg!:http://diveintopython.org

    Images can also be resized. These are all equivalent:

    pre. !image.jpg 10x20!
    !image.jpg 10w 20h!
    !image.jpg 20h 10w!

    The image @image.jpg@ will be resized to width 10 and height 20.

    Modifiers to the @<img>@ tag go after the opening @!@:

    pre. !(class#id)^image.jpg!

    Allowed modifiers include:

    dl. &lt;:Align the image to the left (causes the image to float if CSS options are enabled).
    &gt;:Align the image to the right (causes the image to float if CSS options are enabled).
    - (dash):Aligns the image to the middle.
    ^:Aligns the image to the top.
    ~ (tilde):Aligns the image to the bottom.
    {style rule}:Applies a CSS style rule to the image.
    (class) or (#id) or (class#id):Applies a CSS class and/or id to the image.
    ( (one or more):Pads 1em on the left for each '(' character.
    ) (one or more):Pads 1em on the right for each ')' character.

    Images receive the class "top" when using top alignment, "bottom"
    for bottom alignment and "middle" for middle alignment.
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.

    # Compile the beast.
    pattern = re.compile(r'''\!               # Opening !
                             %(iattr)s        # Image attributes
                             (?P<src>%(url)s) # Image src
                             \s?              # Optional whitesapce
                             (                #
                                 \(           #
                                 (?P<alt>.*?) # Optional (alt) attribute
                                 \)           #
                             )?               #
                             \s?              # Optional whitespace
                             %(resize)s       # Resize parameters
                             \!               # Closing !
                             (                # Optional link
                                 :            #    starts with ':'
                                 (?P<link>    #
                                 %(url)s      #    link HREF
                                 )            #
                             )?               #
                          ''' % self.res, re.VERBOSE)

    for match in pattern.finditer(text):
        # Named groups, with '' standing in for the ones that did not match.
        fields = match.groupdict('')
        groups = match.groups()

        # Build the parameters for the <img /> tag.
        attributes = self.parse_params(fields['parameters'], align_type='image')
        attributes.update(fields)
        alt = attributes['alt']
        if alt:
            attributes['title'] = alt

        # Append height and width. The size is read by group position, one
        # pair of positions per accepted spelling.
        # NOTE(review): these positions depend on how many groups the
        # 'iattr' and 'resize' patterns in self.res define -- confirm
        # before touching either pattern.
        attributes['width'] = groups[5] or groups[7] or groups[10]
        attributes['height'] = groups[6] or groups[8] or groups[9]

        # Create the image tag and put it in place of the markup.
        text = text.replace(match.group(), self.image(attributes))

    return text
def image(self, attributes):
    """Process each image.

    This method builds the <img> tag for each image in the text. It is
    kept apart from the 'images' method so it can easily be overridden
    when subclassing Textiler. Useful if you want to download and/or
    process the images, for example.

    `attributes` must contain the keys 'link' and 'parameters'; both are
    removed from the dictionary, and everything left is passed on to
    `self.build_open_tag` as attributes of the tag. The tag is returned,
    wrapped in an <a> element when 'link' was not empty.
    """
    # Neither of these is an <img> attribute, so take them out first.
    link = attributes.pop('link')
    attributes.pop('parameters')

    # Build the tag.
    tag = self.build_open_tag('img', attributes, single=1)

    if not link:
        return tag

    # Escape bare ampersands in the link target.
    href = preg_replace('&(?!(#|amp))', '&amp;', link)
    return '<a href="%s">%s</a>' % (href, tag)
def links(self, text):
    """Process links.

    This function is responsible for processing links. It has
    some nice shortcuts to Google, Amazon and IMDB queries.

    Two forms are recognised, in this order: a link enclosed in square
    brackets, then a bare quoted link. The link target is looked up in
    `self._links` first and in `self.searches` second. `text` is returned
    with every link markup replaced by an <a> element.

    ---
    h1. Links

    A links is done the following way:

    pre. "This is the text link":http://example.com

    The result from this markup is:

    pre. <p><a href="http://example.com">This is the text link</a></p>

    You can add an optional @title@ attribute:

    pre. "This is the text link(This is the title)":http://example.com

    The link can be customised as well:

    pre. "(nospam)E-mail me please":mailto:someone@example.com

    You can use either single or double quotes. They must be enclosed in
    whitespace, punctuation or brackets:

    pre. You["gotta":http://example.com]seethis!

    If you are going to reference the same link a couple of times, you
    can define a lookup list anywhere on your document:

    pre. [python]http://www.python.org

    Links to the Python website can then be defined the following way:

    pre. "Check this":python

    There are also shortcuts for Amazon, IMDB(Internet Movie DataBase) and
    Google queries:

    pre. "Has anyone seen this guy?":imdb:Stephen+Fry
    "Really nice book":amazon:Goedel+Escher+Bach
    "PyBlosxom":google
    ["Using Textile and Blosxom with Python":google:python blosxom textile]

    Becomes:

    pre. <a href="http://www.imdb.com/Find?for=Stephen+Fry">Has anyone seen this guy?</a>
    <a href="http://www.amazon.com/exec/obidos/external-search?index=blended&amp;keyword=Goedel+Escher+Bach">Really nice book</a>
    <a href="http://www.google.com/search?q=PyBlosxom">PyBlosxom</a>
    <a href="http://www.google.com/search?q=python+blosxom+textile">Using Textile and Blosxom with Python</a>
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.
    linkres = [r'''\[                           # [
                   (?P<quote>"|')               # Opening quotes
                   %(lattr)s                    # Link attributes
                   (?P<text>[^"]+?)             # Link text
                   \s?                          # Optional whitespace
                   (?:\((?P<title>[^\)]+?)\))?  # Optional (title)
                   (?P=quote)                   # Closing quotes
                   :                            # :
                   (?P<href>[^\]]+)             # HREF
                   \]                           # ]
                ''' % self.res,
               r'''(?P<quote>"|')               # Opening quotes
                   %(lattr)s                    # Link attributes
                   (?P<text>[^"]+?)             # Link text
                   \s?                          # Optional whitespace
                   (?:\((?P<title>[^\)]+?)\))?  # Optional (title)
                   (?P=quote)                   # Closing quotes
                   :                            # :
                   (?P<href>%(url)s)            # HREF
                ''' % self.res]

    for linkre in linkres:
        p = re.compile(linkre, re.VERBOSE)
        for m in p.finditer(text):
            # Unmatched optional groups come back as '' instead of None.
            c = m.groupdict('')

            attributes = self.parse_params(c['parameters'])
            # The title lands inside a double-quoted attribute.
            attributes['title'] = c['title'].replace('"', '&quot;')

            # Search lookup list.
            link = self._links.get(c['href'], None) or c['href']

            # Hyperlinks for Amazon, IMDB and Google searches: the part
            # before the first ':' names the search, the rest is the query.
            # Without a ':' the link text itself is the query.
            parts = link.split(':', 1)
            proto = parts[0]
            if len(parts) == 2:
                query = parts[1]
            else:
                query = c['text']

            query = query.replace(' ', '+')

            # Look for smart search. ('in' replaces dict.has_key(), which
            # no longer exists on Python 3.)
            if proto in self.searches:
                link = self.searches[proto] % query

            # Fix URL.
            attributes['href'] = preg_replace('&(?!(#|amp))', '&amp;', link)

            open_tag = self.build_open_tag('a', attributes)
            close_tag = '</a>'

            repl = open_tag + c['text'] + close_tag

            text = text.replace(m.group(), repl)

    return text
def format(self, text):
    """Text formatting.

    This function basically defines the order in which the
    formatting is applied: quick tags, images, links, acronyms and
    finally glyphs. Each step receives the output of the previous one;
    the result of the last step is returned.
    """
    # The steps are looked up by name one at a time, right before they run.
    for step in ('qtags', 'images', 'links', 'acronym', 'glyphs'):
        text = getattr(self, step)(text)

    return text
def inline(self, text):
    """Inline formatting.

    This function calls the formatting on the inline text,
    taking care to avoid the escaped parts.

    Spans written as ==...== are escaped: they lose their two-character
    markers and are otherwise copied through untouched. Everything else
    is passed to `self.format`. The reassembled text is returned.

    ---
    h1. Inline

    Inline formatting is applied within a block of text.
    """
    # NOTE(review): the docstring text after '---' looks like end-user
    # Textile help (possibly what 'tell me about textile.' prints) --
    # confirm before rewording it.

    # Split the text at the ==...== spans. Because of the capturing group
    # the spans themselves end up at the odd positions of the result and
    # the text around them at the even ones; a text without any span comes
    # back as a single even-position piece.
    pieces = []
    for position, piece in enumerate(re.split('(==.*?==)', text)):
        if position % 2:
            # Escaped span: drop the leading and trailing '=='.
            pieces.append(piece[2:-2])
        else:
            pieces.append(self.format(piece))

    return ''.join(pieces)
def textile(text, **args):
    """This is Textile.

    Generates XHTML from a simple markup developed by Dean Allen.

    A Textiler is created for `text`, and every keyword argument is
    handed on unchanged to its process() method, whose result is
    returned. This function should be called like this:

        textile(text, head_offset=0, validate=0, sanitize=0,
                encoding='latin-1', output='ASCII')
    """
    textiler = Textiler(text)
    return textiler.process(**args)
if __name__ == '__main__':
    # Running the module directly prints Textile's own overview. A single
    # parenthesised argument prints the same on Python 2 and Python 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(textile('tell me about textile.', head_offset=1))