2 # _*_ coding: latin1 _*_
5 A Humane Web Text Generator
8 * Make it work with Python 2.1.
9 * Make it work with Python 1.5.2? Or that's too optimistic?
12 To get an overview of all PyTextile's features, simply
13 type 'tell me about textile.' in a single line.
16 __authors__
= ["Roberto A. F. De Almeida (roberto@dealmeida.net)",
17 "Mark Pilgrim (f8dy@diveintomark.org)"]
18 __version__
= "2.0.10"
19 __date__
= "2004/10/06"
21 Copyright (c) 2004, Roberto A. F. De Almeida, http://dealmeida.net/
22 Copyright (c) 2003, Mark Pilgrim, http://diveintomark.org/
29 Copyright (c) 2003, Dean Allen, www.textism.com
32 Parts of the documentation and some of the regular expressions are (c) Brad
33 Choate, http://bradchoate.com/. Thanks, Brad!
36 Redistribution and use in source and binary forms, with or without
37 modification, are permitted provided that the following conditions are met:
39 * Redistributions of source code must retain the above copyright notice,
40 this list of conditions and the following disclaimer.
42 * Redistributions in binary form must reproduce the above copyright notice,
43 this list of conditions and the following disclaimer in the documentation
44 and/or other materials provided with the distribution.
46 * Neither the name Textile nor the names of its contributors may be used to
47 endorse or promote products derived from this software without specific
48 prior written permission.
50 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
51 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
54 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
55 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
56 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
57 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
58 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
59 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
60 POSSIBILITY OF SUCH DAMAGE.
63 1.0 - 2003/03/19 - MAP - initial release
64 1.01 - 2003/03/19 - MAP - don't strip whitespace within <pre> tags;
65 map high-bit ASCII to HTML numeric entities
66 1.02 - 2003/03/19 - MAP - changed hyperlink qtag expression to only
67 match valid URL characters (per RFC 2396); fixed preg_replace to
68 not match across line breaks (solves lots of problems with
69 mistakenly matching overlapping inline markup); fixed whitespace
70 stripping to only strip whitespace from beginning and end of lines,
71 not immediately before and after HTML tags.
72 1.03 - 2003/03/20 - MAP - changed hyperlink qtag again to more
73 closely match original Textile (fixes problems with links
74 immediately followed by punctuation -- somewhere Dean is
75 grinning right now); handle curly apostrophe with "ve"
76 contraction; clean up empty titles at end.
77 1.04 - 2003/03/23 - MAP - lstrip input to deal with extra spaces at
78 beginning of first line; tweaked list loop to handle consecutive lists
79 1.1 - 2003/06/06 - MAP - created initial test suite for links and images,
80 and fixed a bunch of related bugs to pass them
81 1.11 - 2003/07/20 - CL - don't demoronise unicode strings; handle
83 1.12 - 2003/07/23 - GW - print debug messages to stderr; handle bq(cite).
84 1.13 - 2003/07/23 - MAP - wrap bq. text in <p>...</p>
85 2 - 2004/03/26 - RAFA - rewritten from (almost) scratch to include
86 all features from Textile 2 and a little bit more.
87 2.0.1 - 2004/04/02 - RAFA - Fixed validating function that uses uTidyLib.
88 2.0.2 - 2004/04/02 - RAFA - Fixed problem with caps letters in URLs.
89 2.0.3 - 2004/04/19 - RAFA - Multiple classes are allowed, thanks to Dave
90 Anderson. The "lang" attribute is now removed from <code>, to be valid
91 XHTML. Fixed <span class="caps">UCAS</span> problem.
92 2.0.4 - 2004/05/20 - RAFA, CLB - Added inline formatting to table cells.
93 Curt Bergmann fixed a bug with the colspan formatting. Added Amazon
95 2.0.5 - 2004/06/01 - CL - Applied patch from Chris Lawrence to (1) fix
96 that Amazon associates ID was being added to all search URIs, (2)
97 customize the Amazon site used with the AMAZON variable, and (3) added
98 an "isbn" URI type that links directly to an Amazon product by ISBN or
100 2.0.6 - 2004/06/02 - RAFA - Fixed CAPS problem, again. I hope this is
102 2.0.7 - 2004/06/04 - RAFA, MW - Fixed bullet macro, thanks to Adam
103 Messinger. Added patch from Michal Wallace changing {}.pop() for
104 compatibility with Python 2.2.x.
105 2.0.8 - 2004/06/25 - RAFA - Strip tags when adding the content from a
106 footnote to the reference link. Escaped '<' and '>' in the self-
107 generated documentation.
108 2.0.9 - 2004/10/04 - RAFA - In images, if ALT is not defined, add an
109 empty attribute. Added "LaTeX" style open/close quotes. Fixed a bug
110 where the acronym definition was being formatted with inline rules.
111 Handle "broken" lines correctly, removing the <br /> from inside
113 2.0.10 - 2004/10/06 - RAFA, LO - Escape all non-escaped ampersands.
114 Applied "trivial patch" from Ludvig Omholt to remove newline right
118 # Set your encoding here.
121 # Output? Non-ASCII characters will be automatically
122 # converted to XML entities if you choose ASCII.
125 # PyTextile can optionally validate the generated
126 # XHTML code. We can use either mxTidy or uTidyLib.
127 # You can change the default behaviour here.
130 # If you want h1. to be translated to something other
131 # than <h1>, change this offset. You can also pass it
132 # as an argument to textile().
135 # If you want to use itex2mml, specify the full path
136 # to the binary here. You can download it from here:
137 # http://golem.ph.utexas.edu/~distler/blog/files/itexToMML.tar.gz
139 #itex2mml = '/usr/local/bin/itex2MML'
140 #itex2mml = '/usr/people/almeida/bin/itex2MML'
142 # PyTextile can optionally sanitize the generated XHTML,
143 # which is good for weblog comments or if you don't trust
150 # Amazon associate for links: "keywords":amazon
151 # If you don't have one, please consider leaving mine here as
152 # a small compensation for writing PyTextile. It's commented
154 #amazon_associate_id = 'bomtempo-21'
155 amazon_associate_id
= None
157 #AMAZON = 'www.amazon.co.uk'
158 AMAZON
= 'www.amazon.com'
167 def _in_tag(text
, tag
):
168 """Extracts text from inside a tag.
170 This function extracts the text from inside a given tag.
171 It's useful to get the text between <body></body> or
172 <pre></pre> when using the validators or the colorizer.
174 if text
.count('<%s' % tag
):
175 text
= text
.split('<%s' % tag
, 1)[1]
177 text
= text
.split('>', 1)[1]
178 if text
.count('</%s' % tag
):
179 text
= text
.split('</%s' % tag
, 1)[0]
181 text
= text
.strip().replace('\r\n', '\n')
186 # If you want PyTextile to automatically colorize
187 # your Python code, you need the htmlizer module
188 # from Twisted. (You can just grab this file from
189 # the distribution, it has no other dependencies.)
191 #from twisted.python import htmlizer
193 from StringIO
import StringIO
196 """Colorizer Python code.
198 This function wraps a text string in a StringIO,
199 and passes it to the htmlizer function from
202 # Fix line continuations.
203 code
= preg_replace(r
' \\\n', ' \\\\\n', code
)
205 code_in
= StringIO(code
)
206 code_out
= StringIO()
208 htmlizer
.filter(code_in
, code_out
)
210 # Remove <pre></pre> from input.
211 code
= _in_tag(code_out
.getvalue(), 'pre')
214 code
= code
.replace('<span class="py-src-newline">\n</span>', '<span class="py-src-newline"></span>\n')
222 # PyTextile can optionally validate the generated
223 # XHTML code using either mxTidy or uTidyLib.
226 from mx
.Tidy
import Tidy
229 """mxTidy's XHTML validator.
231 This function is a wrapper to mxTidy's validator.
233 nerrors
, nwarnings
, text
, errortext
= Tidy
.tidy(text
, output_xhtml
=1, numeric_entities
=1, wrap
=0)
234 return _in_tag(text
, 'body')
244 """uTidyLib's XHTML validator.
246 This function is a wrapper to uTidyLib's validator.
248 text
= tidy
.parseString(text
, output_xhtml
=1, add_xml_decl
=0, indent
=0, tidy_mark
=0)
249 return _in_tag(str(text
), 'body')
257 # This is good for debugging.
def _debug(s, level=1):
    """Print a debug message to sys.stderr.

    The message `s` is written only when the module-level DEBUGLEVEL is
    greater than or equal to `level`; otherwise the call does nothing.
    """
    if DEBUGLEVEL < level:
        return

    print >> sys.stderr, s
267 #############################
268 # Useful regular expressions.
270 # Horizontal alignment.
271 'align': r
'''(?:(?:<>|[<>=]) # Either '<>', '<', '>' or '='
272 (?![^\s]*(?:<>|[<>=]))) # Look-ahead to ensure it happens once
275 # Horizontal padding.
276 'padding': r
'''(?:[\(\)]+) # Any number of '(' and/or ')'
281 (?:\(\#[\w]+\)) # (#id)
283 (?:\((?:[\w]+(?:\s[\w]+)*) #
284 (?:\#[\w]+)?\)) # (class1 class2 ... classn#id) or (class1 class2 ... classn)
286 (?![^\s]*(?:\([\w#]+\))) # must happen once
290 'lang': r
'''(?:\[[\w-]+\]) # [lang]
291 (?![^\s]*(?:\[.*?\])) # must happen once
295 'style': r
'''(?:{[^\}]+}) # {style}
296 (?![^\s]*(?:{.*?})) # must happen once
302 'punct': r
'''[\!"#\$%&'()\*\+,\-\./:;<=>\?@\[\\\]\^_`{\|}\~]''',
304 # URL regular expression.
305 'url': r
'''(?=[a-zA-Z0-9./#]) # Must start correctly
306 (?: # Match the leading part (proto://hostname, or just hostname)
307 (?:ftp|https?|telnet|nntp) # protocol
309 (?: # Optional 'username:password@'
311 (?::\w+)? # optional :password
314 [-\w]+(?:\.\w[-\w]*)+ # hostname (sub.example.com)
316 (?:mailto:)? # Optional mailto:
319 [-\w]+(?:\.\w[-\w]*)+ # hostname
321 (?:[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\.)+ # domain without protocol
326 | in(?:t|fo)\b # .int or .info
335 | [a-z][a-z]\b # two-letter country codes
338 (?::\d+)? # Optional port number
339 (?: # Rest of the URL, optional
341 [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
343 [.!,?;:]+ # One or more of these
344 [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+ # Can't finish with these
352 'battr': r
'''(?P<parameters> #
353 (?: %(align)s # alignment
354 | %(classid)s # class and/or id
355 | %(padding)s # padding tags
357 | %(style)s # {style}
362 # (Un)ordered list attributes.
363 'olattr': r
'''(?P<olparameters> #
364 (?: %(align)s # alignment
365 | ((?:\(\#[\w]+\)) # (#id)
367 (?:\((?:[\w]+(?:\s[\w]+)*) #
368 (?:\#[\w]+)?\)) # (class1 class2 ... classn#id) or (class1 class2 ... classn)
370 | %(padding)s # padding tags
372 | %(style)s # {style}
377 # List item attributes.
378 'liattr': r
'''(?P<liparameters> #
379 (?: %(align)s # alignment
380 | %(classid)s # class and/or id
381 | %(padding)s # padding tags
383 | %(style)s # {style}
389 'qattr': r
'''(?P<parameters> #
390 (?: %(classid)s # class and/or id
392 | %(style)s # {style}
398 'lattr': r
'''(?P<parameters> # Links attributes
399 (?: %(align)s # alignment
400 | %(classid)s # class and/or id
402 | %(style)s # {style}
408 'iattr': r
'''(?P<parameters> #
410 (?: [<>]+ # horizontal alignment tags
411 (?![^\s]*(?:[<>]))) # (must happen once)
413 (?: [\-\^~]+ # vertical alignment tags
414 (?![^\s]*(?:[\-\^~]))) # (must happen once)
415 | %(classid)s # class and/or id
416 | %(padding)s # padding tags
417 | %(style)s # {style}
424 (?:([\d]+%?)x([\d]+%?)) # 20x10
427 (?:([\d]+)%?w\s([\d]+)%?h) # 10h 20w
429 (?:([\d]+)%?h\s([\d]+)%?w) # 20w 10h
435 'tattr': r
'''(?P<parameters> #
437 (?: [\^~] # vertical alignment
438 (?![^\s]*(?:[\^~]))) # (must happen once)
439 | %(align)s # alignment
441 | %(style)s # {style}
442 | %(classid)s # class and/or id
443 | %(padding)s # padding
444 | _ # is this a header row/cell?
453 def preg_replace(pattern
, replacement
, text
):
454 """Alternative re.sub that handles empty groups.
456 This acts like re.sub, except it replaces empty groups with ''
457 instead of raising an exception.
460 def replacement_func(matchobj
):
463 _debug(matchobj
.groups())
464 for matchitem
in matchobj
.groups():
468 rc
= rc
.replace(r
'\%s' % counter
, matchitem
)
473 p
= re
.compile(pattern
)
476 return p
.sub(replacement_func
, text
)
def html_replace(pattern, replacement, text):
    """Apply preg_replace only to the text outside HTML tags.

    When `text` contains nothing that looks like a tag, the whole string
    is passed to preg_replace.  Otherwise the text is split at every
    '<...>' chunk; the tag chunks are copied through untouched and the
    chunks in between are run through preg_replace.
    """
    # No markup at all: a plain search and replace is enough.
    if re.search(r'''<.*>''', text) is None:
        return preg_replace(pattern, replacement, text)

    # The capturing group makes re.split keep the tags in the result.
    pieces = []
    for chunk in re.split('(<.*?>)', text):
        if re.match('<.*?>', chunk):
            pieces.append(chunk)
        else:
            pieces.append(preg_replace(pattern, replacement, chunk))

    return ''.join(pieces)
500 # PyTextile can optionally sanitize the generated XHTML,
501 # which is good for weblog comments. This code is from
502 # Mark Pilgrim's feedparser.
503 class _BaseHTMLProcessor(sgmllib
.SGMLParser
):
504 elements_no_end_tag
= ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
505 'img', 'input', 'isindex', 'link', 'meta', 'param']
508 sgmllib
.SGMLParser
.__init
__(self
)
512 sgmllib
.SGMLParser
.reset(self
)
514 def normalize_attrs(self
, attrs
):
515 # utility method to be called by descendants
516 attrs
= [(k
.lower(), sgmllib
.charref
.sub(lambda m
: unichr(int(m
.groups()[0])), v
).strip()) for k
, v
in attrs
]
517 attrs
= [(k
, k
in ('rel', 'type') and v
.lower() or v
) for k
, v
in attrs
]
520 def unknown_starttag(self
, tag
, attrs
):
521 # called for each start tag
522 # attrs is a list of (attr, value) tuples
523 # e.g. for <pre class="screen">, tag="pre", attrs=[("class", "screen")]
524 strattrs
= "".join([' %s="%s"' % (key
, value
) for key
, value
in attrs
])
525 if tag
in self
.elements_no_end_tag
:
526 self
.pieces
.append("<%(tag)s%(strattrs)s />" % locals())
528 self
.pieces
.append("<%(tag)s%(strattrs)s>" % locals())
530 def unknown_endtag(self
, tag
):
531 # called for each end tag, e.g. for </pre>, tag will be "pre"
532 # Reconstruct the original end tag.
533 if tag
not in self
.elements_no_end_tag
:
534 self
.pieces
.append("</%(tag)s>" % locals())
536 def handle_charref(self
, ref
):
537 # called for each character reference, e.g. for " ", ref will be "160"
538 # Reconstruct the original character reference.
539 self
.pieces
.append("&#%(ref)s;" % locals())
541 def handle_entityref(self
, ref
):
542 # called for each entity reference, e.g. for "©", ref will be "copy"
543 # Reconstruct the original entity reference.
544 self
.pieces
.append("&%(ref)s;" % locals())
546 def handle_data(self
, text
):
547 # called for each block of plain text, i.e. outside of any tag and
548 # not containing any character or entity references
549 # Store the original text verbatim.
550 self
.pieces
.append(text
)
552 def handle_comment(self
, text
):
553 # called for each HTML comment, e.g. <!-- insert Javascript code here -->
554 # Reconstruct the original comment.
555 self
.pieces
.append("<!--%(text)s-->" % locals())
557 def handle_pi(self
, text
):
558 # called for each processing instruction, e.g. <?instruction>
559 # Reconstruct original processing instruction.
560 self
.pieces
.append("<?%(text)s>" % locals())
562 def handle_decl(self
, text
):
563 # called for the DOCTYPE, if present, e.g.
564 # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
565 # "http://www.w3.org/TR/html4/loose.dtd">
566 # Reconstruct original DOCTYPE
567 self
.pieces
.append("<!%(text)s>" % locals())
570 """Return processed HTML as a single string"""
571 return "".join(self
.pieces
)
574 class _HTMLSanitizer(_BaseHTMLProcessor
):
575 acceptable_elements
= ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
576 'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
577 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
578 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
579 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
580 'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
581 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
582 'thead', 'tr', 'tt', 'u', 'ul', 'var']
584 acceptable_attributes
= ['abbr', 'accept', 'accept-charset', 'accesskey',
585 'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
586 'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
587 'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
588 'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
589 'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
590 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
591 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
592 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
593 'usemap', 'valign', 'value', 'vspace', 'width']
595 unacceptable_elements_with_end_tag
= ['script', 'applet']
597 # This is for MathML.
598 mathml_elements
= ['math', 'mi', 'mn', 'mo', 'mrow', 'msup']
599 mathml_attributes
= ['mode', 'xmlns']
601 acceptable_elements
= acceptable_elements
+ mathml_elements
602 acceptable_attributes
= acceptable_attributes
+ mathml_attributes
605 _BaseHTMLProcessor
.reset(self
)
606 self
.unacceptablestack
= 0
608 def unknown_starttag(self
, tag
, attrs
):
609 if not tag
in self
.acceptable_elements
:
610 if tag
in self
.unacceptable_elements_with_end_tag
:
611 self
.unacceptablestack
+= 1
613 attrs
= self
.normalize_attrs(attrs
)
614 attrs
= [(key
, value
) for key
, value
in attrs
if key
in self
.acceptable_attributes
]
615 _BaseHTMLProcessor
.unknown_starttag(self
, tag
, attrs
)
617 def unknown_endtag(self
, tag
):
618 if not tag
in self
.acceptable_elements
:
619 if tag
in self
.unacceptable_elements_with_end_tag
:
620 self
.unacceptablestack
-= 1
622 _BaseHTMLProcessor
.unknown_endtag(self
, tag
)
624 def handle_pi(self
, text
):
627 def handle_decl(self
, text
):
630 def handle_data(self
, text
):
631 if not self
.unacceptablestack
:
632 _BaseHTMLProcessor
.handle_data(self
, text
)
636 """Textile formatter.
638 This is the base class for the PyTextile text processor.
640 def __init__(self
, text
=''):
641 """Instantiate the class, passing the text to be formatted.
643 Here we pre-process the text and collect all the link
648 # Basic regular expressions.
653 self
.searches
['imdb'] = 'http://www.imdb.com/Find?for=%s'
654 self
.searches
['google'] = 'http://www.google.com/search?q=%s'
655 self
.searches
['python'] = 'http://www.python.org/doc/current/lib/module-%s.html'
656 if amazon_associate_id
:
657 self
.searches
['isbn'] = ''.join(['http://', AMAZON
, '/exec/obidos/ASIN/%s/', amazon_associate_id
])
658 self
.searches
['amazon'] = ''.join(['http://', AMAZON
, '/exec/obidos/external-search?mode=blended&keyword=%s&tag=', amazon_associate_id
])
660 self
.searches
['isbn'] = ''.join(['http://', AMAZON
, '/exec/obidos/ASIN/%s'])
661 self
.searches
['amazon'] = ''.join(['http://', AMAZON
, '/exec/obidos/external-search?mode=blended&keyword=%s'])
663 # These are the blocks we know.
666 (r
'''^p # Paragraph signature
667 %(battr)s # Paragraph attributes
669 (?P<extend>\.)? # Extended paragraph denoted by a second dot
672 ''' % self
.res
, self
.paragraph
),
674 # Pre-formatted text.
675 (r
'''^pre # Pre signature
676 %(battr)s # Pre attributes
678 (?P<extend>\.)? # Extended pre denoted by a second dot
681 ''' % self
.res
, self
.pre
),
684 (r
'''^bc # Blockcode signature
685 %(battr)s # Blockcode attributes
687 (?P<extend>\.)? # Extended blockcode denoted by a second dot
690 ''' % self
.res
, self
.bc
),
693 (r
'''^bq # Blockquote signature
694 %(battr)s # Blockquote attributes
696 (?P<extend>\.)? # Extended blockquote denoted by a second dot
697 (:(?P<cite> # Optional cite attribute
700 | "[\w]+(?:\s[\w]+)*" # "Name inside quotes"
705 ''' % self
.res
, self
.blockquote
),
708 (r
'''^h # Header signature
709 (?P<header>\d) # Header number
710 %(battr)s # Header attributes
712 (?P<extend>\.)? # Extended header denoted by a second dot
715 ''' % self
.res
, self
.header
),
718 (r
'''^fn # Footnote signature
719 (?P<footnote>[\d]+) # Footnote number
721 (?P<extend>\.)? # Extended footnote denoted by a second dot
727 (r
'''^dl # Definition list signature
728 %(battr)s # Definition list attributes
730 (?P<extend>\.)? # Extended definition list denoted by a second dot
733 ''' % self
.res
, self
.dl
),
735 # Ordered list (attributes to first <li>).
736 (r
'''^%(olattr)s # Ordered list attributes
737 \# # Ordered list signature
738 %(liattr)s # List item attributes
742 ''' % self
.res
, self
.ol
),
744 # Unordered list (attributes to first <li>).
745 (r
'''^%(olattr)s # Unrdered list attributes
746 \* # Unordered list signature
747 %(liattr)s # Unordered list attributes
751 ''' % self
.res
, self
.ul
),
754 (r
'''^==?(?P<text>.*?)(==)?$ # Escaped text
757 (r
'''^(?P<text><.*)$ # XHTML tag
761 (r
'''^(?P<text> # itex code
762 \\\[ # starts with \[
763 .*? # complicated mathematical equations go here
768 (r
'''^table # Table signature
769 %(tattr)s # Table attributes
771 (?P<extend>\.)? # Extended blockcode denoted by a second dot
774 ''' % self
.res
, self
.table
),
783 (r
'''^(?P<text>tell\sme\sabout\stextile\.)$''', self
.about
),
def preprocess(self):
    """Pre-process the text in place.

    Strips leading and trailing whitespace from self.text, turns every
    '\\r\\n' and every remaining '\\r' into '\\n', and finally stores the
    result of self.sanitize() back in self.text.
    """
    cleaned = self.text.strip()

    # Zap carriage returns: Windows line ends first, then bare '\r'.
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    self.text = cleaned

    self.text = self.sanitize(self.text)
def grab_links(self):
    """Collect the link lookups and remove them from the text.

    A lookup is a line of its own of the form '[id]example.com'.  Every
    lookup found in self.text is stored in a dictionary mapping the id to
    the URL, the lookup lines are deleted from self.text, and the
    dictionary is returned.
    """
    # The URL part comes from the 'url' entry of self.res.
    lookup = re.compile(r'''(?:^|\n)\[([\w]+?)\](%(url)s)(?:$|\n)''' % self.res, re.VERBOSE)

    # findall yields (id, url) pairs; a later duplicate id wins.
    found = dict(lookup.findall(self.text))

    # And clear them from the text.
    self.text = lookup.sub('', self.text)

    return found
821 def process(self
, head_offset
=HEAD_OFFSET
, validate
=VALIDATE
, sanitize
=SANITIZE
, output
=OUTPUT
, encoding
=ENCODING
):
824 Here we actually process the text, splitting the text in
825 blocks and applying the corresponding function to each
828 # Basic global changes.
831 # Grab lookup links and clean them from the text.
832 self
._links
= self
.grab_links()
834 # Offset for the headers.
835 self
.head_offset
= head_offset
837 # Process each block.
838 self
.blocks
= self
.split_text()
841 for [function
, captures
] in self
.blocks
:
842 text
.append(function(**captures
))
844 text
= '\n\n'.join(text
)
846 # Add titles to footnotes.
847 text
= self
.footnotes(text
)
849 # Convert to desired output.
850 text
= unicode(text
, encoding
)
851 text
= text
.encode(output
, 'xmlcharrefreplace')
860 if _tidy
and validate
:
def sanitize(self, text):
    """Fix single tags.

    Fix tags like <img />, <br /> and <hr />.

    ---
    h1. Sanitizing

    Textile can help you generate valid XHTML(eXtensible HyperText Markup Language).
    It will fix any single tags that are not properly closed, like
    @<img />@, @<br />@ and @<hr />@.

    If you have "mx.Tidy":http://www.egenix.com/files/python/mxTidy.html
    and/or "µTidyLib":http://utidylib.sourceforge.net/ installed,
    it also can optionally validade the generated code with these wrappers
    to ensure 100% valid XHTML(eXtensible HyperText Markup Language).
    """
    # `text` is the raw input; the cleaned-up string is returned.

    # Fix single tags like <img /> and <br />: whatever closing form was
    # used ('>', '/>', ' / >'), rewrite it as ' />'.
    text = preg_replace(r'''<(img|br|hr)(.*?)(?:\s*/?\s*)?>''', r'''<\1\2 />''', text)

    # Escape every ampersand that does not already start a character or
    # entity reference (the look-ahead skips '&#123;', '&#x1F;', '&copy;').
    # The replacement has to be the entity '&amp;'; replacing '&' with a
    # bare '&' leaves the text unchanged and the output invalid.
    text = preg_replace(r'''&(?!#?[xX]?(?:[0-9a-fA-F]+|\w{1,8});)''', r'''&amp;''', text)

    return text
892 def split_text(self
):
893 """Process the blocks from the text.
895 Split the blocks according to the signatures, join extended
896 blocks and associate each one of them with a function to
902 Textile process your text by dividing it in blocks. Each block
903 is identified by a signature and separated from other blocks by
906 All signatures should end with a period followed by a space. A
907 header @<h1></h1>@ can be done this way:
909 pre. h1. This is a header 1.
911 Blocks may continue for multiple paragraphs of text. If you want
912 a block signature to stay "active", use two periods after the
913 signature instead of one. For example:
915 pre.. bq.. This is paragraph one of a block quote.
917 This is paragraph two of a block quote.
919 =p. Now we're back to a regular paragraph.
924 <p>This is paragraph one of a block quote.</p>
926 <p>This is paragraph two of a block quote.</p>
929 <p>Now we’re back to a regular paragraph.</p>
931 p. The blocks can be customised by adding parameters between the
932 signature and the period. These include:
934 dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
935 [ll]:A language identifier (for a "lang" attribute).
936 (class) or (#id) or (class#id):For CSS(Cascading Style Sheets) class and id attributes.
937 >, <, =, <>:Modifier characters for alignment. Right-justification, left-justification, centered, and full-justification. The paragraph will also receive the class names "right", "left", "center" and "justify", respectively.
938 ( (one or more):Adds padding on the left. 1em per "(" character is applied. When combined with the align-left or align-right modifier, it makes the block float.
939 ) (one or more):Adds padding on the right. 1em per ")" character is applied. When combined with the align-left or align-right modifier, it makes the block float.
941 Here's an overloaded example:
943 pre. p(())>(class#id)[en]{color:red}. A simple paragraph.
947 pre. <p lang="en" style="color:red;padding-left:2em;padding-right:2em;float:right;" class="class right" id="id">A simple paragraph.</p>
950 clear_sig
= r
'''^clear(?P<alignment>[<>])?\.$'''
955 # We capture the \n's because they are important inside "pre..".
956 blocks
= re
.split(r
'''((\n\s*){2,})''', self
.text
)
959 # Check for the clear signature.
960 m
= re
.match(clear_sig
, block
)
962 clear
= m
.group('alignment')
964 clear
= {'<': 'clear:left;', '>': 'clear:right;'}[clear
]
966 clear
= 'clear:both;'
969 # Check each of the code signatures.
970 for regexp
, function
in self
.signatures
:
971 p
= re
.compile(regexp
, (re
.VERBOSE | re
.DOTALL
))
974 # Put everything in a dictionary.
975 captures
= m
.groupdict()
977 # If we are extending a block, we require a dot to
978 # break it, so we can start lines with '#' inside
979 # an extended <pre> without matching an ordered list.
980 if extending
and not captures
.get('dot', None):
981 output
[-1][1]['text'] += block
983 elif captures
.has_key('dot'):
986 # If a signature matches, we are not extending a block.
989 # Check if we should extend this block.
990 if captures
.has_key('extend'):
991 extending
= captures
['extend']
992 del captures
['extend']
995 if captures
.has_key('header'):
996 captures
['header'] = int(captures
['header']) + self
.head_offset
1000 captures
['clear'] = clear
1003 # Save the block to be processed later.
1004 output
.append([function
, captures
])
1010 # Append the text to the last block.
1011 output
[-1][1]['text'] += block
1013 output
.append([self
.paragraph
, {'text': block
}])
1018 def parse_params(self
, parameters
, clear
=None, align_type
='block'):
1019 """Parse the parameters from a block signature.
1021 This function parses the parameters from a block signature,
1022 splitting the information about class, id, language and
1023 style. The positioning (indentation and alignment) is parsed
1024 and stored in the style.
1028 p>(class#id){color:red}[en]. Paragraph.
1032 p{color:red}[en](class#id)>. Paragraph.
1034 will have its parameters parsed to:
1036 output = {'lang' : 'en',
1039 'style': 'color:red;text-align:right;'}
1041 Note that order is not important.
1045 return {'style': clear
}
1051 # Match class from (class) or (class#id).
1052 m
= re
.search(r
'''\((?P<class>[\w]+(\s[\w]+)*)(\#[\w]+)?\)''', parameters
)
1053 if m
: output
['class'] = m
.group('class')
1055 # Match id from (#id) or (class#id).
1056 m
= re
.search(r
'''\([\w]*(\s[\w]+)*\#(?P<id>[\w]+)\)''', parameters
)
1057 if m
: output
['id'] = m
.group('id')
1060 m
= re
.search(r
'''\[(?P<lang>[\w-]+)\]''', parameters
)
1061 if m
: output
['lang'] = m
.group('lang')
1064 m
= re
.search(r
'''{(?P<style>[^\}]+)}''', parameters
)
1066 output
['style'] = m
.group('style').replace('\n', '')
1068 # If necessary, append a semicolon to the style.
1069 if not output
['style'].endswith(';'):
1070 output
['style'] += ';'
1074 output
['style'] = output
.get('style', '') + clear
1076 # Remove classes, ids, langs and styles. This makes the
1077 # regular expression for the positioning much easier.
1078 parameters
= preg_replace(r
'''\([\#\w\s]+\)''', '', parameters
)
1079 parameters
= preg_replace(r
'''\[[\w-]+\]''', '', parameters
)
1080 parameters
= preg_replace(r
'''{[\w:;#%-]+}''', '', parameters
)
1084 # Count the left indentation.
1085 l_indent
= parameters
.count('(')
1086 if l_indent
: style
.append('padding-left:%dem;' % l_indent
)
1088 # Count the right indentation.
1089 r_indent
= parameters
.count(')')
1090 if r_indent
: style
.append('padding-right:%dem;' % r_indent
)
1093 if align_type
== 'image':
1094 align
= [('<', 'float:left;', ' left'),
1095 ('>', 'float:right;', ' right')]
1097 valign
= [('^', 'vertical-align:text-top;', ' top'),
1098 ('-', 'vertical-align:middle;', ' middle'),
1099 ('~', 'vertical-align:text-bottom;', ' bottom')]
1101 # Images can have both a vertical and a horizontal alignment.
1102 for alignments
in [align
, valign
]:
1103 for _align
, _style
, _class
in alignments
:
1104 if parameters
.count(_align
):
1105 style
.append(_style
)
1107 # Append a class name related to the alignment.
1108 output
['class'] = output
.get('class', '') + _class
1111 elif align_type
== 'table':
1112 align
= [('<', 'left'),
1117 valign
= [('^', 'top'),
1120 # Horizontal alignment.
1121 for _align
, _style
, in align
:
1122 if parameters
.count(_align
):
1123 output
['align'] = _style
1125 # Vertical alignment.
1126 for _align
, _style
, in valign
:
1127 if parameters
.count(_align
):
1128 output
['valign'] = _style
1130 # Colspan and rowspan.
1131 m
= re
.search(r
'''\\(\d+)''', parameters
)
1133 #output['colspan'] = m.groups()
1134 output
['colspan'] = int(m
.groups()[0])
1136 m
= re
.search(r
'''/(\d+)''', parameters
)
1138 output
['rowspan'] = int(m
.groups()[0])
1141 if l_indent
or r_indent
:
1142 alignments
= [('<>', 'text-align:justify;', ' justify'),
1143 ('=', 'text-align:center;', ' center'),
1144 ('<', 'float:left;', ' left'),
1145 ('>', 'float:right;', ' right')]
1147 alignments
= [('<>', 'text-align:justify;', ' justify'),
1148 ('=', 'text-align:center;', ' center'),
1149 ('<', 'text-align:left;', ' left'),
1150 ('>', 'text-align:right;', ' right')]
1152 for _align
, _style
, _class
in alignments
:
1153 if parameters
.count(_align
):
1154 style
.append(_style
)
1156 # Append a class name related to the alignment.
1157 output
['class'] = output
.get('class', '') + _class
1160 # Join all the styles.
1161 output
['style'] = output
.get('style', '') + ''.join(style
)
1163 # Remove excess whitespace.
1164 if output
.has_key('class'):
1165 output
['class'] = output
['class'].strip()
def build_open_tag(self, tag, attributes={}, single=0):
    """Build an opening tag carrying the given attributes.

    This function is used by all block builders to generate their
    opening tags.

    tag        -- the element name.
    attributes -- dictionary of attribute names and values; an attribute
                  whose value is false is left out, except 'alt', which
                  is always written.
    single     -- when true the tag is closed with ' />' instead of '>'.
    """
    rendered = []
    for name, value in attributes.items():
        # The ALT attribute can be empty.
        if value or name == 'alt':
            rendered.append(' %s="%s"' % (name, value))

    if single:
        ending = ' />'
    else:
        ending = '>'

    return ('<%s' % tag) + ''.join(rendered) + ending
1192 def paragraph(self
, text
, parameters
=None, attributes
=None, clear
=None):
1193 """Process a paragraph.
1195 This function processes the paragraphs, enclosing the text in a
1196 <p> tag and breaking lines with <br />. Paragraphs are formatted
1197 with all the inline rules.
1202 This is how you write a paragraph:
1204 pre. p. This is a paragraph, although a short one.
1206 Since the paragraph is the default block, you can safely omit its
1207 signature ([@p@]). Simply write:
1209 pre. This is a paragraph, although a short one.
1211 Text in a paragraph block is wrapped in @<p></p>@ tags, and
1212 newlines receive a <br /> tag. In both cases Textile will process
1215 pre. <p>This is a paragraph, although a short one.</p>
1217 Text in a paragraph block is processed with all the inline rules.
1220 lines
= re
.split('\n{2,}', text
)
1222 # Get the attributes.
1223 attributes
= attributes
or self
.parse_params(parameters
, clear
)
1232 open_tag
= self
.build_open_tag('p', attributes
)
1235 # Pop the id because it must be unique.
1236 if attributes
.has_key('id'): del attributes
['id']
1239 line
= preg_replace(r
'(<br />|\n)+', '<br />\n', line
)
1241 # Remove <br /> from inside broken HTML tags.
1242 line
= preg_replace(r
'(<[^>]*)<br />\n(.*?>)', r
'\1 \2', line
)
1244 # Inline formatting.
1245 line
= self
.inline(line
)
1247 output
.append(open_tag
+ line
+ close_tag
)
1249 return '\n\n'.join(output
)
def pre(self, text, parameters=None, clear=None):
    """Process pre-formatted text.

    This function processes pre-formatted text into a <pre> tag.
    No HTML is added for the lines, but @<@ and @>@ are translated into
    HTML entities.

    text       -- the raw block text; trailing whitespace is removed.
    parameters -- block modifiers, handed to self.parse_params().
    clear      -- handed to self.parse_params() unchanged.

    Returns the opening <pre> tag, the escaped text, a newline and
    the closing </pre>.

    NOTE(review): the text after the dashes below is Textile markup;
    about() splits docstrings on that marker, so keep it in place.

    ---
    h1. Pre-formatted text

    Pre-formatted text can be specified using the @pre@ signature.
    Inside a "pre" block, whitespace is preserved and @<@ and @>@ are
    translated into HTML(HyperText Markup Language) entities
    automatically.

    Text in a "pre" block is _not processed_ with any inline rule.

    Here's a simple example:

    pre. pre. This text is pre-formatted.
    Nothing interesting happens inside here...

    Will become:

    pre. <pre>
    This text is pre-formatted.
    Nothing interesting happens inside here...
    </pre>
    """
    # Remove trailing whitespace.
    text = text.rstrip()

    # Get the attributes.
    attributes = self.parse_params(parameters, clear)

    # No newline after the opening tag: it would show up as an empty
    # first line inside the <pre>.
    open_tag = self.build_open_tag('pre', attributes)
    close_tag = '\n</pre>'

    # Escape the markup delimiters so the block is shown literally.
    # '&' is deliberately left alone so existing entities keep working.
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')

    return open_tag + text + close_tag
1300 def bc(self
, text
, parameters
=None, clear
=None):
1301 """Process block code.
1303 This function processes block code into a <code> tag inside a
1304 <pre>. No HTML is added for the lines, but @<@ and @>@ are translated
1310 A block code, specified by the @bc@ signature, is a block of
1311 pre-formatted text which also receives a @<code></code>@ tag. As
1312 with "pre", whitespace is preserved and @<@ and @>@ are translated
1313 into HTML(HyperText Markup Language) entities automatically.
1315 Text in a "bc" code is _not processed_ with the inline rules.
1317 If you have "Twisted":http://www.twistedmatrix.com/ installed,
1318 Textile can automatically colorize your Python code if you
1319 specify its language as "Python":
1321 pre. bc[python]. from twisted.python import htmlizer
1326 <code lang="python">
1327 <span class="py-src-keyword">from</span> <span class="py-src-variable">twisted</span><span class="py-src-op">.</span><span class="py-src-variable">python</span> <span class="py-src-keyword">import</span> <span class="py-src-variable">htmlizer</span>
1331 The colors can be specified in your CSS(Cascading Style Sheets)
1332 file. If you don't want to install Twisted, you can download just
1333 the @htmlizer@ module "independently":http://dealmeida.net/code/htmlizer.py.txt.
1336 # Get the attributes.
1337 attributes
= self
.parse_params(parameters
, clear
)
1339 # XHTML <code> can't have the attribute lang.
1340 if attributes
.has_key('lang'):
1341 lang
= attributes
['lang']
1342 del attributes
['lang']
1347 open_tag
= '<pre>\n' + self
.build_open_tag('code', attributes
) + '\n'
1348 close_tag
= '\n</code>\n</pre>'
1350 # Colorize Python code?
1351 if htmlizer
and lang
== 'python':
1355 text
= text
.replace('<', '<')
1356 text
= text
.replace('>', '>')
1358 return open_tag
+ text
+ close_tag
1361 def dl(self
, text
, parameters
=None, clear
=None):
1362 """Process definition list.
1364 This function process definition lists. The text inside
1365 the <dt> and <dd> tags is processed for inline formatting.
1370 A definition list starts with the signature @dl@, and has
1371 its items separated by a @:@. Here's a simple example:
1373 pre. dl. name:Sir Lancelot of Camelot.
1374 quest:To seek the Holy Grail.
1381 <dd>Sir Lancelot of Camelot.</dd>
1383 <dd>To seek the Holy Grail.</dd>
1388 # Get the attributes.
1389 attributes
= self
.parse_params(parameters
, clear
)
1392 open_tag
= self
.build_open_tag('dl', attributes
) + '\n'
1393 close_tag
= '\n</dl>'
1395 lines
= text
.split('\n')
1399 [dt
, dd
] = line
.split(':', 1)
1403 if dt
: output
.append('<dt>%s</dt>\n<dd>%s</dd>' % (dt
, dd
))
1405 text
= '\n'.join(output
)
1407 text
= self
.inline(text
)
1409 return open_tag
+ text
+ close_tag
def blockquote(self, text, parameters=None, cite=None, clear=None):
    """Process block quote.

    The block quote is inserted into a <blockquote> tag, and
    processed as a paragraph. An optional cite attribute can
    be appended on the last line after two dashes (--), or
    after the period following ':' for compatibility with the
    Perl version.

    The text after the dashes below is Textile markup that about()
    extracts from this docstring; keep the marker in place.

    ---
    h1. Blockquote

    A blockquote is denoted by the signature @bq@. The text in this
    block will be enclosed in @<blockquote></blockquote>@ and @<p></p>@,
    receiving the same formatting as a paragraph. For example:

    pre. bq. This is a blockquote.

    Will become:

    pre. <blockquote>
    <p>This is a blockquote.</p>
    </blockquote>

    You can optionally specify the @cite@ attribute of the blockquote,
    using the following syntax:

    pre. bq.:http://example.com Some text.

    pre. bq.:"John Doe" Some other text.

    Becomes:

    pre. <blockquote cite="http://example.com">
    <p>Some text.</p>
    </blockquote>

    pre. <blockquote cite="John Doe">
    <p>Some other text.</p>
    </blockquote>

    You can also specify the @cite@ using a pair of dashes on the
    last line of the blockquote:

    pre. bq. Some text.
    -- http://example.com
    """
    attrs = self.parse_params(parameters, clear)

    if cite:
        # Citation given in the signature; drop surrounding quotes.
        attrs['cite'] = cite.strip('"')
    else:
        # Otherwise a trailing "-- source" line supplies the citation
        # and is removed from the quoted text.
        quoted = text.split('\n')
        if quoted[-1].startswith('-- '):
            attrs['cite'] = quoted.pop()[3:]
        text = '\n'.join(quoted)

    opening = self.build_open_tag('blockquote', attrs) + '\n'

    # The body is rendered as an ordinary paragraph; the blockquote's
    # attributes stay on the <blockquote> tag and are not passed on.
    body = self.paragraph(text)

    return opening + body + '\n</blockquote>'
1488 def header(self
, text
, parameters
=None, header
=1, clear
=None):
1489 """Process a header.
1491 The header number is captured by the regular
1492 expression and lives in header. If head_offset is
1493 set, it is adjusted accordingly.
1498 A header is produced by the signature @hn@, where @n@ goes
1499 from 1 to 6. You can adjust the relative output of the headers
1500 passing a @head_offset@ attribute when calling @textile()@.
1504 pre. h1. This is a header.
1508 pre. <h1>This is a header.</h1>
1510 # Get the attributes.
1511 attributes
= self
.parse_params(parameters
, clear
)
1513 # Get the header number and limit it between 1 and 6.
1519 open_tag
= self
.build_open_tag('h%d' % n
, attributes
)
1520 close_tag
= '</h%d>' % n
1522 text
= self
.inline(text
)
1524 return open_tag
+ text
+ close_tag
1527 def footnote(self
, text
, parameters
=None, footnote
=1, clear
=None):
1528 """Process a footnote.
1530 A footnote is formatted as a paragraph of class
1531 'footnote' and id 'fn%d', starting with the footnote
1532 number in a <sup> tag. Here we just build the
1533 attributes and pass them directly to self.paragraph().
1538 A footnote is produced by the signature @fn@ followed by
1539 a number. Footnotes are paragraphs of a special CSS(Cascading Style Sheets)
1542 pre. fn1. This is footnote number one.
1546 pre. <p class="footnote" id="fn1"><sup>1</sup> This is footnote number one.</p>
1548 This footnote can be referenced anywhere on the text by the
1551 pre. This is a reference[1] to footnote number one.
1555 pre. <p>This is a reference<sup class="footnote"><a href="#fn1" title="This is footnote number one.">1</a></sup> to footnote number 1.</p>
1557 Note that the text from the footnote appears in the @title@ of the
1558 link pointing to it.
1563 # Build the attributes to the paragraph.
1564 attributes
= self
.parse_params(parameters
, clear
)
1565 attributes
['class'] = 'footnote'
1566 attributes
['id'] = 'fn%d' % n
1568 # Build the paragraph text.
1569 text
= ('<sup>%d</sup> ' % n
) + text
1571 # And return the paragraph.
1572 return self
.paragraph(text
=text
, attributes
=attributes
)
1575 def build_li(self
, items
, liattributes
):
1576 """Build the list item.
1578 This function build the list item of an (un)ordered list. It
1579 works by peeking at the next list item, and searching for a
1580 multi-list. If a multi-list is found, it is processed and
1581 appended inside the list item tags, as it should be.
1588 item
= item
.lstrip()
1589 item
= item
.replace('\n', '<br />\n')
1591 # Get list item attributes.
1592 p
= re
.compile(r
'''^%(liattr)s\s''' % self
.res
, re
.VERBOSE
)
1596 liparameters
= c
['liparameters']
1597 item
= p
.sub('', item
)
1601 liattributes
= liattributes
or self
.parse_params(liparameters
)
1603 # Build the item tag.
1604 open_tag_li
= self
.build_open_tag('li', liattributes
)
1606 # Reset the attributes, which should be applied
1607 # only to the first <li>.
1610 # Build the closing tag.
1611 close_tag_li
= '</li>'
1613 # Multi-list recursive routine.
1614 # Here we check the _next_ items for a multi-list. If we
1615 # find one, we extract all items of the multi-list and
1616 # process them recursively.
1620 # Grab all the items that start with # or *.
1621 n_item
= items
.pop(0)
1623 # Grab the <ol> parameters.
1624 p
= re
.compile(r
'''^%(olattr)s''' % self
.res
, re
.VERBOSE
)
1628 olparameters
= c
['olparameters']
1629 tmp
= p
.sub('', n_item
)
1633 # Check for an ordered list inside this one.
1634 if tmp
.startswith('#'):
1636 inlist
.append(n_item
)
1638 # Peek into the next item.
1639 n_item
= items
.pop(0)
1640 if n_item
.startswith('#'):
1641 inlist
.append(n_item
)
1643 items
.insert(0, n_item
)
1646 inlist
= self
.ol('\n'.join(inlist
), olparameters
=olparameters
)
1647 item
= item
+ '\n' + inlist
+ '\n'
1649 # Check for an unordered list inside this one.
1650 elif tmp
.startswith('*'):
1652 inlist
.append(n_item
)
1654 # Peek into the next item.
1655 n_item
= items
.pop(0)
1656 if n_item
.startswith('*'):
1657 inlist
.append(n_item
)
1659 items
.insert(0, n_item
)
1662 inlist
= self
.ul('\n'.join(inlist
), olparameters
=olparameters
)
1663 item
= item
+ '\n' + inlist
+ '\n'
1665 # Otherwise we just put it back in the list.
1667 items
.insert(0, n_item
)
1669 item
= self
.inline(item
)
1671 item
= open_tag_li
+ item
+ close_tag_li
1674 return '\n'.join(lines
)
1677 def ol(self
, text
, liparameters
=None, olparameters
=None, clear
=None):
1678 """Build an ordered list.
1680 This function basically just sets the <ol></ol> with the
1681 right attributes, and then pass everything inside to
1682 _build_li, which does the real tough recursive job.
1687 Ordered lists can be constructed this way:
1689 pre. # Item number 1.
1696 <li>Item number 1.</li>
1697 <li>Item number 2.</li>
1698 <li>Item number 3.</li>
1701 If you want a list to "break" an extended block, you should
1702 add a period after the hash. This is useful for writing
1705 pre.. bc[python].. #!/usr/bin/env python
1707 # This is a comment, not an ordered list!
1708 # So this won't break the extended "bc".
1710 p. Lists can be nested:
1712 pre. # Item number 1.
1718 Textile will transform this to:
1723 <li>Item number 1a.</li>
1724 <li>Item number 1b.</li>
1729 <li>Item number 2a.</li>
1734 You can also mix ordered and unordered lists:
1736 pre. * To write well you need:
1737 *# to read every day
1738 *# to write every day
1744 <li>To write well you need:
1746 <li>to read every day</li>
1747 <li>to write every day</li>
1753 To style a list, the parameters should go before the hash if you want
1754 to set the attributes on the @<ol>@ tag:
1756 pre. (class#id)# one
1760 If you want to customize the firsr @<li>@ tag, apply the parameters
1763 pre. #(class#id) one
1767 # Get the attributes.
1768 olattributes
= self
.parse_params(olparameters
, clear
)
1769 liattributes
= self
.parse_params(liparameters
)
1771 # Remove list depth.
1772 if text
.startswith('#'):
1775 items
= text
.split('\n#')
1777 # Build the open tag.
1778 open_tag
= self
.build_open_tag('ol', olattributes
) + '\n'
1780 close_tag
= '\n</ol>'
1782 # Build the list items.
1783 text
= self
.build_li(items
, liattributes
)
1785 return open_tag
+ text
+ close_tag
1788 def ul(self
, text
, liparameters
=None, olparameters
=None, clear
=None):
1789 """Build an unordered list.
1791 This function basically just sets the <ul></ul> with the
1792 right attributes, and then pass everything inside to
1793 _build_li, which does the real tough recursive job.
1798 Unordered lists behave exactly like the ordered lists, and are
1799 defined using a star:
1810 <li><span class="caps">PHP</span></li>
1813 # Get the attributes.
1814 olattributes
= self
.parse_params(olparameters
, clear
)
1815 liattributes
= self
.parse_params(liparameters
)
1817 # Remove list depth.
1818 if text
.startswith('*'):
1821 items
= text
.split('\n*')
1823 # Build the open tag.
1824 open_tag
= self
.build_open_tag('ul', olattributes
) + '\n'
1826 close_tag
= '\n</ul>'
1828 # Build the list items.
1829 text
= self
.build_li(items
, liattributes
)
1831 return open_tag
+ text
+ close_tag
1834 def table(self
, text
, parameters
=None, clear
=None):
1837 To build a table we split the text in lines to get the
1838 rows, and split the rows between '|' to get the individual
1844 Making a simple table is as easy as possible:
1849 Will be processed into:
1864 If you want to customize the @<table>@ tag, you must use the
1867 pre. table(class#id)[en]. |a|b|c|
1870 To customize a row, apply the modifier _before_ the first @|@:
1872 pre. table. (class)<>|a|b|c|
1875 Individual cells can by customized by adding the parameters _after_
1876 the @|@, proceded by a period and a space:
1881 The allowed modifiers are:
1883 dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
1884 (class) or (#id) or (class#id):A CSS(Cascading Style Sheets) class and/or id attribute.
1885 ( (one or more):Adds 1em of padding to the left for each '(' character.
1886 ) (one or more):Adds 1em of padding to the right for each ')' character.
1887 <:Aligns to the left (floats to left for tables if combined with the ')' modifier).
1888 >:Aligns to the right (floats to right for tables if combined with the '(' modifier).
1889 =:Aligns to center (sets left, right margins to 'auto' for tables).
1890 <>:For cells only. Justifies text.
1891 ^:For rows and cells only. Aligns to the top.
1892 ~ (tilde):For rows and cells only. Aligns to the bottom.
1893 _ (underscore):Can be applied to a table row or cell to indicate a header row or cell.
1894 \\2 or \\3 or \\4, etc.:Used within cells to indicate a colspan of 2, 3, 4, etc. columns. When you see "\\", think "push forward".
1895 /2 or /3 or /4, etc.:Used within cells to indicate a rowspan of 2, 3, 4, etc. rows. When you see "/", think "push downward".
1897 When a cell is identified as a header cell and an alignment is
1898 specified, that becomes the default alignment for cells below it.
1899 You can always override this behavior by specifying an alignment
1900 for one of the lower cells.
1902 attributes
= self
.parse_params(parameters
, clear
, align_type
='table')
1903 #attributes['cellspacing'] = '0'
1905 # Build the <table>.
1906 open_tag
= self
.build_open_tag('table', attributes
) + '\n'
1907 close_tag
= '</table>'
1911 rows
= re
.split(r
'''\n+''', text
)
1914 columns
= row
.split('|')
1917 parameters
= columns
.pop(0)
1919 rowattr
= self
.parse_params(parameters
, align_type
='table')
1920 open_tr
= self
.build_open_tag('tr', rowattr
) + '\n'
1921 output
.append(open_tr
)
1923 # Does the row define headers?
1924 if parameters
.count('_'):
1930 for cell
in columns
[:-1]:
1931 p
= re
.compile(r
'''(?:%(tattr)s\.\s)?(?P<text>.*)''' % self
.res
, re
.VERBOSE
)
1935 cellattr
= self
.parse_params(c
['parameters'], align_type
='table')
1937 # Get the width of this cell.
1938 width
= cellattr
.get('colspan', 1)
1941 if c
['parameters'].count('_'):
1944 # If it is a header, let's set the default alignment.
1946 # Set the default aligment for all cells below this one.
1947 # This is a little tricky because this header can have
1949 for i
in range(col
, col
+width
):
1950 default_align
[i
] = cellattr
.get('align', None)
1953 # Apply the default align, if any.
1954 cellattr
['align'] = cellattr
.get('align', default_align
.get(col
, None))
1956 open_td
= self
.build_open_tag(td_tag
, cellattr
)
1957 close_td
= '</%s>\n' % td_tag
1959 #output.append(open_td + c['text'].strip() + close_td)
1960 output
.append(open_td
+ self
.inline(c
['text'].strip()) + close_td
)
1964 output
.append('</tr>\n')
1966 text
= open_tag
+ ''.join(output
) + close_tag
1971 def escape(self
, text
):
1974 This is used to match escaped text. Nothing to see here!
1979 If you don't want Textile processing a block, you can simply
1980 enclose it inside @==@:
1982 pre. p. Regular paragraph
1985 Escaped portion -- will not be formatted
1989 pre. p. Back to normal.
1991 This can also be used inline, disabling the formatting temporarily:
1993 pre. p. This is ==*a test*== of escaping.
1998 def itex(self
, text
):
1999 """Convert itex to MathML.
2001 If the itex2mml binary is set, we use it to convert the
2002 itex to MathML. Otherwise, the text is unprocessed and
2008 Textile can automatically convert itex code to MathML(Mathematical Markup Language)
2009 for you, if you have the itex2MML binary (you can download it
2010 from the "Movable Type plugin":http://golem.ph.utexas.edu/~distler/blog/files/itexToMML.tar.gz).
2012 Block equations should be enclosed inbetween @\[@ and @\]@:
2014 pre. \[ e^{i\pi} + 1 = 0 \]
2016 Will be translated to:
2018 pre. <math xmlns='http://www.w3.org/1998/Math/MathML' mode='display'>
2019 <msup><mi>e</mi> <mrow><mi>i</mi>
2020 <mi>&pi;</mi></mrow></msup>
2021 <mo>+</mo><mn>1</mn><mo>=</mo><mn>0</mn>
2024 Equations can also be displayed inline:
2026 pre. Euler's formula, $e^{i\pi}+1=0$, ...
2028 (Note that if you want to display MathML(Mathematical Markup Language)
2029 your content must be served as @application/xhtml+xml@, which is not
2030 accepted by all browsers.)
2034 text
= os
.popen("echo '%s' | %s" % (text
, itex2mml
)).read()
2041 def about(self
, text
=None):
2042 """Show PyTextile's functionalities.
2044 An introduction to PyTextile. Can be called when running the
2045 main script or if you write the following line:
2047 'tell me about textile.'
2049 But keep it a secret!
2053 about
.append(textile('h1. This is Textile', head_offset
=self
.head_offset
))
2054 about
.append(textile(__doc__
.split('---', 1)[1], head_offset
=self
.head_offset
))
2056 functions
= [(self
.split_text
, 1),
2057 (self
.paragraph
, 2),
2060 (self
.blockquote
, 2),
2079 for function
, offset
in functions
:
2080 doc
= function
.__doc
__.split('---', 1)[1]
2081 doc
= doc
.split('\n')
2087 doc
= '\n'.join(lines
)
2088 about
.append(textile(doc
, head_offset
=self
.head_offset
+offset
))
2090 about
= '\n'.join(about
)
2091 about
= about
.replace('<br />', '')
def acronym(self, text):
    """Process acronyms.

    Acronyms can have letters in upper and lower caps, or even numbers,
    provided that the numbers and upper caps are the same in the
    abbreviation and in the description. For example:

        XHTML(eXtensible HyperText Markup Language)
        OPeNDAP(Open source Project for a Network Data Access Protocol)

    are all valid acronyms.

    text -- the text to scan.

    Returns the text with every matching WORD(definition) pair turned
    into an <acronym> element; afterwards html_replace() wraps runs of
    three or more capitals in <span class="caps">.

    NOTE(review): the text after the dashes below is Textile markup;
    about() splits docstrings on that marker, so keep it in place.

    ---
    h1. Acronyms

    You can define acronyms in your text the following way:

    pre. This is XHTML(eXtensible HyperText Markup Language).

    The resulting code is:

    pre. <p><acronym title="eXtensible HyperText Markup Language"><span class="caps">XHTML</span></acronym></p>

    Acronyms can have letters in upper and lower caps, or even numbers,
    provided that the numbers and upper caps are the same in the
    abbreviation and in the description. For example:

    pre. XHTML(eXtensible HyperText Markup Language)
    OPeNDAP(Open source Project for a Network Data Access Protocol)

    are all valid acronyms.
    """
    # Find the acronyms.
    acronyms = r'''(?P<acronym>[\w]+)\((?P<definition>[^\(\)]+?)\)'''

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # Compiled once because it is applied twice per candidate.
    significant = re.compile(r'[A-Z\d]+')

    # Check all acronyms.
    for acronym, definition in re.findall(acronyms, text):
        caps_acronym = ''.join(significant.findall(acronym))
        caps_definition = ''.join(significant.findall(definition))
        # Only a pair whose capitals and digits agree is an acronym;
        # anything else is just a word followed by a parenthesis.
        if caps_acronym and caps_acronym == caps_definition:
            text = text.replace('%s(%s)' % (acronym, definition), '<acronym title="%s">%s</acronym>' % (definition, acronym))

    # The lookahead skips capitals directly followed by '('.
    text = html_replace(r'''(^|\s)([A-Z]{3,})\b(?!\()''', r'''\1<span class="caps">\2</span>''', text)

    return text
def footnotes(self, text):
    """Add titles to footnotes references.

    Looks for rendered footnote paragraphs
    (<p class="footnote" id="fnN"><sup>N</sup>...</p>) and, for each
    one, rewrites every plain '<a href="#fnN">' link in the text so
    that it carries a title attribute holding the footnote's text with
    the HTML tags removed.

    text -- the generated HTML.

    Returns the HTML with the titles added.
    """
    footnote_re = re.compile(r'''<p class="footnote" id="fn(?P<n>\d+)"><sup>(?P=n)</sup>(?P<note>.*)</p>''')

    # The iterator keeps scanning the string it was given, so
    # rebinding 'text' inside the loop does not disturb the search.
    for found in footnote_re.finditer(text):
        number, note = found.group('n', 'note')

        # Title text: the note, trimmed and without any markup.
        title = re.sub('<.*?>', '', note.strip())

        plain_link = '<a href="#fn%s">' % number
        titled_link = '<a href="#fn%s" title="%s">' % (number, title)
        text = text.replace(plain_link, titled_link)

    return text
def macros(self, m):
    """Quick macros.

    This function replaces macros inside brackets using a built-in
    dictionary, and also unicode names if the key doesn't exist.

    m -- a regular expression match whose first group is the text
         between the curly braces.

    Returns the numeric character reference for a known macro, the
    character reference of the Unicode character with that name, or
    the original '{...}' text when neither lookup succeeds.

    NOTE(review): the text after the dashes below is Textile markup;
    about() splits docstrings on that marker, so keep it in place.

    ---
    h1. Macros

    Textile has support for character macros, which should be enclosed
    in curly braces. A few useful ones are:

    pre. {C=} or {=C}: euro sign
    {+-} or {-+}: plus-minus sign
    {L-} or {-L}: pound sign.

    You can also make accented characters:

    pre. Expos{e'}

    Becomes:

    pre. <p>Expos&#233;</p>

    You can also specify Unicode names like:

    pre. {umbrella}
    {white smiling face}
    """
    # Values are numeric character references, not literal characters:
    # they are plain ASCII, so they survive any source or output encoding.
    table = {
        'c|': '&#162;', '|c': '&#162;',      # cent sign
        'L-': '&#163;', '-L': '&#163;',      # pound sign
        'Y=': '&#165;', '=Y': '&#165;',      # yen sign
        '(c)': '&#169;',                     # copyright sign
        '<<': '&#171;',                      # left-pointing double angle quotation
        '(r)': '&#174;',                     # registered sign
        '+_': '&#177;', '_+': '&#177;',      # plus-minus sign
        '+-': '&#177;', '-+': '&#177;',      # plus-minus sign, as documented above
        '>>': '&#187;',                      # right-pointing double angle quotation
        '1/4': '&#188;',                     # vulgar fraction one quarter
        '1/2': '&#189;',                     # vulgar fraction one half
        '3/4': '&#190;',                     # vulgar fraction three quarters
        'A`': '&#192;', '`A': '&#192;',      # latin capital letter a with grave
        "A'": '&#193;', "'A": '&#193;',      # latin capital letter a with acute
        'A^': '&#194;', '^A': '&#194;',      # latin capital letter a with circumflex
        'A~': '&#195;', '~A': '&#195;',      # latin capital letter a with tilde
        'A"': '&#196;', '"A': '&#196;',      # latin capital letter a with diaeresis
        'Ao': '&#197;', 'oA': '&#197;',      # latin capital letter a with ring above
        'AE': '&#198;',                      # latin capital letter ae
        'C,': '&#199;', ',C': '&#199;',      # latin capital letter c with cedilla
        'E`': '&#200;', '`E': '&#200;',      # latin capital letter e with grave
        "E'": '&#201;', "'E": '&#201;',      # latin capital letter e with acute
        'E^': '&#202;', '^E': '&#202;',      # latin capital letter e with circumflex
        'E"': '&#203;', '"E': '&#203;',      # latin capital letter e with diaeresis
        'I`': '&#204;', '`I': '&#204;',      # latin capital letter i with grave
        "I'": '&#205;', "'I": '&#205;',      # latin capital letter i with acute
        'I^': '&#206;', '^I': '&#206;',      # latin capital letter i with circumflex
        'I"': '&#207;', '"I': '&#207;',      # latin capital letter i with diaeresis
        'D-': '&#208;', '-D': '&#208;',      # latin capital letter eth
        'N~': '&#209;', '~N': '&#209;',      # latin capital letter n with tilde
        'O`': '&#210;', '`O': '&#210;',      # latin capital letter o with grave
        "O'": '&#211;', "'O": '&#211;',      # latin capital letter o with acute
        'O^': '&#212;', '^O': '&#212;',      # latin capital letter o with circumflex
        'O~': '&#213;', '~O': '&#213;',      # latin capital letter o with tilde
        'O"': '&#214;', '"O': '&#214;',      # latin capital letter o with diaeresis
        'O/': '&#216;', '/O': '&#216;',      # latin capital letter o with stroke
        'U`': '&#217;', '`U': '&#217;',      # latin capital letter u with grave
        "U'": '&#218;', "'U": '&#218;',      # latin capital letter u with acute
        'U^': '&#219;', '^U': '&#219;',      # latin capital letter u with circumflex
        'U"': '&#220;', '"U': '&#220;',      # latin capital letter u with diaeresis
        "Y'": '&#221;', "'Y": '&#221;',      # latin capital letter y with acute
        'a`': '&#224;', '`a': '&#224;',      # latin small letter a with grave
        "a'": '&#225;', "'a": '&#225;',      # latin small letter a with acute
        'a^': '&#226;', '^a': '&#226;',      # latin small letter a with circumflex
        'a~': '&#227;', '~a': '&#227;',      # latin small letter a with tilde
        'a"': '&#228;', '"a': '&#228;',      # latin small letter a with diaeresis
        'ao': '&#229;', 'oa': '&#229;',      # latin small letter a with ring above
        'ae': '&#230;',                      # latin small letter ae
        'c,': '&#231;', ',c': '&#231;',      # latin small letter c with cedilla
        'e`': '&#232;', '`e': '&#232;',      # latin small letter e with grave
        "e'": '&#233;', "'e": '&#233;',      # latin small letter e with acute
        'e^': '&#234;', '^e': '&#234;',      # latin small letter e with circumflex
        'e"': '&#235;', '"e': '&#235;',      # latin small letter e with diaeresis
        'i`': '&#236;', '`i': '&#236;',      # latin small letter i with grave
        "i'": '&#237;', "'i": '&#237;',      # latin small letter i with acute
        'i^': '&#238;', '^i': '&#238;',      # latin small letter i with circumflex
        'i"': '&#239;', '"i': '&#239;',      # latin small letter i with diaeresis
        'n~': '&#241;', '~n': '&#241;',      # latin small letter n with tilde
        'o`': '&#242;', '`o': '&#242;',      # latin small letter o with grave
        "o'": '&#243;', "'o": '&#243;',      # latin small letter o with acute
        'o^': '&#244;', '^o': '&#244;',      # latin small letter o with circumflex
        'o~': '&#245;', '~o': '&#245;',      # latin small letter o with tilde
        'o"': '&#246;', '"o': '&#246;',      # latin small letter o with diaeresis
        ':-': '&#247;', '-:': '&#247;',      # division sign
        'o/': '&#248;', '/o': '&#248;',      # latin small letter o with stroke
        'u`': '&#249;', '`u': '&#249;',      # latin small letter u with grave
        "u'": '&#250;', "'u": '&#250;',      # latin small letter u with acute
        'u^': '&#251;', '^u': '&#251;',      # latin small letter u with circumflex
        'u"': '&#252;', '"u': '&#252;',      # latin small letter u with diaeresis
        "y'": '&#253;', "'y": '&#253;',      # latin small letter y with acute
        'y"': '&#255;', '"y': '&#255;',      # latin small letter y with diaeresis
        'OE': '&#338;',                      # latin capital ligature oe
        'oe': '&#339;',                      # latin small ligature oe
        '*': '&#8226;',                      # bullet
        'Fr': '&#8355;',                     # french franc sign
        'L=': '&#8356;', '=L': '&#8356;',    # lira sign
        'Rs': '&#8360;',                     # rupee sign
        'C=': '&#8364;', '=C': '&#8364;',    # euro sign
        'tm': '&#8482;',                     # trade mark sign
        '<-': '&#8592;',                     # leftwards arrow
        '->': '&#8594;',                     # rightwards arrow
        '<=': '&#8656;',                     # leftwards double arrow
        '=>': '&#8658;',                     # rightwards double arrow
        '=/': '&#8800;', '/=': '&#8800;',    # not equal to
        '<_': '&#8804;', '_<': '&#8804;',    # less-than or equal to
        '>_': '&#8805;', '_>': '&#8805;',    # greater-than or equal to
        ':(': '&#9785;',                     # white frowning face
        ':)': '&#9786;',                     # white smiling face
        'spade': '&#9824;',                  # black spade suit
        'club': '&#9827;',                   # black club suit
        'heart': '&#9829;',                  # black heart suit
        'diamond': '&#9830;',                # black diamond suit
    }

    entity = m.group(1)

    # Try one of our macros.
    if entity in table:
        return table[entity]

    try:
        # Try a unicode entity.
        char = unicodedata.lookup(entity)
        return char.encode('ascii', 'xmlcharrefreplace')
    except (KeyError, TypeError, ValueError):
        # Unknown name (KeyError) or a name that cannot be looked up
        # at all: return the unmodified entity, braces included.
        return '{%s}' % entity
2372 def glyphs(self
, text
):
2373 """Glyph formatting.
2375 This function replaces quotations marks, dashes and a few other
2376 symbol for numerical entities. The em/en dashes use definitions
2377 comes from http://alistapart.com/articles/emen/.
2382 Textile replaces some of the characters in your text with their
2383 equivalent numerical entities. These include:
2385 * Replace single and double primes used as quotation marks with HTML(HyperText Markup Language) entities for opening and closing quotation marks in readable text, while leaving untouched the primes required within HTML(HyperText Markup Language) tags.
2386 * Replace double hyphens (==--==) with an em-dash (—) entity.
2387 * Replace triple hyphens (==---==) with two em-dash (——) entities.
2388 * Replace single hyphens surrounded by spaces with an en-dash (–) entity.
2389 * Replace triplets of periods (==...==) with an ellipsis (…) entity.
2390 * Convert many nonstandard characters to browser-safe entities corresponding to keyboard input.
2391 * Convert ==(TM)==, ==(R)==, and ==(C)== to ™, ®, and ©.
2392 * Convert the letter x to a dimension sign: 2==x==4 to 2x4 and 8 ==x== 10 to 8x10.
2394 glyphs
= [(r
'''"(?<!\w)\b''', r
'''“'''), # double quotes
2395 (r
'''"''', r
'''”'''), # double quotes
2396 (r
"""\b'""", r
'''’'''), # single quotes
2397 (r
"""'(?<!\w)\b""", r
'''‘'''), # single quotes
2398 (r
"""'""", r
'''’'''), # single single quote
2399 (r
'''(\b|^)( )?\.{3}''', r
'''\1…'''), # ellipsis
2400 (r
'''\b---\b''', r
'''——'''), # double em dash
2401 (r
'''\s?--\s?''', r
'''—'''), # em dash
2402 (r
'''(\d+)-(\d+)''', r
'''\1–\2'''), # en dash (1954-1999)
2403 (r
'''(\d+)-(\W)''', r
'''\1—\2'''), # em dash (1954--)
2404 (r
'''\s-\s''', r
''' – '''), # en dash
2405 (r
'''(\d+) ?x ?(\d+)''', r
'''\1×\2'''), # dimension sign
2406 (r
'''\b ?(\((tm|TM)\))''', r
'''™'''), # trademark
2407 (r
'''\b ?(\([rR]\))''', r
'''®'''), # registered
2408 (r
'''\b ?(\([cC]\))''', r
'''©'''), # copyright
2409 (r
'''([^\s])\[(\d+)\]''', #
2410 r
'''\1<sup class="footnote"><a href="#fn\2">\2</a></sup>'''),# footnote
2414 text
= re
.sub(r
'''{([^}]+)}''', self
.macros
, text
)
2416 # LaTeX style quotes.
2417 text
= text
.replace('\x60\x60', '“')
2418 text
= text
.replace('\xb4\xb4', '”')
2420 # Linkify URL and emails.
2421 url
= r
'''(?=[a-zA-Z0-9./#]) # Must start correctly
2422 ((?: # Match the leading part (proto://hostname, or just hostname)
2423 (?:ftp|https?|telnet|nntp) # protocol
2425 (?: # Optional 'username:password@'
2427 (?::\w+)? # optional :password
2430 [-\w]+(?:\.\w[-\w]*)+ # hostname (sub.example.com)
2432 (?::\d+)? # Optional port number
2433 (?: # Rest of the URL, optional
2435 [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]* # Can't start with these
2437 [.!,?;:]+ # One or more of these
2438 [^.!,?;:"'<>()\[\]{}\s\x7F-\xFF]+ # Can't finish with these
2444 email
= r
'''(?:mailto:)? # Optional mailto:
2445 ([-\+\w]+ # username
2447 [-\w]+(?:\.\w[-\w]*)+) # hostname
2450 # If there is no html, do a simple search and replace.
2451 if not re
.search(r
'''<.*>''', text
):
2452 for glyph_search
, glyph_replace
in glyphs
:
2453 text
= preg_replace(glyph_search
, glyph_replace
, text
)
2456 text
= re
.sub(re
.compile(url
, re
.VERBOSE
), r
'''<a href="\1">\1</a>''', text
)
2457 text
= re
.sub(re
.compile(email
, re
.VERBOSE
), r
'''<a href="mailto:\1">\1</a>''', text
)
2461 # Else split the text into an array at <>.
2462 for line
in re
.split('(<.*?>)', text
):
2463 if not re
.match('<.*?>', line
):
2464 for glyph_search
, glyph_replace
in glyphs
:
2465 line
= preg_replace(glyph_search
, glyph_replace
, line
)
2468 line
= re
.sub(re
.compile(url
, re
.VERBOSE
), r
'''<a href="\1">\1</a>''', line
)
2469 line
= re
.sub(re
.compile(email
, re
.VERBOSE
), r
'''<a href="mailto:\1">\1</a>''', line
)
2473 text
= ''.join(lines
)
2478 def qtags(self
, text
):
2479 """Quick tags formatting.
2481 This function does the inline formatting of text, like
2482 bold, italic, strong and also itex code.
2487 Quick tags allow you to format your text, making it bold,
2488 emphasized or small, for example. The quick tags operators
2491 dl. ==*strong*==:Translates into @<strong>strong</strong>@.
2492 ==_emphasis_==:Translates into @<em>emphasis</em>@.
2493 ==**bold**==:Translates into @<b>bold</b>@.
2494 ==__italics__==:Translates into @<i>italics</i>@.
2495 ==++bigger++==:Translates into @<big>bigger</big>@.
2496 ==--smaller--==:Translates into: @<small>smaller</small>@.
2497 ==-deleted text-==:Translates into @<del>deleted text</del>@.
2498 ==+inserted text+==:Translates into @<ins>inserted text</ins>@.
2499 ==^superscript^==:Translates into @<sup>superscript</sup>@.
2500 ==~subscript~==:Translates into @<sub>subscript</sub>@.
2501 ==%span%==:Translates into @<span>span</span>@.
2502 ==@code@==:Translates into @<code>code</code>@.
2504 Note that within a "==@==...==@==" section, @<@ and @>@ are
2505 translated into HTML entities automatically.
2507 Inline formatting operators accept the following modifiers:
2509 dl. {style rule}:A CSS(Cascading Style Sheets) style rule.
2510 [ll]:A language identifier (for a "lang" attribute).
2511 (class) or (#id) or (class#id):For CSS(Cascading Style Sheets) class and id attributes.
2514 text
= re
.sub('\$(.*?)\$', lambda m
: self
.itex(m
.group()), text
)
2516 # Add span tags to upper-case words which don't have a description.
2517 #text = preg_replace(r'''(^|\s)([A-Z]{3,})\b(?!\()''', r'''\1<span class="caps">\2</span>''', text)
2520 qtags
= [('**', 'b', {'qf': '(?<!\*)\*\*(?!\*)', 'cls': '\*'}),
2521 ('__', 'i', {'qf': '(?<!_)__(?!_)', 'cls': '_'}),
2522 ('??', 'cite', {'qf': '\?\?(?!\?)', 'cls': '\?'}),
2523 ('-', 'del', {'qf': '(?<!\-)\-(?!\-)', 'cls': '-'}),
2524 ('+', 'ins', {'qf': '(?<!\+)\+(?!\+)', 'cls': '\+'}),
2525 ('*', 'strong', {'qf': '(?<!\*)\*(?!\*)', 'cls': '\*'}),
2526 ('_', 'em', {'qf': '(?<!_)_(?!_)', 'cls': '_'}),
2527 ('++', 'big', {'qf': '(?<!\+)\+\+(?!\+)', 'cls': '\+\+'}),
2528 ('--', 'small', {'qf': '(?<!\-)\-\-(?!\-)', 'cls': '\-\-'}),
2529 ('~', 'sub', {'qf': '(?<!\~)\~(?!(\\\/~))', 'cls': '\~'}),
2530 ('@', 'code', {'qf': '(?<!@)@(?!@)', 'cls': '@'}),
2531 ('%', 'span', {'qf': '(?<!%)%(?!%)', 'cls': '%'}),
2535 text
= re
.sub(r
'''(?<!\^)\^(?!\^)(.+?)(?<!\^)\^(?!\^)''', r
'''<sup>\1</sup>''', text
)
2537 # This is from the perl version of Textile.
2538 for qtag
, htmltag
, redict
in qtags
:
2539 self
.res
.update(redict
)
2540 p
= re
.compile(r
'''(?: #
2543 (?<=[\s>'"]) # Whitespace, end of tag, quotes
2545 (?P<pre>[{[]) # Surrounded by [ or {
2547 (?<=%(punct)s) # Punctuation
2549 %(qf)s # opening tag
2550 %(qattr)s # attributes
2551 (?P<text>[^%(cls)s\s].*?) # text
2552 (?<=\S) # non-whitespace
2557 (?P<post>[\]}]) # Surrounded by ] or }
2559 (?=%(punct)s{1,2}|\s) # punctuation
2561 ''' % self
.res
, re
.VERBOSE
)
2566 attributes
= self
.parse_params(c
['parameters'])
2567 open_tag
= self
.build_open_tag(htmltag
, attributes
)
2568 close_tag
= '</%s>' % htmltag
2570 # Replace < and > inside <code></code>.
2571 if htmltag
== 'code':
2572 c
['text'] = c
['text'].replace('<', '<')
2573 c
['text'] = c
['text'].replace('>', '>')
2575 return open_tag
+ c
['text'] + close_tag
2577 text
= p
.sub(_replace
, text
)
2582 def images(self
, text
):
2585 This function processes image tags, with or without links. Images
2586 can have vertical and/or horizontal alignment, and can be resized
2587 inefficiently using width and height tags.
2592 An image is generated by enclosing the image source in @!@:
2594 pre. !/path/to/image!
2596 You may optionally specify an alternative text for the image, which
2597 will also be used as its title:
2599 pre. !image.jpg (Nice picture)!
2603 pre. <p><img src="image.jpg" alt="Nice picture" title="Nice picture" /></p>
2605 If you want to make the image point to a link, simply append a
2606 colon and the URL(Universal Republic of Love) to the image:
2608 pre. !image.jpg!:http://diveintopython.org
2610 Images can also be resized. These are all equivalent:
2612 pre. !image.jpg 10x20!
2616 The image @image.jpg@ will be resized to width 10 and height 20.
2618 Modifiers to the @<img>@ tag go after the opening @!@:
2620 pre. !(class#id)^image.jpg!
2622 Allowed modifiers include:
2624 dl. <:Align the image to the left (causes the image to float if CSS options are enabled).
2625 >:Align the image to the right (causes the image to float if CSS options are enabled).
2626 - (dash):Aligns the image to the middle.
2627 ^:Aligns the image to the top.
2628 ~ (tilde):Aligns the image to the bottom.
2629 {style rule}:Applies a CSS style rule to the image.
2630 (class) or (#id) or (class#id):Applies a CSS class and/or id to the image.
2631 ( (one or more):Pads 1em on the left for each '(' character.
2632 ) (one or more):Pads 1em on the right for each ')' character.
2634 Images receive the class "top" when using top alignment, "bottom"
2635 for bottom alignment and "middle" for middle alignment.
2637 # Compile the beast.
2638 p
= re
.compile(r
'''\! # Opening !
2639 %(iattr)s # Image attributes
2640 (?P<src>%(url)s) # Image src
2641 \s? # Optional whitesapce
2644 (?P<alt>.*?) # Optional (alt) attribute
2647 \s? # Optional whitespace
2648 %(resize)s # Resize parameters
2656 ''' % self
.res
, re
.VERBOSE
)
2658 for m
in p
.finditer(text
):
2661 # Build the parameters for the <img /> tag.
2662 attributes
= self
.parse_params(c
['parameters'], align_type
='image')
2663 attributes
.update(c
)
2664 if attributes
['alt']:
2665 attributes
['title'] = attributes
['alt']
2667 # Append height and width.
2668 attributes
['width'] = m
.groups()[5] or m
.groups()[7] or m
.groups()[10]
2669 attributes
['height'] = m
.groups()[6] or m
.groups()[8] or m
.groups()[9]
2671 # Create the image tag.
2672 tag
= self
.image(attributes
)
2674 text
= text
.replace(m
.group(), tag
)
def image(self, attributes):
    """Build the markup for a single image.

    This is kept apart from the 'images' method so that it can easily be
    overridden when subclassing Textiler, for instance to download and/or
    process the images.

    'attributes' is the attribute dictionary for one image. Its 'link'
    and 'parameters' entries are removed (the dictionary is modified in
    place) and what remains is handed to 'build_open_tag'.

    Returns the <img> tag, wrapped in an <a> element when a link was
    given.
    """
    # 'link' and 'parameters' are bookkeeping entries rather than <img>
    # attributes, so they must be gone before the tag is built.
    link = attributes.pop('link')
    del attributes['parameters']

    tag = self.build_open_tag('img', attributes, single=1)

    if link:
        # Escape bare ampersands in the target. Ones that already start
        # an entity ('&#...' or '&amp...') are skipped by the look-ahead.
        href = preg_replace('&(?!(#|amp))', '&amp;', link)
        tag = '<a href="%s">%s</a>' % (href, tag)

    return tag
2701 def links(self
, text
):
2704 This function is responsible for processing links. It has
2705 some nice shortcuts to Google, Amazon and IMDB queries.
2710 A link is written the following way:
2712 pre. "This is the text link":http://example.com
2714 The result from this markup is:
2716 pre. <p><a href="http://example.com">This is the text link</a></p>
2718 You can add an optional @title@ attribute:
2720 pre. "This is the text link(This is the title)":http://example.com
2722 The link can be customised as well:
2724 pre. "(nospam)E-mail me please":mailto:someone@example.com
2726 You can use either single or double quotes. They must be enclosed in
2727 whitespace, punctuation or brackets:
2729 pre. You["gotta":http://example.com]seethis!
2731 If you are going to reference the same link a couple of times, you
2732 can define a lookup list anywhere on your document:
2734 pre. [python]http://www.python.org
2736 Links to the Python website can then be defined the following way:
2738 pre. "Check this":python
2740 There are also shortcuts for Amazon, IMDB(Internet Movie DataBase) and
2743 pre. "Has anyone seen this guy?":imdb:Stephen+Fry
2744 "Really nice book":amazon:Goedel+Escher+Bach
2746 ["Using Textile and Blosxom with Python":google:python blosxom textile]
2750 pre. <a href="http://www.imdb.com/Find?for=Stephen+Fry">Has anyone seen this guy?</a>
2751 <a href="http://www.amazon.com/exec/obidos/external-search?index=blended&keyword=Goedel+Escher+Bach">Really nice book</a>
2752 <a href="http://www.google.com/search?q=PyBlosxom">PyBlosxom</a>
2753 <a href="http://www.google.com/search?q=python+blosxom+textile">Using Textile and Blosxom with Python</a>
2755 linkres
= [r
'''\[ # [
2756 (?P<quote>"|') # Opening quotes
2757 %(lattr)s # Link attributes
2758 (?P<text>[^"]+?) # Link text
2759 \s? # Optional whitespace
2760 (?:\((?P<title>[^\)]+?)\))? # Optional (title)
2761 (?P=quote) # Closing quotes
2763 (?P<href>[^\]]+) # HREF
2766 r
'''(?P<quote>"|') # Opening quotes
2767 %(lattr)s # Link attributes
2768 (?P<text>[^"]+?) # Link text
2769 \s? # Optional whitespace
2770 (?:\((?P<title>[^\)]+?)\))? # Optional (title)
2771 (?P=quote) # Closing quotes
2773 (?P<href>%(url)s) # HREF
2776 for linkre
in linkres
:
2777 p
= re
.compile(linkre
, re
.VERBOSE
)
2778 for m
in p
.finditer(text
):
2781 attributes
= self
.parse_params(c
['parameters'])
2782 attributes
['title'] = c
['title'].replace('"', '"')
2784 # Search lookup list.
2785 link
= self
._links
.get(c
['href'], None) or c
['href']
2787 # Hyperlinks for Amazon, IMDB and Google searches.
2788 parts
= link
.split(':', 1)
2795 query
= query
.replace(' ', '+')
2797 # Look for smart search.
2798 if self
.searches
.has_key(proto
):
2799 link
= self
.searches
[proto
] % query
2802 attributes
['href'] = preg_replace('&(?!(#|amp))', '&', link
)
2804 open_tag
= self
.build_open_tag('a', attributes
)
2807 repl
= open_tag
+ c
['text'] + close_tag
2809 text
= text
.replace(m
.group(), repl
)
def format(self, text):
    """Text formatting.

    Runs the inline formatters over 'text' and returns the result. The
    order matters: quick tags, images, links, acronyms and finally
    glyphs, each one working on the output of the previous one.
    """
    for stage in ('qtags', 'images', 'links', 'acronym', 'glyphs'):
        # Look the formatter up by name right before calling it, so the
        # stages run strictly one after the other.
        text = getattr(self, stage)(text)

    return text
def inline(self, text):
    """Inline formatting.

    Applies 'self.format' to the inline text while leaving the escaped
    parts alone: anything written as ==...== is emitted without the
    two delimiters on each side and without any formatting.

    Inline formatting is applied within a block of text.
    """
    # Nothing escaped: format the whole text in one go.
    if not re.search(r'''==(.*?)==''', text):
        return self.format(text)

    # Split the text at the ==...== escapes. The capturing group keeps
    # the escaped spans in the list so they can be told apart below.
    rendered = []
    for chunk in re.split('(==.*?==)', text):
        if re.match('==.*?==', chunk):
            # Escaped span: drop the delimiters, keep the content as is.
            rendered.append(chunk[2:-2])
        else:
            rendered.append(self.format(chunk))

    return ''.join(rendered)
def textile(text, **args):
    """Generate XHTML from Textile, a simple markup developed by Dean Allen.

    A Textiler is created for 'text' and every keyword argument is passed
    on unchanged to its 'process' method, whose result is returned.

    This function should be called like this:

        textile(text, head_offset=0, validate=0, sanitize=0,
                encoding='latin-1', output='ASCII')
    """
    converter = Textiler(text)
    return converter.process(**args)
if __name__ == '__main__':
    # Running the module directly prints the feature overview that the
    # module header describes for the 'tell me about textile.' input.
    # The parenthesised single-argument form of print behaves the same
    # as a statement (Python 2) and as a function call (Python 3).
    print(textile('tell me about textile.', head_offset=1))