#!/usr/bin/python
# coding: utf-8

# :Author: Georg Brandl; Felix Wiemann; Günter Milde
# :Date: $Date$
# :Copyright: This module has been placed in the public domain.

# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
# documentation, and a `proof of concept`_ by Felix Wiemann.

# .. class:: borderless

# ========== =============================================================
# 2007-06-01 Removed redundancy from class values.
# 2007-06-04 Merge of successive tokens of same type
#            (code taken from pygments.formatters.others).
# 2007-06-05 Separate docutils formatter script
#            Use pygments' CSS class names (like the html formatter)
#            allowing the use of pygments-produced style sheets.
# 2007-06-07 Merge in the formatting of the parsed tokens
#            (misnamed as docutils_formatter) as class DocutilsInterface
# 2007-06-08 Failsafe implementation (fallback to a standard literal block
#            if pygments not found)
# 2010-11-27 Rename directive from "code-block" to "code".
#            Fix fallback if pygments not found.
#            Use class-based interface.
#            Add "number-lines" option.
# ========== =============================================================

# ::

"""Define and register a code directive using pygments"""

# Requirements
# ------------
# ::

from docutils import nodes, ApplicationError
from docutils.parsers.rst import directives, Directive
from docutils.parsers.rst.roles import set_classes

try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

# Customisation
# -------------

# Do not insert inline nodes for the following tokens.
# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::

unstyled_tokens = ['']  # Token.Text

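# For illustration, the variant mentioned above would look like this
# (not active here)::
#
#   unstyled_tokens = ['', 'p']  # Token.Text and Token.Punctuation
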
# Lexer
# -----

# This interface class combines code from
# pygments.formatters.html and pygments.formatters.others.
class Lexer(object):
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code     -- list of source code lines to parse
      language -- formal language the code is written in.

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(ttype_class, value)``
    tuples, where `ttype_class` is taken from pygments.token.STANDARD_TYPES
    and corresponds to the class argument used in pygments html output.
    """
    def __init__(self, code, language):
        """
        Set up a lexical analyzer for `code` in `language`.
        """
        self.code = code
        self.language = language
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text'):
            return
        if not with_pygments:
            raise ApplicationError('Cannot highlight code. '
                                   'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except pygments.util.ClassNotFound:
            raise ApplicationError('Cannot highlight code. '
                                   'No Pygments lexer found for "%s".' % language)

    # Since version 1.2 (released Jan 01, 2010) Pygments has a
    # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ can be
    # replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
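    # A minimal sketch of that alternative (untested here; assumes a
    # Pygments >= 1.2 installation), placed at the end of __init__::
    #
    #   if self.lexer is not None:
    #       self.lexer.add_filter('tokenmerge')
    #   # ... and drop the ``self.merge(tokens)`` call in __iter__.
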
    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

           Also strip the final '\n' (added by pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = tokens.next()
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                (lasttype, lastval) = (ttype, value)
        if lastval != '\n':
            yield (lasttype, lastval)

    def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        codestring = u'\n'.join(self.code)
        if self.lexer is None:
            # no lexer: return the whole code as one unstyled token
            yield ('', codestring)
            return
        tokens = pygments.lex(codestring, self.lexer)
        for ttype, value in self.merge(tokens):
            # yield (ttype, value)  # token type objects
            yield (_get_ttype_class(ttype), value)  # short name strings

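# A minimal usage sketch (illustrative; assumes Pygments provides a
# ``python`` lexer)::
#
#   for cls, value in Lexer(['x = 1'], 'python'):
#       print (cls, value)   # e.g. ('n', 'x'), ('', ' '), ('o', '='), ...
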
class NumberLines(object):
    """Insert linenumber-tokens in front of every newline.

    Arguments

      tokens    -- iterable of ``(ttype_class, value)`` tuples
      startline -- first line number
      endline   -- last line number

    Iterating over an instance yields the tokens preceded by
    a ``('ln', '<line number>')`` token for every line.
    Multi-line tokens from pygments are split.
    """
    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))

    def __iter__(self):
        lineno = self.startline
        yield ('ln', self.fmt_str % lineno)
        for ttype, value in self.tokens:
            lines = value.split('\n')
            for line in lines[:-1]:
                yield (ttype, line + '\n')
                lineno += 1
                yield ('ln', self.fmt_str % lineno)
            yield (ttype, lines[-1])

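# A minimal usage sketch (illustrative)::
#
#   tokens = NumberLines(Lexer(['x = 1', 'y = 2'], 'python'), 1, 2)
#   for cls, value in tokens:
#       print (cls, value)   # starts with ('ln', '1 '), ...
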
# CodeBlock directive
# -------------------
# ::

class CodeBlock(Directive):
    """Parse and mark up content of a code block.
    """
    optional_arguments = 1
    option_spec = {'class': directives.class_option,
                   'name': directives.unchanged,
                   'number-lines': directives.unchanged  # integer or None
                  }
    has_content = True

    def run(self):
        self.assert_has_content()
        if self.arguments:
            language = self.arguments[0]
        else:
            language = ''
        set_classes(self.options)
        classes = ['code', language]
        if 'classes' in self.options:
            classes.extend(self.options['classes'])

        # TODO: config setting to skip lexical analysis:
        ## if document.settings.no_highlight:
        ##     language = ''

        # set up lexical analyzer
        tokens = Lexer(self.content, language)

        if 'number-lines' in self.options:
            # optional argument `startline`, defaults to 1
            try:
                startline = int(self.options['number-lines'] or 1)
            except ValueError:
                raise self.error(':number-lines: with non-integer start value')
            endline = startline + len(self.content)
            # add linenumber filter:
            tokens = NumberLines(tokens, startline, endline)

        node = nodes.literal_block('\n'.join(self.content), classes=classes)
        self.add_name(node)

        # analyze content and add nodes for every token
        for cls, value in tokens:
            # print (cls, value)
            if cls in unstyled_tokens:
                # insert as Text to decrease the verbosity of the output.
                node += nodes.Text(value, value)
            else:
                node += nodes.inline(value, value, classes=[cls])

        return [node]

# Register Directive
# ------------------
# ::

directives.register_directive('code', CodeBlock)

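# Once registered, the directive can be used in a reST document like this
# (an illustrative snippet, not part of this module)::
#
#   .. code:: python
#      :number-lines: 1
#
#      def hello():
#          print "Hello, world!"
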
# .. _docutils: http://docutils.sf.net/
# .. _pygments: http://pygments.org/
# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
# .. _proof of concept:
#    http://article.gmane.org/gmane.text.docutils.user/3689

# Test output
# -----------

# If called from the command line, call the docutils publisher to render the
# input::

if __name__ == '__main__':
    from docutils.core import publish_cmdline, default_description
    description = 'code-block directive test output' + default_description
    try:
        import locale
        locale.setlocale(locale.LC_ALL, '')
    except:
        pass
    # Uncomment the desired output format:
    # publish_cmdline(writer_name='pseudoxml', description=description)
    # publish_cmdline(writer_name='xml', description=description)
    # publish_cmdline(writer_name='html', description=description)
    publish_cmdline(writer_name='latex', description=description)
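
# Example invocation (illustrative; the file names are placeholders)::
#
#   python pygments_code_block_directive.py example.txt example.tex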