4 # :Author: Georg Brandl; Felix Wiemann; Günter Milde
6 # :Copyright: This module has been placed in the public domain.
8 # This is a merge of `Using Pygments in ReST documents`_ from the pygments_
9 # documentation, and a `proof of concept`_ by Felix Wiemann.
11 # .. class:: borderless
13 # ========== =============================================================
14 # 2007-06-01 Removed redundancy from class values.
15 # 2007-06-04 Merge of successive tokens of same type
16 # (code taken from pygments.formatters.others).
17 # 2007-06-05 Separate docutils formatter script
18 # Use pygments' CSS class names (like the html formatter)
19 # allowing the use of pygments-produced style sheets.
20 # 2007-06-07 Merge in the formatting of the parsed tokens
21 # (misnamed as docutils_formatter) as class DocutilsInterface
22 # 2007-06-08 Failsafe implementation (fallback to a standard literal block
23 # if pygments not found)
24 # 2010-11-27 Rename directive from "code-block" to "code".
25 # Fix fallback if pygments not found.
26 # Use class-based interface.
27 # Add "number-lines" option.
28 # ========== =============================================================
32 """Define and register a code directive using pygments"""
38 from docutils
import nodes
39 from docutils
.parsers
.rst
import directives
, Directive
40 from docutils
.parsers
.rst
.roles
import set_classes
43 from pygments
.lexers
import get_lexer_by_name
44 from pygments
.formatters
.html
import _get_ttype_class
# Do not insert inline nodes for the following tokens.
# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::

unstyled_tokens = ['']  # Token.Text
60 # This interface class combines code from
61 # pygments.formatters.html and pygments.formatters.others.
64 """Parse `code` lines and yield "classified" tokens.
68 code -- list of source code lines to parse
69 language -- formal language the code is written in.
71 Merge subsequent tokens of the same token-type.
73 Iterating over an instance yields the tokens as ``(ttype_class, value)``
74 tuples, where `ttype_class` is taken from pygments.token.STANDARD_TYPES
75 and corresponds to the class argument used in pygments html output.
def __init__(self, code, language):
    """Set up a lexical analyzer for `code` in `language`.

    Arguments

      code     -- list of source code lines to parse
      language -- formal language the code is written in;
                  ``''`` and ``'text'`` select "no lexical analysis".

    Raise `docutils.ApplicationError` if Pygments cannot be imported or
    if it provides no lexer for `language`.
    """
    self.code = code
    self.language = language
    # `None` means: do not analyze, pass the code through unchanged.
    self.lexer = None
    if language in ('', 'text'):
        return
    # Imported locally so that this method works without relying on
    # module-level names that may be undefined when Pygments is missing.
    from docutils import ApplicationError
    try:
        import pygments.util
        from pygments.lexers import get_lexer_by_name
    except ImportError:
        raise ApplicationError('Cannot highlight code. '
                               'Pygments package not found.')
    # get lexical analyzer for `language`:
    try:
        self.lexer = get_lexer_by_name(self.language)
    except pygments.util.ClassNotFound:
        raise ApplicationError('Cannot highlight code. '
                               'No Pygments lexer found for "%s".' % language)
97 # Since version 1.2. (released Jan 01, 2010) Pygments has a
98 # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ can be
99 # replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
def merge(self, tokens):
    """Merge subsequent tokens of same token-type.

    Also strip the final newline (added by pygments).

    `tokens` is an iterable of ``(ttype, value)`` pairs.  Consecutive
    pairs whose `ttype` is the same object are joined into one pair.
    An empty `tokens` iterable yields nothing.
    """
    tokens = iter(tokens)
    try:
        # next() exists on Python 2.6+ and 3; iterators lost .next() in 3
        (lasttype, lastval) = next(tokens)
    except StopIteration:
        # nothing to merge; returning avoids leaking StopIteration
        # out of the generator body
        return
    for ttype, value in tokens:
        if ttype is lasttype:
            lastval += value
        else:
            yield (lasttype, lastval)
            (lasttype, lastval) = (ttype, value)
    # drop the trailing newline; skip the last token if nothing is left
    if lastval.endswith('\n'):
        lastval = lastval[:-1]
    if lastval:
        yield (lasttype, lastval)
def __iter__(self):
    """Parse self.code and yield "classified" tokens.

    Yield ``(ttype_class, value)`` tuples.  If no lexer is set
    (``self.lexer is None``), the complete code is yielded as a single
    token with the empty class ``''``.
    """
    codestring = u'\n'.join(self.code)
    if self.lexer is None:
        # A plain 2-tuple like every other item (not a list wrapping
        # it), so that ``for cls, value in lexer`` can unpack it.
        yield ('', codestring)
        return
    import pygments  # local import: only needed when a lexer is set
    tokens = pygments.lex(codestring, self.lexer)
    for ttype, value in self.merge(tokens):
        # yield (ttype, value)  # token type objects
        yield (_get_ttype_class(ttype), value)  # short name strings
class NumberLines(object):
    """Insert linenumber-tokens in front of every line.

    Arguments

       tokens    -- iterable of ``(ttype_class, value)`` tuples
       startline -- first line number
       endline   -- last line number

    Iterating over an instance yields the tokens preceded by
    a ``('ln', '<line number>')`` token for every line.
    Multi-line tokens from pygments are split.
    """

    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))

    def __iter__(self):
        """Yield the wrapped tokens with ``('ln', ...)`` tokens inserted."""
        lineno = self.startline
        yield ('ln', self.fmt_str % lineno)
        for ttype, value in self.tokens:
            lines = value.split('\n')
            # every piece except the last one ended with a newline:
            # re-attach it and start the next line with its number
            for line in lines[:-1]:
                yield (ttype, line + '\n')
                lineno += 1
                yield ('ln', self.fmt_str % lineno)
            yield (ttype, lines[-1])
161 # CodeBlock directive
162 # --------------------
class CodeBlock(Directive):
    """Parse and mark up content of a code block.

    The optional directive argument names the language of the content.
    The content is split into tokens by `Lexer` and returned as a
    ``literal_block`` node with classes ``code`` and the language name.
    Tokens whose class is not listed in `unstyled_tokens` are wrapped in
    ``inline`` nodes carrying the token class.

    Options

      class        -- additional classes for the literal block
      name         -- reference name for the literal block
      number-lines -- precede every line with a line number; the optional
                      value is the number of the first line (default 1)
    """
    optional_arguments = 1
    option_spec = {'class': directives.class_option,
                   'name': directives.unchanged,
                   'number-lines': directives.unchanged,  # integer or None
                  }
    # required for the ``assert_has_content()`` call in `run`
    has_content = True

    def run(self):
        """Return a list with one ``literal_block`` node for the content.

        Raise the directive's error if the ``number-lines`` value is not
        an integer.
        """
        self.assert_has_content()
        # The language argument is optional (``optional_arguments = 1``),
        # so ``self.arguments`` may be empty.
        if self.arguments:
            language = self.arguments[0]
        else:
            language = ''
        set_classes(self.options)
        classes = ['code']
        if language:
            classes.append(language)
        if 'classes' in self.options:
            classes.extend(self.options['classes'])

        # TODO: config setting to skip lexical analysis
        # (e.g. a ``no_highlight`` document setting).

        # set up lexical analyzer
        tokens = Lexer(self.content, language)

        if 'number-lines' in self.options:
            # optional argument `startline`, defaults to 1
            try:
                startline = int(self.options['number-lines'] or 1)
            except ValueError:
                raise self.error(':number-lines: with non-integer '
                                 'start value')
            endline = startline + len(self.content)
            # add linenumber filter:
            tokens = NumberLines(tokens, startline, endline)

        node = nodes.literal_block('\n'.join(self.content), classes=classes)
        # honour the "name" option accepted in `option_spec`
        self.add_name(node)
        # analyze content and add nodes for every token
        for cls, value in tokens:
            if cls in unstyled_tokens:
                # insert as Text to decrease the verbosity of the output.
                node += nodes.Text(value, value)
            else:
                node += nodes.inline(value, value, classes=[cls])

        return [node]
# Make the directive available under the name "code".
directives.register_directive('code', CodeBlock)
224 # .. _doctutils: http://docutils.sf.net/
225 # .. _pygments: http://pygments.org/
226 # .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
227 # .. _proof of concept:
228 # http://article.gmane.org/gmane.text.docutils.user/3689
233 # If called from the command line, call the docutils publisher to render the
if __name__ == '__main__':
    from docutils.core import publish_cmdline, default_description
    # Separate the two sentences; without the separator the help text
    # reads "...test outputReads from ...".
    description = ('code-block directive test output.  '
                   + default_description)
    import locale
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # the environment requests an unsupported locale: keep the default
        pass
    # Uncomment the desired output format:
    # publish_cmdline(writer_name='pseudoxml', description=description)
    # publish_cmdline(writer_name='xml', description=description)
    # publish_cmdline(writer_name='html', description=description)
    publish_cmdline(writer_name='latex', description=description)