#!/usr/bin/python
# coding: utf-8

# :Author: Georg Brandl; Felix Wiemann; Günter Milde
# :Date: $Date$
# :Copyright: This module has been placed in the public domain.

# This is a merge of `Using Pygments in ReST documents`_ from the pygments_
# documentation, and a `proof of concept`_ by Felix Wiemann.

# .. class:: borderless

# ========== =============================================================
# 2007-06-01 Removed redundancy from class values.
# 2007-06-04 Merge of successive tokens of same type
#            (code taken from pygments.formatters.others).
# 2007-06-05 Separate docutils formatter script
#            Use pygments' CSS class names (like the html formatter)
#            allowing the use of pygments-produced style sheets.
# 2007-06-07 Merge in the formatting of the parsed tokens
#            (misnamed as docutils_formatter) as class DocutilsInterface
# 2007-06-08 Failsafe implementation (fallback to a standard literal block
#            if pygments not found)
# 2010-11-27 Rename directive from "code-block" to "code".
#            Fix fallback if pygments not found.
#            Use class-based interface.
#            Add "number-lines" option.
# ========== =============================================================

# ::

"""Define and register a code directive using pygments"""

# Requirements
# ------------
# ::

from docutils import nodes, ApplicationError
from docutils.parsers.rst import directives, Directive
from docutils.parsers.rst.roles import set_classes

try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

# Customisation
# -------------

# Do not insert inline nodes for the following tokens.
# (You could add e.g. Token.Punctuation like ``['', 'p']``.) ::

unstyled_tokens = ['']  # Token.Text

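# For illustration, the variant mentioned above would look like this
# (not active here)::
#
#   unstyled_tokens = ['', 'p']  # Token.Text and Token.Punctuation
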
# Lexer
# -----

# This interface class combines code from
# pygments.formatters.html and pygments.formatters.others.
class Lexer(object):
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code     -- list of source code lines to parse
      language -- formal language the code is written in.

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(ttype_class, value)``
    tuples, where `ttype_class` is taken from pygments.token.STANDARD_TYPES
    and corresponds to the class argument used in pygments html output.
    """
    def __init__(self, code, language):
        """
        Set up a lexical analyzer for `code` in `language`.
        """
        self.code = code
        self.language = language
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text'):
            return
        if not with_pygments:
            raise ApplicationError('Cannot highlight code. '
                                   'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except pygments.util.ClassNotFound:
            raise ApplicationError('Cannot highlight code. '
                                   'No Pygments lexer found for "%s".' % language)

    # Since version 1.2 (released Jan 01, 2010) Pygments has a
    # TokenMergeFilter. ``self.merge(tokens)`` in __iter__ can be
    # replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
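    # A minimal sketch of that alternative (untested here; assumes a
    # Pygments >= 1.2 installation), placed at the end of __init__::
    #
    #   if self.lexer is not None:
    #       self.lexer.add_filter('tokenmerge')
    #   # ... and drop the ``self.merge(tokens)`` call in __iter__.
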
    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

           Also strip the final '\n' (added by pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = tokens.next()
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                (lasttype, lastval) = (ttype, value)
        if lastval != '\n':
            yield (lasttype, lastval)

    def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        codestring = u'\n'.join(self.code)
        if self.lexer is None:
            # no lexer: return the whole code as one unstyled token
            yield ('', codestring)
            return
        tokens = pygments.lex(codestring, self.lexer)
        for ttype, value in self.merge(tokens):
            # yield (ttype, value)  # token type objects
            yield (_get_ttype_class(ttype), value)  # short name strings

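# A minimal usage sketch (illustrative; assumes Pygments provides a
# ``python`` lexer)::
#
#   for cls, value in Lexer(['x = 1'], 'python'):
#       print (cls, value)   # e.g. ('n', 'x'), ('', ' '), ('o', '='), ...
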
class NumberLines(object):
    """Insert linenumber-tokens in front of every newline.

    Arguments

      tokens    -- iterable of ``(ttype_class, value)`` tuples
      startline -- first line number
      endline   -- last line number

    Iterating over an instance yields the tokens preceded by
    a ``('ln', '<line number>')`` token for every line.
    Multi-line tokens from pygments are split.
    """
    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))

    def __iter__(self):
        lineno = self.startline
        yield ('ln', self.fmt_str % lineno)
        for ttype, value in self.tokens:
            lines = value.split('\n')
            for line in lines[:-1]:
                yield (ttype, line + '\n')
                lineno += 1
                yield ('ln', self.fmt_str % lineno)
            yield (ttype, lines[-1])

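# A minimal usage sketch (illustrative)::
#
#   tokens = NumberLines(Lexer(['x = 1', 'y = 2'], 'python'), 1, 2)
#   for cls, value in tokens:
#       print (cls, value)   # starts with ('ln', '1 '), ...
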
# CodeBlock directive
# -------------------
# ::

class CodeBlock(Directive):
    """Parse and mark up content of a code block.
    """
    optional_arguments = 1
    option_spec = {'class': directives.class_option,
                   'name': directives.unchanged,
                   'number-lines': directives.unchanged  # integer or None
                  }
    has_content = True

    def run(self):
        self.assert_has_content()
        if self.arguments:
            language = self.arguments[0]
        else:
            language = ''
        set_classes(self.options)
        classes = ['code', language]
        if 'classes' in self.options:
            classes.extend(self.options['classes'])

        # TODO: config setting to skip lexical analysis:
        ## if document.settings.no_highlight:
        ##     language = ''

        # set up lexical analyzer
        tokens = Lexer(self.content, language)

        if 'number-lines' in self.options:
            # optional argument `startline`, defaults to 1
            try:
                startline = int(self.options['number-lines'] or 1)
            except ValueError:
                raise self.error(':number-lines: with non-integer start value')
            endline = startline + len(self.content)
            # add linenumber filter:
            tokens = NumberLines(tokens, startline, endline)

        node = nodes.literal_block('\n'.join(self.content), classes=classes)
        self.add_name(node)

        # analyze content and add nodes for every token
        for cls, value in tokens:
            # print (cls, value)
            if cls in unstyled_tokens:
                # insert as Text to decrease the verbosity of the output.
                node += nodes.Text(value, value)
            else:
                node += nodes.inline(value, value, classes=[cls])

        return [node]

# Register Directive
# ------------------
# ::

directives.register_directive('code', CodeBlock)

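# Once registered, the directive can be used in a reST document like this
# (an illustrative snippet, not part of this module)::
#
#   .. code:: python
#      :number-lines: 1
#
#      def hello():
#          print "Hello, world!"
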
# .. _docutils: http://docutils.sf.net/
# .. _pygments: http://pygments.org/
# .. _Using Pygments in ReST documents: http://pygments.org/docs/rstdirective/
# .. _proof of concept:
#    http://article.gmane.org/gmane.text.docutils.user/3689

# Test output
# -----------

# If called from the command line, call the docutils publisher to render the
# input::

if __name__ == '__main__':
    from docutils.core import publish_cmdline, default_description
    description = 'code-block directive test output' + default_description
    try:
        import locale
        locale.setlocale(locale.LC_ALL, '')
    except:
        pass
    # Uncomment the desired output format:
    # publish_cmdline(writer_name='pseudoxml', description=description)
    # publish_cmdline(writer_name='xml', description=description)
    # publish_cmdline(writer_name='html', description=description)
    publish_cmdline(writer_name='latex', description=description)
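
# Example invocation (illustrative; the file names are placeholders)::
#
#   python pygments_code_block_directive.py example.txt example.tex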