add new polyglossia languages
[docutils.git] / docutils / writers / xetex / __init__.py
blob59639e0f6e4e78919f0919c6729811d16099bad4
1 # .. coding: utf8
2 # :Author: Günter Milde <milde@users.sourceforge.net>
3 # :Revision: $Revision$
4 # :Date: $Date: 2010-10-29$
5 # :Copyright: © 2010 Günter Milde.
6 # Released without warranties or conditions of any kind
7 # under the terms of the Apache License, Version 2.0
8 # http://www.apache.org/licenses/LICENSE-2.0
10 """
11 XeLaTeX document tree Writer.
13 A variant of Docutils' standard 'latex2e' writer producing output
14 suited for processing with XeLaTeX (http://tug.org/xetex/).
15 """
17 __docformat__ = 'reStructuredText'
19 import os
20 import os.path
21 import re
23 import docutils
24 from docutils import frontend, nodes, utils, writers, languages
25 from docutils.writers import latex2e
class Writer(latex2e.Writer):
    """A writer for Unicode-based LaTeX variants (XeTeX, LuaTeX).

    Reuses the `latex2e` writer and overrides the default template, the
    default preamble (font selection), the configuration section and the
    translator class.
    """

    supported = ('xetex','xelatex','luatex')
    """Formats this writer supports."""

    default_template = 'xelatex.tex'
    # Font setup inserted into the preamble unless ``--latex-preamble``
    # is given by the user.
    default_preamble = '\n'.join([
        r'% Linux Libertine (free, wide coverage, not only for Linux)',
        r'\setmainfont{Linux Libertine O}',
        r'\setsansfont{Linux Biolinum O}',
        r'\setmonofont[HyphenChar=None]{DejaVu Sans Mono}',
    ])

    config_section = 'xetex writer'
    config_section_dependencies = ('writers', 'latex2e writer')

    # Start from the latex2e settings, drop 'font_encoding' and replace the
    # 'template' and 'latex_preamble' entries with XeTeX-specific defaults.
    settings_spec = frontend.filter_settings_spec(
        latex2e.Writer.settings_spec,
        'font_encoding',
        template=('Template file. Default: "%s".' % default_template,
                  ['--template'],
                  {'default': default_template, 'metavar': '<file>'}),
        # The help text names the fonts actually selected by
        # `default_preamble` (not the PDF standard fonts of latex2e).
        latex_preamble=('Customization by LaTeX code in the preamble. '
                        'Default: select Linux Libertine, Linux Biolinum '
                        'and DejaVu Sans Mono fonts.',
                        ['--latex-preamble'],
                        {'default': default_preamble}),
        )

    def __init__(self):
        """Initialize the parent writer and install the XeLaTeX translator."""
        latex2e.Writer.__init__(self)
        # NOTE(review): the key here is 'fontencoding' while the setting
        # filtered out of `settings_spec` above is 'font_encoding' --
        # confirm which name the translator actually reads.
        self.settings_defaults.update({'fontencoding': ''}) # use default (EU1)
        self.translator_class = XeLaTeXTranslator
class Babel(latex2e.Babel):
    """Language specifics for XeTeX.

    Use `polyglossia` instead of `babel` and adapt settings.
    """
    # Work on a copy so the parent's mapping is left untouched.
    language_codes = latex2e.Babel.language_codes.copy()
    # Additionally supported or differently named languages:
    language_codes.update({
        # code          Polyglossia-name       comment
        'cop':          'coptic',
        'de':           'german',       # new spelling (de_1996)
        'de_1901':      'ogerman',      # old spelling
        'dv':           'divehi',       # Maldivian
        'dsb':          'lsorbian',
        'el_polyton':   'polygreek',
        'fa':           'farsi',
        'grc':          'ancientgreek',
        'hsb':          'usorbian',
        'sh-cyrl':      'serbian',      # Serbo-Croatian, Cyrillic script
        'sh-latn':      'croatian',     # Serbo-Croatian, Latin script
        'sq':           'albanian',
        'sr':           'serbian',      # Cyrillic script (sr-cyrl)
        'th':           'thai',
        'vi':           'vietnamese',
        # zh-latn:      ???             # Chinese Pinyin
        })
    # Languages without Polyglossia support:
    for key in ('af',           # 'afrikaans',
                'de_at',        # 'naustrian',
                'de_at_1901',   # 'austrian',
                'fr_ca',        # 'canadien',
                'grc_ibycus',   # 'ibycus', (Greek Ibycus encoding)
                'sr-latn',      # 'serbian script=latin'
               ):
        del(language_codes[key])

    def __init__(self, language_code, reporter):
        """Store the language settings for a document.

        :language_code: code of the document's main language; resolved to
                        a language name via `get_language()`.
        :reporter:      stored as `self.reporter`.

        The parent initializer is not called; all attributes are set here.
        """
        self.language_code = language_code
        self.reporter = reporter
        self.language = self.get_language(language_code)
        self.otherlanguages = {}
        self.warn_msg = 'Language "%s" not supported by XeTeX (polyglossia).'
        self.quote_index = 0
        self.quotes = ('"', '"')
        # language dependent configuration:
        # double quotes are "active" in some languages (e.g. German).
        self.literal_double_quote = u'"' # TODO: use \textquotedbl

    def __call__(self):
        """Return the LaTeX code that loads and configures polyglossia.

        The result always contains ``\\usepackage{polyglossia}`` and a
        ``\\setdefaultlanguage`` line; a ``\\setotherlanguages`` line is
        added only if `self.otherlanguages` is non-empty.
        """
        setup = [r'\usepackage{polyglossia}',
                 r'\setdefaultlanguage{%s}' % self.language]
        if self.otherlanguages:
            # Sort the names: dict order is arbitrary, and the generated
            # preamble should be identical from run to run.
            setup.append(r'\setotherlanguages{%s}' %
                         ','.join(sorted(self.otherlanguages)))
        return '\n'.join(setup)
class XeLaTeXTranslator(latex2e.LaTeXTranslator):
    """Translator producing LaTeX code for XeTeX.

    Builds on the `latex2e` translator, using the polyglossia-based
    `Babel` class and a simplified `encode()`.
    """

    def __init__(self, document):
        """Set up the parent translator and adjust the input encoding."""
        latex2e.LaTeXTranslator.__init__(self, document, Babel)
        if self.latex_encoding == 'utf8':
            # No input encoding declaration for utf8:
            # remove any '_inputenc' requirement.
            self.requirements.pop('_inputenc', None)
        else:
            self.requirements['_inputenc'] = (r'\XeTeXinputencoding %s '
                                              % self.latex_encoding)

    # XeTeX does not know the length unit px
    # Use \pdfpxdimen, the macro to set the value of 1 px in pdftex
    # this way, configuring works the same for pdftex and xetex.
    def to_latex_length(self, length_str, px=r'\pdfpxdimen'):
        """Convert string with rst length to LaTeX length.

        Delegates to the parent implementation, passing `px` (default
        ``\\pdfpxdimen``) along.
        """
        return latex2e.LaTeXTranslator.to_latex_length(self, length_str, px)

    # Simpler variant of encode, as XeTeX understands utf8 Unicode:
    def encode(self, text):
        r"""Return text with 'problematic' characters escaped.

        Escape the ten special printing characters ``# $ % & ~ _ ^ \ { }``
        and square brackets ``[ ]``; replace the soft hyphen with ``\-``.

        Depending on the translator state:

        * `self.verbatim`: return `text` unchanged.
        * `self.inside_citation_reference_label`: keep underscores.
        * `self.insert_non_breaking_blanks`: replace spaces with ``~``.
        * `self.literal`: replace double quotes with the Babel object's
          `literal_double_quote`; otherwise pass `text` through its
          `quote_quotes()`.
        * `self.insert_newline`: end lines with ``\\``; in literal text
          without non-breaking blanks, preserve runs of spaces.
        """
        if self.verbatim:
            return text
        # LaTeX encoding maps (u'...' with explicit backslashes instead of
        # ur'...', which is a syntax error in Python 3):
        special_chars = {
            ord('#'): u'\\#',
            ord('$'): u'\\$',
            ord('%'): u'\\%',
            ord('&'): u'\\&',
            ord('~'): u'\\textasciitilde{}',
            ord('_'): u'\\_',
            ord('^'): u'\\textasciicircum{}',
            ord('\\'): u'\\textbackslash{}',
            ord('{'): u'\\{',
            ord('}'): u'\\}',
            # Square brackets are ordinary chars and cannot be escaped with '\',
            # so we put them in a group '{[}'. (Alternative: ensure that all
            # macros with optional arguments are terminated with {} and text
            # inside any optional argument is put in a group ``[{text}]``).
            # Commands with optional args inside an optional arg must be put
            # in a group, e.g. ``\item[{\hyperref[label]{text}}]``.
            ord('['): u'{[}',
            ord(']'): u'{]}'
            }
        # Unicode chars that are not properly handled by XeTeX
        unsupported_unicode_chars = {
            0x00AD: u'\\-', # SOFT HYPHEN
            }
        # set up the translation table
        # (`special_chars` is rebuilt on every call, so modifying it is safe):
        table = special_chars
        # keep the underscore in citation references
        if self.inside_citation_reference_label:
            del(table[ord('_')])
        if self.insert_non_breaking_blanks:
            table[ord(' ')] = u'~'
        if self.literal:
            # double quotes are 'active' in some languages
            table[ord('"')] = self.babel.literal_double_quote
        else:
            text = self.babel.quote_quotes(text)
        # Unicode chars:
        table.update(unsupported_unicode_chars)

        text = text.translate(table)

        # Literal line breaks (in address or literal blocks):
        if self.insert_newline:
            # for blank lines, insert a protected space, to avoid
            # ! LaTeX Error: There's no line here to end.
            textlines = [line + '~'*(not line.lstrip())
                         for line in text.split('\n')]
            text = '\\\\\n'.join(textlines)
        if self.literal and not self.insert_non_breaking_blanks:
            # preserve runs of spaces but allow wrapping:
            # only a pair of spaces becomes "space + protected space",
            # single spaces stay ordinary (breakable) spaces.
            text = text.replace('  ', ' ~')
        return text