4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This file has been placed in the public domain.
7 # Call: create_unimap.py < unicode.xml > unicode_latex.py
10 # <http://www.w3.org/2003/entities/xml/unicode.xml>.
12 from xml
.dom
import minidom
16 if sys
.version_info
>= (3,0):
24 if sys
.version_info
>= (3,0) and isinstance(s
, unicode):
33 """Node visitor for contents of unicode.xml."""
def visit_character(self, node):
    """Record the LaTeX equivalent(s) of one ``<character>`` element.

    For every ``<latex>`` child of `node`, the character whose decimal
    code point is stored in the element's ``dec`` attribute is mapped to
    the child's text content.  When the element's ``mode`` attribute is
    ``'math'`` the code is stored in ``math_map`` wrapped in ``$...$``;
    otherwise it is stored in ``text_map`` wrapped in ``{...}``.

    Raises ``KeyError`` if a ``dec`` or ``mode`` attribute is missing and
    ``UnicodeEncodeError`` if the LaTeX code is not pure ASCII.
    """
    # NOTE(review): the two skip guards and the ``else`` below are not
    # visible in the reviewed excerpt; they were restored from the
    # surviving comments -- confirm against the complete file.
    for n in node.childNodes:
        if n.nodeName != 'latex':
            continue
        code = node.attributes['dec'].value
        if '-' in code:
            # I don't know what this means, but we probably don't need
            # it.  (int() could not parse such a value anyway.)
            continue
        if int(code) < 128:
            # Wrong (maps "-" to "$-$", which is too wide) and
            # unnecessary (maps "a" to "{a}").
            continue
        # encode('ascii') enforces that the LaTeX code is pure ASCII.
        latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
        if not isinstance(latex_code, str):
            # Python 3: encode() returned bytes, and '%s' % bytes would
            # embed the repr (b'...') in the generated map.  Convert
            # back to the native string type; on Python 2 the encoded
            # value already is a ``str`` and is left untouched.
            latex_code = latex_code.decode('ascii')
        char = chr(int(code))
        if node.attributes['mode'].value == 'math':
            math_map[char] = '$%s$' % latex_code
        else:
            text_map[char] = '{%s}' % latex_code
def call_visitor(node, visitor=None):
    """Walk the DOM tree rooted at `node`, dispatching to `visitor`.

    For each node, ``visitor.visit_<name>(node)`` is called before the
    node's children are processed and ``visitor.depart_<name>(node)``
    afterwards.  A hook the visitor does not define is silently skipped.
    ``<name>`` is ``'Text'`` for text nodes; for every other node it is
    the ``nodeName`` with ``#`` replaced by ``_`` (the document node
    therefore dispatches to ``visit__document``).

    `visitor` defaults to a fresh ``Visitor`` instance.  The same visitor
    object is used for the whole subtree.
    """
    if visitor is None:
        # Instantiate lazily instead of in the signature, so that merely
        # defining this function does not create a Visitor.
        visitor = Visitor()
    # NOTE(review): the 'Text' branch and the recursive call are not
    # visible in the reviewed excerpt; they were restored from context --
    # confirm against the complete file.
    if isinstance(node, minidom.Text):
        name = 'Text'
    else:
        name = node.nodeName.replace('#', '_')
    visit = getattr(visitor, 'visit_' + name, None)
    if visit is not None:
        visit(node)
    for child in node.childNodes:
        # Forward the visitor explicitly; otherwise a caller-supplied
        # visitor would be dropped below the first level.
        call_visitor(child, visitor)
    # Looked up only after the children have been processed, so hooks
    # installed while visiting the subtree are still honoured.
    depart = getattr(visitor, 'depart_' + name, None)
    if depart is not None:
        depart(node)
# Parse unicode.xml from standard input and walk it; call_visitor() is
# given the parsed document and the maps merged below are expected to be
# filled as a result.
document = minidom.parse(sys.stdin)
call_visitor(document)

# Merge in place: text entries overwrite math entries for the same
# character.  unicode_map is the same dict object as math_map.
math_map.update(text_map)
unicode_map = math_map
# Now unicode_map contains the text entries plus dollar-enclosed math
# entries for those chars for which no text entry exists.

# Comment header of the generated module.
# NOTE(review): the original line numbering visible in the reviewed
# excerpt skips a line after the copyright line, after the "Sebastian
# Rahtz" line and after the last header line -- confirm that no separator
# output was lost there.
header_lines = (
    # 'Id' is interpolated rather than written inline (presumably so
    # that keyword expansion leaves this script's source alone -- confirm).
    '# $%s$' % 'Id',
    '# Author: Lea Wiemann <LeWiemann@gmail.com>',
    '# Copyright: This file has been placed in the public domain.',
    '# This is a mapping of Unicode characters to LaTeX equivalents.',
    '# The information has been extracted from',
    '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by',
    '# David Carlisle and Sebastian Rahtz.',
    '# The extraction has been done by the "create_unimap.py" script',
    '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.',
)
print('\n'.join(header_lines))

# The mapping itself, emitted as a Python dict literal.
print('unicode_map = %s' % pprint.pformat(unicode_map, indent=0))