4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This file has been placed in the public domain.
7 # Call: create_unimap.py < unicode.xml > unicode_latex.py
10 # <http://www.w3.org/2003/entities/xml/unicode.xml>.
12 from xml
.dom
import minidom
17 if isinstance(s
, unicode):
26 """Node visitor for contents of unicode.xml."""
def visit_character(self, node):
    """Record the LaTeX equivalents found under one character element.

    Every child of `node` named ``latex`` contributes one entry, keyed by
    the character whose decimal code point is given by the ``dec``
    attribute of `node`.  If the ``mode`` attribute of `node` is ``math``
    the entry is stored in ``math_map`` as ``$<latex>$``; otherwise it is
    stored in ``text_map`` as ``{<latex>}``.
    """
    for n in node.childNodes:
        if n.nodeName != 'latex':
            continue
        code = node.attributes['dec'].value
        # NOTE(review): the reviewed listing only showed the (truncated)
        # comments for two skip conditions, not their code.  The two
        # guards below are reconstructed from those comments -- confirm.
        try:
            codepoint = int(code)
        except ValueError:
            # A ``dec`` value that is not one decimal integer cannot be
            # turned into a single character key; skip it.
            continue
        if codepoint < 128:
            # Wrong (maps "-" to "$-$", which is too wide) and
            # unnecessary (maps "a" to "{a}").
            continue
        latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
        char = unichr(codepoint)
        if node.attributes['mode'].value == 'math':
            math_map[char] = '$%s$' % latex_code
        else:
            # NOTE(review): the ``else`` is assumed.  Without it every
            # math entry would also be written to text_map, and the
            # text form would then always win when the maps are merged.
            text_map[char] = '{%s}' % latex_code
def call_visitor(node, visitor=Visitor()):
    """Traverse the DOM subtree rooted at `node`, notifying `visitor`.

    For each node, ``visitor.visit_<name>(node)`` is called before the
    node's children are traversed and ``visitor.depart_<name>(node)``
    afterwards; either call is skipped if the visitor has no such method.
    For nodes that are not ``minidom.Text`` instances, <name> is
    ``node.nodeName`` with every '#' replaced by '_'.

    `visitor` defaults to one ``Visitor`` instance that is created when
    this function is defined and shared by all calls that omit it.
    """
    if isinstance(node, minidom.Text):
        # NOTE(review): the body of this branch was missing from the
        # reviewed listing (a bare ``if`` header).  'Text' is an assumed
        # name for text nodes -- confirm against the visitor's methods.
        name = 'Text'
    else:
        name = node.nodeName.replace('#', '_')
    if hasattr(visitor, 'visit_' + name):
        getattr(visitor, 'visit_' + name)(node)
    for child in node.childNodes:
        # NOTE(review): the loop body was also missing; recursing into
        # each child is assumed.  The visitor is passed along explicitly
        # so that a caller-supplied visitor is used for the whole subtree
        # rather than only for the root node.
        call_visitor(child, visitor)
    if hasattr(visitor, 'depart_' + name):
        getattr(visitor, 'depart_' + name)(node)
58 document
= minidom
.parse(sys
.stdin
)
59 call_visitor(document
)
61 unicode_map
= math_map
62 unicode_map
.update(text_map
)
63 # Now unicode_map contains the text entries plus dollar-enclosed math
64 # entries for those chars for which no text entry exists.
67 print '# Author: Lea Wiemann <LeWiemann@gmail.com>'
68 print '# Copyright: This file has been placed in the public domain.'
70 print '# This is a mapping of Unicode characters to LaTeX equivalents.'
71 print '# The information has been extracted from'
72 print '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by'
73 print '# David Carlisle and Sebastian Rahtz.'
75 print '# The extraction has been done by the "create_unimap.py" script'
76 print '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.'
78 print 'unicode_map = %s' % pprint
.pformat(unicode_map
, indent
=0)