HACK around tests that depend on directory contents.
[docutils.git] / tools / dev / create_unimap.py
blob9fb75bcc7138327e0aaeacd49d2587d7c91bc666
1 #!/usr/bin/env python
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This file has been placed in the public domain.
7 # Call: create_unimap.py < unicode.xml > unicode_latex.py
9 # Get unicode.xml from
10 # <http://www.w3.org/2003/entities/xml/unicode.xml>.
12 from xml.dom import minidom
13 import sys
14 import pprint
# Python 2/3 compatibility aliases: after this block the names ``unicode``,
# ``bytes`` and ``chr`` are usable on either major version.
if sys.version_info < (3, 0):
    # Python 2: ``str`` is the byte-string type and ``unichr`` is the
    # function that builds a one-character text string from a code point.
    bytes = str
    chr = unichr
else:
    # Python 3: ``str`` is the text type.
    unicode = str
def w(s):
    """Write *s* to standard output.

    On Python 3 a text string is encoded as UTF-8 and the resulting bytes
    are written to the binary layer underneath ``sys.stdout``; a text-mode
    stream rejects ``bytes`` with ``TypeError``, so the encoded data must
    not be handed to ``sys.stdout.write`` itself.  If ``sys.stdout`` has
    been replaced by an object without a ``buffer`` attribute, *s* is
    written unchanged.  On Python 2 *s* is always written unchanged.
    """
    if sys.version_info >= (3, 0):
        binary_out = getattr(sys.stdout, 'buffer', None)
        if binary_out is not None:
            if isinstance(s, str):
                s = s.encode('utf8')
            # Flush pending text first so output order is preserved when
            # text-layer and binary-layer writes are interleaved.
            sys.stdout.flush()
            binary_out.write(s)
            return
    sys.stdout.write(s)
# Module-level accumulators filled by Visitor.visit_character().  Each maps
# a single Unicode character to its LaTeX replacement: text-mode entries are
# wrapped in braces, math-mode entries in dollar signs.
text_map = {}
math_map = {}


class Visitor:

    """Node visitor for contents of unicode.xml."""

    def visit_character(self, node):
        """Record the LaTeX equivalent of one ``<character>`` element.

        For every ``<latex>`` child of *node*, the stripped text of that
        child is stored in ``math_map`` (as ``$...$``) when the element's
        ``mode`` attribute is ``'math'`` and in ``text_map`` (as ``{...}``)
        otherwise, keyed by the character whose code point is given by the
        ``dec`` attribute.

        Raises ``UnicodeEncodeError`` if the LaTeX code is not pure ASCII
        and ``KeyError`` if the ``dec`` or ``mode`` attribute is missing.
        """
        for n in node.childNodes:
            if n.nodeName != 'latex':
                continue
            code = node.attributes['dec'].value
            if '-' in code:
                # I don't know what this means, but we probably
                # don't need it....
                # NOTE(review): presumably a sequence of several code
                # points -- confirm against the unicode.xml documentation.
                continue
            if int(code) < 128:
                # Wrong (maps "-" to "$-$", which is too wide) and
                # unnecessary (maps "a" to "{a}").
                continue
            if not n.childNodes:
                # Empty <latex/> element: there is nothing to map.
                continue
            # Encoding to ASCII verifies that the LaTeX code is pure ASCII.
            latex_code = n.childNodes[0].nodeValue.encode('ascii').strip()
            if not isinstance(latex_code, str):
                # Python 3: encode() returned bytes; convert back to text,
                # otherwise the %-formatting below would embed "b'...'".
                latex_code = latex_code.decode('ascii')
            if node.attributes['mode'].value == 'math':
                math_map[chr(int(code))] = '$%s$' % latex_code
            else:
                text_map[chr(int(code))] = '{%s}' % latex_code
def call_visitor(node, visitor=None):
    """Walk the DOM tree rooted at *node*, dispatching to *visitor*.

    For each node, ``visitor.visit_<name>(node)`` is called before the
    node's children are walked and ``visitor.depart_<name>(node)`` after,
    whenever the visitor defines such a method.  ``<name>`` is ``'Text'``
    for text nodes and otherwise the node name with ``'#'`` replaced by
    ``'_'``.

    *visitor* defaults to a new ``Visitor`` instance.  The same visitor is
    used for the whole subtree.
    """
    if visitor is None:
        # Created lazily so that no instance is built at definition time.
        visitor = Visitor()
    if isinstance(node, minidom.Text):
        name = 'Text'
    else:
        name = node.nodeName.replace('#', '_')
    visit = getattr(visitor, 'visit_' + name, None)
    if visit is not None:
        visit(node)
    for child in node.childNodes:
        # Pass the visitor down explicitly; otherwise a caller-supplied
        # visitor would only ever see the root node.
        call_visitor(child, visitor)
    depart = getattr(visitor, 'depart_' + name, None)
    if depart is not None:
        depart(node)
# Parse unicode.xml from standard input and let the visitor fill
# ``text_map`` and ``math_map``.
document = minidom.parse(sys.stdin)
call_visitor(document)

# Merge the text entries over the math entries (in place, so ``unicode_map``
# and ``math_map`` are the same dict object).  Afterwards unicode_map
# contains the text entries plus dollar-enclosed math entries for those
# chars for which no text entry exists.
unicode_map = math_map
unicode_map.update(text_map)

# Emit the generated module on standard output: a comment header followed by
# the pretty-printed mapping.
print('\n'.join((
    # Built with %-formatting so the literal keyword marker does not appear
    # in this script's own source (presumably to keep a version-control
    # keyword expansion from touching it -- TODO confirm).
    '# $%s$' % 'Id',
    '# Author: Lea Wiemann <LeWiemann@gmail.com>',
    '# Copyright: This file has been placed in the public domain.',
    '',
    '# This is a mapping of Unicode characters to LaTeX equivalents.',
    '# The information has been extracted from',
    '# <http://www.w3.org/2003/entities/xml/unicode.xml>, written by',
    '# David Carlisle and Sebastian Rahtz.',
    '#',
    '# The extraction has been done by the "create_unimap.py" script',
    '# located at <http://docutils.sf.net/tools/dev/create_unimap.py>.',
    '',
)))
print('unicode_map = %s' % pprint.pformat(unicode_map, indent=0))