Release 0.12: set version number to 0.12
[docutils.git] / sandbox / paultremblay / scripts / rstxml2mathml_sax.py
blob7fc473d13bff6b6316497ec9d7167a699ff18c2c
1 #!/usr/bin/python
2 # $Id: sax_complete_copy.py 54 2011-04-17 15:44:41Z cynthia $
4 import os, sys, argparse, io
5 import xml.sax.handler
6 from xml.sax.handler import feature_namespaces
7 from StringIO import StringIO
9 """
10 if sys.version_info < (3,):
11 else:
12 from io import StringIO
13 """
15 import asciimathml
16 from xml.etree.ElementTree import Element, tostring
17 import xml.etree.cElementTree as etree
18 import tempfile, subprocess, os
19 # import docutils.math.latex2mathml
20 from xml.sax import InputSource
22 """
23 if sys.version_info < (3,):
24 sys.stderr.write('Only run with pyton 3\n')
25 sys.stderr.write('Script now quiting\n')
26 sys.exit(1)
27 """
class CopyTree(xml.sax.ContentHandler):
    """SAX handler that copies an XML document to standard output.

    The document is re-serialised element by element.  Two kinds of
    content get special treatment:

    * the text of ``math`` and ``math_block`` elements is replaced by a
      MathML ``<math>`` element when a math flavour was requested;
    * the text of ``raw`` elements whose ``format`` attribute is ``xml``
      is written without escaping.

    The handler expects namespace-aware parsing, i.e. the parser must
    deliver ``startElementNS``/``endElementNS`` events.
    """

    # Namespace used for the generated <math> wrapper and for locating
    # the tralics result.
    _MATHML_NS = 'http://www.w3.org/1998/Math/MathML'

    def __init__(self, mathml):
        """Create the handler.

        mathml -- ``'ascii'``, ``'latex'`` or a false value.  Selects how
                  the text of math elements is converted; with a false
                  value math text is copied like any other text.
        """
        xml.sax.ContentHandler.__init__(self)
        self.__characters = ''   # text collected since the last flush
        self.__mathml = mathml
        # Namespace URI -> prefix for the attribute namespaces we can write.
        self.__ns_dict = {'http://www.w3.org/XML/1998/namespace': "xml"}
        self.__raw = False       # True while inside <raw format="xml">
        self.__fix_soft_hyphens = False

    def startDocument(self):
        """Nothing to do at the start of the document."""
        pass

    def startElement(self, name, qname):
        """Namespace-unaware element callback.

        Only ``startElementNS`` does real work in this handler.  SAX
        passes the attributes object as the second argument here, which
        this method ignores; it merely prints a marker.
        """
        print('okay')

    def characters(self, characters):
        """Collect character data until the next element boundary."""
        self.__characters += characters

    def startElementNS(self, name, qname, attrs):
        """Flush pending text and write the start tag for ``name``.

        name  -- ``(namespace_uri, local_name)`` tuple.
        qname -- unused.
        attrs -- namespace-aware attributes, keyed by
                 ``(namespace_uri, local_name)``.

        Exits the program with status 1 when an attribute lives in a
        namespace other than the XML namespace.
        """
        # Imported here so the handler does not rely on another module
        # having loaded xml.sax.saxutils as a side effect.
        from xml.sax.saxutils import quoteattr

        self.__write_text()
        ns, el_name = name
        if el_name == 'raw':
            # Recomputed for every <raw> so an earlier format="xml"
            # element cannot leave the flag switched on.
            self.__raw = attrs.get((None, 'format')) == 'xml'

        write = sys.stdout.write
        if ns:
            write('<ns1:%s xmlns:ns1=%s' % (el_name, quoteattr(ns)))
        else:
            write('<%s' % el_name)

        for (att_ns, att_name), value in attrs.items():
            if not att_ns:
                # quoteattr escapes &, < and quotes so the output stays
                # well-formed whatever the attribute value contains.
                write(' %s=%s' % (att_name, quoteattr(value)))
                continue
            ns_prefix = self.__ns_dict.get(att_ns)
            if not ns_prefix:
                sys.stderr.write('No name space for "%s"\n' % (att_ns))
                sys.exit(1)
            if ns_prefix != 'xml':
                sys.stderr.write(
                    'Sorry, but don\'t know what to do with ns "%s"\n'
                    % (ns_prefix))
                sys.exit(1)
            write(' xml:%s=%s' % (att_name, quoteattr(value)))
        write('>')

    def __write_text(self, raw=False):
        """Write the collected text to standard output and reset it.

        raw -- when true the text is written verbatim, otherwise the XML
               special characters are escaped.
        """
        from xml.sax.saxutils import escape

        text = self.__characters if raw else escape(self.__characters)
        if self.__fix_soft_hyphens:
            # U+00AD SOFT HYPHEN
            text = text.replace(u'\xad', '-')
        sys.stdout.write(text)
        self.__characters = ''

    def __write_math(self, content):
        """Wrap ``content`` in a ``<math>`` element and write it once.

        The collected source text becomes the ``title`` attribute of the
        wrapper and is then discarded.
        """
        math_tree = Element('math', title="%s" % self.__characters,
                            xmlns=self._MATHML_NS)
        math_tree.append(content)
        sys.stdout.write(tostring(math_tree, encoding="utf-8").decode('utf-8'))
        self.__characters = ''

    def endElementNS(self, name, qname):
        """Write the pending content and the end tag for ``name``.

        Math elements are converted according to the flavour given to the
        constructor; if the LaTeX conversion yields nothing usable the
        source text is written unchanged.  Exits with status 1 for an
        element in a namespace, which this handler cannot close.
        """
        ns, el_name = name
        is_math = el_name in ('math_block', 'math')
        if is_math and self.__mathml == 'ascii':
            self.__write_math(asciimathml.parse(self.__characters)[0])
        elif is_math and self.__mathml == 'latex':
            math_root = self.__tralics()
            if math_root is None or len(math_root) == 0:
                # No MathML produced: fall back to the plain source text.
                self.__write_text()
            else:
                self.__write_math(math_root[0])
        elif el_name == 'raw' and self.__raw:
            self.__write_text(raw=True)
            self.__raw = False
        else:
            self.__write_text()

        if ns:
            sys.stderr.write('Should not be namespace "%s" here\n' % (ns))
            sys.exit(1)
        sys.stdout.write('</%s>' % el_name)

    def __python_latex_math(self):
        """Convert the collected text with docutils' LaTeX math parser.

        Python code seriously broken.

        Unused by this class.  It refers to
        ``docutils.math.latex2mathml``, whose import is commented out in
        this file, so calling it raises ``NameError``.
        """
        try:
            mathml_tree = docutils.math.latex2mathml.parse_latex_math(self.__characters)
        except SyntaxError:
            return self.__characters
        math_code = ''.join(mathml_tree.xml())
        return math_code

    def __tralics(self):
        """Run the external ``tralics`` program on the collected text.

        The text is written, wrapped in ``$...$``, to a temporary ``.tex``
        file; tralics is expected to leave its result in an ``.xml`` file
        with the same base name.  All temporary files are removed again.

        Returns the MathML ``math`` element found in the result, or
        ``None`` (after a message on standard error) if there is none.
        Exits with status 1 if tralics produced no ``.xml`` file.
        """
        # xml.etree.cElementTree no longer exists in current Python
        # versions; the plain module offers the same API.
        from xml.etree import ElementTree

        math_tag = '{%s}math' % self._MATHML_NS
        fd, tex_file = tempfile.mkstemp(suffix='.tex')
        base_name = os.path.splitext(tex_file)[0]
        xml_file = base_name + '.xml'
        log_file = base_name + '.log'
        try:
            # fdopen takes ownership of the descriptor returned by
            # mkstemp, so it is closed together with the file object.
            with os.fdopen(fd, 'w') as tex_obj:
                tex_obj.write('$')
                tex_obj.write(self.__characters)
                tex_obj.write('$')
            # tralics' own console output is of no interest.
            with open(os.devnull, 'w') as sink:
                subprocess.call(
                    ['tralics', '-silent', '-utf8output', '-noentnames',
                     tex_file],
                    stdout=sink)
            if not os.path.isfile(xml_file):
                sys.stderr.write('Cannot find file "%s"\n' % xml_file)
                sys.stderr.write('Bug, program now quiting\n')
                sys.exit(1)
            node = ElementTree.parse(xml_file).getroot()
        finally:
            for leftover in (tex_file, xml_file, log_file):
                try:
                    os.remove(leftover)
                except OSError:
                    # Best-effort clean-up; the file may never have been
                    # created.
                    pass

        # Walk down the chain of first children until some level has a
        # <math> child.
        while True:
            for child in node:
                if child.tag == math_tag:
                    return child
            if len(node) == 0:
                sys.stderr.write('Could not find any latex math\n')
                return None
            node = node[0]
class ConverttoMathml:
    """Command-line driver: parse the arguments and run the conversion.

    Reads an XML document from a file or from standard input, feeds it
    through a namespace-aware SAX parser with a ``CopyTree`` handler and
    thereby writes the converted document to standard output.
    """

    def __init__(self):
        pass

    def __parse_args(self):
        """Parse ``sys.argv`` and return the argparse namespace.

        ``mathml`` is ``None`` or a one-element list (``nargs=1``);
        ``in_file`` is a file name or, when omitted, ``sys.stdin`` itself.
        """
        desc = (
            "Inserts MathML elements into an rst document.\n"
            "In order to use the script, first run rst2xml.py on the RST file.\n"
            "Then run this script on that resulting file\n"
            "Or, in one pass: rst2xml.py <infile> | python3 rstxml2mathml.py\n"
        )
        parser = argparse.ArgumentParser(description=desc)
        # The option itself is optional, but when given it needs a value.
        parser.add_argument('--mathml', choices=['latex', 'ascii'], nargs=1)
        parser.add_argument('in_file', default=sys.stdin, nargs='?',
                            help='the file to input; default is standard in')
        return parser.parse_args()

    def convert_to_mathml(self):
        """Convert the input named on the command line.

        Exits with status 1, after reporting the problem on standard
        error, when the input is not well-formed XML.
        """
        args = self.__parse_args()
        mathml = args.mathml[0] if args.mathml else None

        parser = xml.sax.make_parser()
        parser.setFeature(feature_namespaces, 1)
        parser.setContentHandler(CopyTree(mathml))
        # NOTE(review): this lets the parser fetch external general
        # entities named by the document -- only run on trusted input.
        parser.setFeature(
            "http://xml.org/sax/features/external-general-entities", True)

        # Always hand the parser bytes so that the encoding declared in
        # the document, not the locale, decides how it is decoded.
        from_file = isinstance(args.in_file, str)
        if from_file:
            source = open(args.in_file, 'rb')
        else:
            # Text streams expose the underlying byte stream as .buffer;
            # streams without it are used as they are.
            source = getattr(args.in_file, 'buffer', args.in_file)
        try:
            parser.parse(source)
        except xml.sax.SAXParseException as msg:
            # Standard output carries the converted document, so the
            # diagnostic goes to standard error like all others here.
            sys.stderr.write('%s\n' % msg)
            sys.exit(1)
        finally:
            if from_file:
                source.close()
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not on import.
    ConverttoMathml().convert_to_mathml()