2 # $Id: sax_complete_copy.py 54 2011-04-17 15:44:41Z cynthia $
4 import os
, sys
, argparse
, io
6 from xml
.sax
.handler
import feature_namespaces
7 from StringIO
import StringIO
10 if sys.version_info < (3,):
12 from io import StringIO
16 from xml
.etree
.ElementTree
import Element
, tostring
17 import xml
.etree
.cElementTree
as etree
18 import tempfile
, subprocess
, os
19 # import docutils.math.latex2mathml
20 from xml
.sax
import InputSource
# The script announces that it only runs under Python 3.  Actually stop
# here on an older interpreter; printing "now quitting" and then carrying
# on would only fail later with a less helpful error.
if sys.version_info < (3,):
    sys.stderr.write('Only run with python 3\n')
    sys.stderr.write('Script now quitting\n')
    sys.exit(1)
class CopyTree(xml.sax.ContentHandler):
    """Namespace-aware SAX handler that re-serialises a document to stdout.

    Start tags, attributes, text and end tags are written to ``sys.stdout``
    as the parser reports them.  Three kinds of element get special
    treatment when they are closed:

    * ``math`` / ``math_block`` with ``mathml == 'ascii'``: the buffered text
      is converted with the module-level ``asciimathml`` object.
    * ``math`` / ``math_block`` with ``mathml == 'latex'``: the buffered text
      is converted by running the external ``tralics`` program.
    * ``raw`` whose ``format`` attribute is ``xml``: the buffered text is
      written without XML escaping.

    Any other text is XML-escaped before it is written.
    """

    _MATH_ELEMENTS = ('math_block', 'math')
    _MATHML_NS = 'http://www.w3.org/1998/Math/MathML'
    # Extra replacements needed inside a double-quoted attribute value, on
    # top of the &, < and > that xml.sax.saxutils.escape always handles.
    _ATTR_ENTITIES = {'"': '&quot;', '\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}

    def __init__(self, mathml):
        """Create the handler.

        mathml -- 'ascii' or 'latex' to convert math elements; any other
                  value (e.g. None) copies their text through unchanged.
        """
        xml.sax.ContentHandler.__init__(self)
        self.__characters = ''  # text received since the last tag was written
        self.__mathml = mathml
        # Namespace URI -> prefix to use for namespaced attributes.
        self.__ns_dict = {'http://www.w3.org/XML/1998/namespace': "xml"}
        self.__raw = False  # True while inside <raw format="xml">
        self.__fix_soft_hyphens = False

    def startDocument(self):
        """Nothing to emit at the start of the document."""

    def startElement(self, name, qname):
        """Non-namespace start event; intentionally a no-op.

        NOTE(review): the driver in this file enables namespace processing,
        in which case the parser reports elements through startElementNS.
        """

    def characters(self, characters):
        """Buffer text; the parser may deliver one text node in several pieces."""
        self.__characters += characters

    def startElementNS(self, name, qname, attrs):
        """Flush pending text, then write the start tag with its attributes.

        name  -- ``(namespace_uri, local_name)`` tuple supplied by the parser.
        qname -- unused.
        attrs -- namespace-aware attribute mapping keyed by
                 ``(namespace_uri, local_name)``.

        Exits with status 1 when an attribute lives in a namespace that has
        no known prefix, because it cannot be written back correctly.
        """
        # Text that preceded this tag belongs to the parent element.
        self.__write_text()
        ns, el_name = name
        if el_name == 'raw':
            # Recomputed for every <raw> so that one format="xml" element
            # cannot switch off escaping for a later, different <raw>.
            self.__raw = attrs.get((None, 'format')) == 'xml'

        pieces = ['<']
        if ns:
            pieces.append('ns1:%s' % el_name)
            pieces.append(' xmlns:ns1="%s"'
                          % xml.sax.saxutils.escape(ns, self._ATTR_ENTITIES))
        else:
            pieces.append(el_name)

        for the_key in attrs.keys():
            ns_att, att_name = the_key
            # The parser hands over decoded values; re-escape them so that
            # &, <, > and " do not produce malformed output.
            value = xml.sax.saxutils.escape(attrs[the_key], self._ATTR_ENTITIES)
            if not ns_att:
                pieces.append(' %s="%s"' % (att_name, value))
                continue
            ns_prefix = self.__ns_dict.get(ns_att)
            if not ns_prefix:
                sys.stderr.write('No name space for "%s"\n' % (ns_att))
                sys.exit(1)
            if ns_prefix == 'xml':
                pieces.append(' xml:%s="%s"' % (att_name, value))
            else:
                sys.stderr.write('Sorry, but don\'t know what to do with ns "%s"\n' % (ns_prefix))
                sys.exit(1)
        pieces.append('>')
        sys.stdout.write(''.join(pieces))

    def __write_text(self, raw=False):
        """Write the buffered text and clear the buffer.

        raw -- when True the text is written verbatim; otherwise it is
               XML-escaped first.
        """
        soft_hyphen = chr(173)
        if raw:
            text = self.__characters
        else:
            text = xml.sax.saxutils.escape(self.__characters)
        if self.__fix_soft_hyphens:
            text = text.replace(soft_hyphen, '-')
        sys.stdout.write(text)
        self.__characters = ''

    def __write_math(self, children):
        """Wrap *children* in a MathML ``<math>`` element and write it.

        The original source text is kept in the ``title`` attribute, and the
        text buffer is cleared afterwards.
        """
        math_tree = Element('math', title="%s" % self.__characters,
                            xmlns=self._MATHML_NS)
        math_tree.extend(children)
        sys.stdout.write(tostring(math_tree, encoding="utf-8").decode('utf8'))
        self.__characters = ''

    def endElementNS(self, name, qname):
        """Write the element's pending content followed by its end tag.

        name  -- ``(namespace_uri, local_name)`` tuple supplied by the parser.
        qname -- unused.
        """
        ns, el_name = name
        is_math = el_name in self._MATH_ELEMENTS
        if is_math and self.__mathml == 'ascii':
            raw_tree = asciimathml.parse(self.__characters)[0]
            self.__write_math([raw_tree])
        elif is_math and self.__mathml == 'latex':
            math_el = self.__tralics()
            if math_el is None or len(math_el) == 0:
                # Conversion produced nothing usable: keep the LaTeX source
                # as ordinary escaped text instead of failing.
                self.__write_text()
            else:
                # Copy every child, not just the first, so a <math> with
                # several top-level children is not truncated.
                self.__write_math(list(math_el))
        elif el_name == 'raw' and self.__raw:
            self.__write_text(raw=True)
            self.__raw = False
        else:
            self.__write_text()

        if ns:
            # Namespaced elements are unexpected here.  Warn, but still close
            # the tag with the prefix startElementNS used, so the output
            # stays balanced.
            sys.stderr.write('Should not be namespace "%s" here\n' % (ns))
            sys.stdout.write('</ns1:%s>' % el_name)
        else:
            sys.stdout.write('</%s>' % el_name)

    def __python_latex_math(self):
        """Legacy in-process LaTeX conversion; unused in this class.

        The original note on this method reads "Python code seriously
        broken".  It returns the generated MathML as a string, or the
        unconverted text when the conversion cannot be performed.
        """
        try:
            mathml_tree = docutils.math.latex2mathml.parse_latex_math(self.__characters)
        except Exception:
            # Best effort only: covers both a failed parse and the docutils
            # module not being importable / imported.
            return self.__characters
        return ''.join(mathml_tree.xml())

    def __tralics(self):
        """Convert the buffered LaTeX with the external ``tralics`` program.

        Returns the first MathML ``math`` element found in the XML file that
        tralics writes next to its input, or None (after a message on
        stderr) when there is none.  Exits with status 1 when tralics wrote
        no XML file at all.  All temporary files are removed before
        returning.
        """
        latex = self.__characters
        # NOTE(review): assumes tralics only produces MathML for material in
        # math mode, so bare formulas are wrapped in $...$; text that brings
        # its own delimiters is passed through untouched.  Confirm against
        # the tralics manual.
        if not latex.lstrip().startswith(('$', '\\(', '\\[', '\\begin')):
            latex = '$%s$' % latex

        handle, tex_file = tempfile.mkstemp(suffix='.tex')
        filename, ext = os.path.splitext(tex_file)
        xml_file = filename + '.xml'
        log_file = filename + '.log'
        try:
            # fdopen takes ownership of the descriptor returned by mkstemp,
            # so it is closed together with the file object.
            with os.fdopen(handle, 'w') as write_obj:
                write_obj.write(latex)
            # tralics' console chatter is of no interest; discard it.
            subprocess.call(
                ['tralics', '-silent', '-utf8output', '-noentnames', tex_file],
                stdout=subprocess.DEVNULL)
            if not os.path.isfile(xml_file):
                sys.stderr.write('Cannot find file "%s"\n' % xml_file)
                sys.stderr.write('Bug, program now quitting\n')
                sys.exit(1)
            xml_tree = etree.ElementTree().parse(xml_file)
        finally:
            for leftover in (tex_file, xml_file, log_file):
                if os.path.exists(leftover):
                    os.remove(leftover)

        # The math element may be nested at any depth below the root.
        math_el = next(xml_tree.iter('{%s}math' % self._MATHML_NS), None)
        if math_el is None:
            sys.stderr.write('Could not find any latex math\n')
        return math_el
class ConverttoMathml:
    """Command-line driver: read docutils XML and copy it through CopyTree."""

    def __parse_args(self):
        """Parse ``sys.argv`` and return the argparse namespace.

        The namespace has ``mathml`` (None, or a one-item list because the
        option is declared with ``nargs=1``) and ``in_file`` (a path string,
        or the ``sys.stdin`` object when no path was given).
        """
        desc = """Inserts MathML elements into an rst document.
In order to use the script, first run rst2xml.py on the RST file.
Then run this script on that resulting file
Or, in one pass: rst2xml.py <infile> | python3 rstxml2mathml.py
"""
        parser = argparse.ArgumentParser(description=desc)
        # The option itself is optional, but when given it needs a value.
        parser.add_argument('--mathml', choices=['latex', 'ascii'], nargs=1)
        parser.add_argument('in_file', default=sys.stdin, nargs='?',
                            help='the file to input; default is standard in')
        return parser.parse_args()

    def convert_to_mathml(self):
        """Run the conversion, writing the result to standard output.

        Exits with status 1 (after reporting on stderr) when the input is
        not well-formed XML.
        """
        args = self.__parse_args()
        # nargs=1 yields ['latex'] rather than 'latex'; unwrap it so the
        # handler's string comparisons can match.
        mathml = args.mathml
        if isinstance(mathml, (list, tuple)):
            mathml = mathml[0] if mathml else None
        in_file = args.in_file

        the_handle = CopyTree(mathml)
        parser = xml.sax.make_parser()
        parser.setFeature(feature_namespaces, 1)
        parser.setContentHandler(the_handle)
        # NOTE(review): resolving external general entities lets a document
        # pull in other files/URLs; only run this on trusted input.
        parser.setFeature("http://xml.org/sax/features/external-general-entities", True)

        if isinstance(in_file, str):
            # Binary mode lets the XML parser honour the encoding declared
            # in the document instead of the platform default.
            read_obj = open(in_file, 'rb')
        else:
            read_obj = io.StringIO(sys.stdin.read())
        try:
            parser.parse(read_obj)
        except xml.sax.SAXParseException as msg:
            sys.stderr.write('%s\n' % msg)
            sys.exit(1)
        finally:
            read_obj.close()
# Script entry point: build the driver and run one conversion.
if __name__ == '__main__':
    ConverttoMathml().convert_to_mathml()