1 import os
, sys
, subprocess
, argparse
, tempfile
, logging
, glob
2 import asciitomathml
.asciitomathml
3 import validate_docbook
, validate_fo
8 from lxml
import etree
as etree
# Module-level side effect: switch every locale category (LC_ALL) to the
# user's default locale; the empty string tells setlocale() to take the
# setting from the environment.
# NOTE(review): no `import locale` is visible in this excerpt -- confirm that
# it is imported in the omitted lines above.
12 locale
.setlocale(locale
.LC_ALL
, '')
class NoRunException(Exception):
    """Exception type declared by this module.

    NOTE(review): no ``raise NoRunException`` is visible in this excerpt;
    confirm the intended use against the full source.
    """
21 convert to XML with math string
# Constructor: stores the input path (`in_file`), its encoding and the three
# pipeline switches (`validate_docbook`, `convert_to_fo`, `convert_to_pdf`)
# on the instance.  It also sets `path_id` to '__rst__' (the marker that
# _make_temp() embeds in generated file names) and resets the transform
# counter `_transform_num` to 0.
# NOTE(review): the `to_docbook` and `debug` parameters are accepted but not
# used in the lines visible here, and `self.logger` (used by other methods)
# is not assigned here -- confirm against the omitted lines that follow.
25 def __init__(self
, in_file
, in_encoding
='utf8', to_docbook
=True,
26 validate_docbook
= True, convert_to_fo
= True, convert_to_pdf
= True, debug
=False):
27 self
.path_id
= '__rst__'
28 self
._transform
_num
= 0
29 self
.in_file
= in_file
30 self
.in_encoding
= in_encoding
31 self
.validate_docbook
= validate_docbook
32 self
.convert_to_fo
= convert_to_fo
33 self
.convert_to_pdf
= convert_to_pdf
def make_logging(self, ch_level=logging.ERROR, fh_level=logging.INFO):
    """Configure this module's logger with a console and a file handler.

    The logger itself is set to DEBUG so that each handler's own level
    decides what is emitted.

    :param ch_level: threshold for the console (stream) handler.
    :param fh_level: threshold for the handler writing
        ``docutils_to_xml.log`` in the current working directory.
    :returns: the configured logger, which is also stored as
        ``self.logger`` because the other methods log through that
        attribute.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    # The logger is shared per module name, so a second call would otherwise
    # stack another pair of handlers and every record would be emitted twice.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh = logging.FileHandler('docutils_to_xml.log')
    ch = logging.StreamHandler()
    for handler, level in ((ch, ch_level), (fh, fh_level)):
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    self.logger = logger
    return logger
def pretty_print(self, elem, level=0):
    """Indent an element tree in place so that it serialises readably.

    Whitespace-only (or missing) ``text``/``tail`` values are replaced by a
    newline plus two spaces per nesting level; text that contains anything
    other than whitespace is left alone.

    :param elem: element whose subtree is re-indented; only ``len()``,
        iteration, ``text`` and ``tail`` are used.
    :param level: nesting depth of ``elem`` (0 for the root).
    :returns: None; the tree is modified in place.
    """
    indent = "\n" + level * "  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = indent + "  "
        if not elem.tail or not elem.tail.strip():
            elem.tail = indent
        last_child = None
        for child in elem:
            self.pretty_print(child, level + 1)
            last_child = child
        # The last child is followed by the parent's closing tag, so its
        # tail drops back to the parent's indentation.
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = indent
    elif level and (not elem.tail or not elem.tail.strip()):
        elem.tail = indent
def rst_to_xml(self, in_file, base=None, in_encoding='utf8'):
    """Run the docutils XML writer on ``in_file`` and return its output.

    docutils is driven through ``publish_cmdline``, which reads its options
    from ``sys.argv`` and writes to ``sys.stdout``; both are swapped for the
    duration of the call and restored afterwards, even on failure.

    :param in_file: path of the reStructuredText source.
    :param base: leading ``argv`` entries placed before the docutils
        options.
    :param in_encoding: value passed as ``--input-encoding``.
    :returns: the generated XML, read back from a temporary file.
    """
    from docutils.core import publish_cmdline, default_description

    if base is None:
        # NOTE(review): the original default handling is not visible in this
        # excerpt; the current program name is used as argv[0] -- confirm.
        base = [sys.argv[0]]
    needed_opts = [
        '--traceback', '--strip-comments', '--trim-footnote-reference-space',
        '--no-doctype', '--input-encoding={0}'.format(in_encoding),
        '--output-encoding=ascii', '--no-generator',
    ]
    description = ('Generates Docutils-native XML from standalone '
                   'reStructuredText sources. ' + default_description)

    fd, temp_file = tempfile.mkstemp()
    # mkstemp() returns an open descriptor; the file is reopened by name
    # below, so close the descriptor now instead of leaking it.
    os.close(fd)
    saved_argv, saved_stdout = sys.argv, sys.stdout
    try:
        sys.argv = list(base) + needed_opts + [in_file]
        with open(temp_file, 'w') as write_obj:
            sys.stdout = write_obj
            try:
                publish_cmdline(writer_name='xml', description=description)
            finally:
                sys.stdout = saved_stdout
        with open(temp_file, 'r') as read_obj:
            xml_string = read_obj.read()
    finally:
        sys.argv = saved_argv
        os.remove(temp_file)
    return xml_string
def insert_math_elements(self, xml_string, out_encoding='ASCII'):
    """Add a MathML subtree to every ``math_block`` and ``math`` element.

    The text of each such element is parsed with ``AsciiMathML``;
    ``math_block`` elements are rendered with ``displaystyle`` set to
    ``'true'``.  The resulting tree is inserted as the element's first
    child.

    :param xml_string: serialised XML document to process.
    :param out_encoding: encoding passed to ``etree.tostring`` for the
        result.
    :returns: the serialised, modified document.
    """
    tree = etree.XML(xml_string)
    for tag in ('math_block', 'math'):
        # Snapshot the matches: children are inserted during the loop and
        # the tree should not be mutated while it is being traversed.
        for element in list(tree.iter(tag)):
            math_text = element.text
            # NOTE(review): the two source lines before the branch are not
            # visible in this excerpt; clearing the ASCII text once it has
            # been captured is an assumption -- confirm.
            element.text = ''
            if tag == 'math_block':
                math_obj = asciitomathml.asciitomathml.AsciiMathML(
                    mstyle={'displaystyle': 'true'})
            else:
                math_obj = asciitomathml.asciitomathml.AsciiMathML()
            math_obj.parse_string(math_text)
            math_tree = math_obj.get_tree()
            # Serialise and re-parse; presumably this makes the inserted
            # node a native element of `tree`'s etree implementation.
            math_string = etree.tostring(math_tree, encoding='ascii')
            element.insert(0, etree.XML(math_string))
    return etree.tostring(tree, encoding=out_encoding)
def _make_temp(self, the_type):
    """Build the path of an intermediate file next to ``self.in_file``.

    The name is the input path without its extension, followed by
    ``self.path_id`` and a suffix that depends on ``the_type``:

    * ``'transform'`` -> ``transform<N>.xml``; ``self._transform_num`` is
      incremented first, so every call yields a new name
    * ``'docbook'``   -> ``docbook.xml``
    * ``'rst'``       -> ``raw.xml``
    * ``'fo'``        -> ``docbook.fo``

    :param the_type: one of the four keys above.
    :returns: the generated path; no file is created.
    :raises ValueError: for any other ``the_type`` (previously ``None`` was
        returned silently, which only failed later when the path was used).
    """
    stem = os.path.splitext(self.in_file)[0]
    if the_type == 'transform':
        self._transform_num += 1
        return '{0}{1}transform{2}.xml'.format(
            stem, self.path_id, self._transform_num)

    suffixes = {'docbook': 'docbook.xml', 'rst': 'raw.xml', 'fo': 'docbook.fo'}
    if the_type not in suffixes:
        raise ValueError('unknown temp file type "{0}"'.format(the_type))
    return '{0}{1}{2}'.format(stem, self.path_id, suffixes[the_type])
def to_docbook(self, raw_path, xsl_files=None):
    """Run the docutils XML through one or more XSLT stylesheets.

    :param raw_path: input handed to the first stylesheet.
    :param xsl_files: stylesheets to apply in order; when empty or None,
        ``docutils_to_docbook.xsl`` from ``$RST_DOCBOOK_HOME`` is used.
    :returns: the tree returned by the last
        ``xsl_process.transform_lxml`` call.
    :raises OSError: if ``RST_DOCBOOK_HOME`` is not set (checked even when
        ``xsl_files`` is supplied, as before).
    :raises IOError: if the default stylesheet does not exist.
    """
    doc_home = os.environ.get('RST_DOCBOOK_HOME')
    if not doc_home:
        raise OSError('You must set RST_DOCBOOK_HOME')
    if not xsl_files:
        xsl_file = os.path.join(doc_home, 'docutils_to_docbook.xsl')
        if not os.path.isfile(xsl_file):
            raise IOError('cannot find "{0}"'.format(xsl_file))
        xsl_files = [xsl_file]

    in_files = [raw_path]
    last = len(xsl_files) - 1
    for counter, xsl in enumerate(xsl_files):
        # Only the final stage yields the DocBook document; earlier stages
        # get numbered "transform" names.
        the_type = 'docbook' if counter == last else 'transform'
        out_file = self._make_temp(the_type=the_type)
        self.logger.debug('out file from temp is "{0}"'.format(out_file))
        # NOTE(review): the returned `error` value is not inspected in the
        # lines visible in this excerpt.
        error, xml_obj = xsl_process.transform_lxml(xsl, in_files[counter])
        # NOTE(review): the next stage reads `out_file`, but nothing visible
        # here writes the result to that path -- confirm before chaining
        # several stylesheets.
        in_files.append(out_file)
    return xml_obj
def to_fo(self, result_tree_obj, xsl_files=None):
    """Run a tree through one or more XSLT stylesheets to obtain XSL-FO.

    :param result_tree_obj: input handed to the first stylesheet.
    :param xsl_files: stylesheets to apply in order; when empty or None,
        ``fo/docbook.xsl`` below ``$DOCBOOK_HOME`` is used.
    :returns: the tree returned by the last
        ``xsl_process.transform_lxml`` call.
    :raises OSError: if ``DOCBOOK_HOME`` is not set (checked even when
        ``xsl_files`` is supplied, as before).
    :raises IOError: if the default stylesheet does not exist.
    """
    doc_home = os.environ.get('DOCBOOK_HOME')
    if not doc_home:
        raise OSError('You must set DOCBOOK_HOME')
    if not xsl_files:
        xsl_file = os.path.join(doc_home, 'fo', 'docbook.xsl')
        if not os.path.isfile(xsl_file):
            raise IOError('cannot find "{0}"'.format(xsl_file))
        xsl_files = [xsl_file]

    in_files = [result_tree_obj]
    last = len(xsl_files) - 1
    for counter, xsl in enumerate(xsl_files):
        # Only the final stage yields the FO document; earlier stages get
        # numbered "transform" names.
        the_type = 'fo' if counter == last else 'transform'
        out_file = self._make_temp(the_type=the_type)
        self.logger.debug('out file from temp is "{0}"'.format(out_file))
        # NOTE(review): the returned `error` value is not inspected in the
        # lines visible in this excerpt.
        error, xml_obj = xsl_process.transform_lxml(xsl, in_files[counter])
        # NOTE(review): the next stage reads `out_file`, but nothing visible
        # here writes the result to that path -- confirm before chaining
        # several stylesheets.
        in_files.append(out_file)
    return xml_obj
# Passes `fo_file` to `fop_obj.to_pdf()` and binds the result to `pdf_file`.
# NOTE(review): `fop_obj` is not defined in the lines visible here, and no
# return statement is visible -- confirm both against the full source.
168 def to_pdf(self
, fo_file
):
170 pdf_file
= fop_obj
.to_pdf(fo_file
)
def report_xsl_error(self, transform_error_obj):
    """Report every entry of an error log.

    Each entry's ``message`` is written to stderr followed by a newline.
    When both ``line`` and ``column`` are non-zero they are additionally
    logged, one after the other, at CRITICAL level.

    :param transform_error_obj: iterable of entries exposing ``message``,
        ``line`` and ``column``.
    """
    for entry in transform_error_obj:
        sys.stderr.write(entry.message)
        sys.stderr.write('\n')
        if entry.line == 0 or entry.column == 0:
            # No usable position for this entry.
            continue
        for position in (entry.line, entry.column):
            self.logger.critical(str(position))
def validate_docutils_rng(self, xml_obj):
    """Validate ``xml_obj`` against the DocBook RELAX NG schema.

    The schema is read from ``$VALIDATE_HOME/relax/docbook.rng``.  On
    failure a CRITICAL message is logged and the validator's error log is
    passed to ``report_xsl_error``.

    :param xml_obj: parsed document handed to ``RelaxNG.validate``.
    :returns: the result of ``RelaxNG.validate``.
        NOTE(review): the original return statement is not visible in this
        excerpt -- confirm.
    :raises IOError: if ``VALIDATE_HOME`` is unset or the schema file is
        missing.
    """
    valid_home = os.environ.get('VALIDATE_HOME')
    if valid_home is None:
        raise IOError('You need to set the variable "VALIDATE_HOME"')
    docbook_rng = os.path.join(valid_home, 'relax', 'docbook.rng')
    if not os.path.isfile(docbook_rng):
        msg = 'cannot find "{0}"'.format(docbook_rng)
        msg += '\nYou need download docbook.rng'
        raise IOError(msg)

    # Hand the path to the parser rather than an open() handle so that no
    # file object is left for the garbage collector to close.
    relaxng = etree.RelaxNG(etree.parse(docbook_rng))
    is_valid = relaxng.validate(xml_obj)
    if not is_valid:
        self.logger.critical('Not valid docbook\n')
        self.report_xsl_error(relaxng.error_log)
    return is_valid
def validate_fo_xsl(self, result_tree_obj):
    """Check an FO tree by running the ``folint.xsl`` stylesheet over it.

    The stylesheet is read from ``$VALIDATE_HOME/xslt/folint.xsl``.  Every
    entry of the transform's error log is passed to ``report_xsl_error``.

    :param result_tree_obj: parsed FO document to check.
    :returns: the number of entries in the transform's error log.
    :raises IOError: if ``VALIDATE_HOME`` is unset or the stylesheet is
        missing.
    """
    valid_home = os.environ.get('VALIDATE_HOME')
    if valid_home is None:
        raise IOError('You need to set the variable "VALIDATE_HOME"')
    xsl_ss = os.path.join(valid_home, 'xslt', 'folint.xsl')
    if not os.path.isfile(xsl_ss):
        # The message used to reference `docbook_rng`, which does not exist
        # in this method (NameError), and named the wrong file.
        msg = 'cannot find "{0}"'.format(xsl_ss)
        msg += '\nYou need download folint.xsl'
        raise IOError(msg)

    transform = etree.XSLT(etree.parse(xsl_ss))
    try:
        transform(result_tree_obj)
    except etree.XSLTApplyError as error:
        # NOTE(review): one line of the original handler is not visible in
        # this excerpt (it may have returned or re-raised) -- confirm.
        self.logger.critical(
            'error validating FO with "{0}":\n{1}'.format(xsl_ss, error))
    # Report once, whether or not the transform raised.
    self.report_xsl_error(transform.error_log)
    return len(transform.error_log)
# Collects the intermediate files in `the_dir` whose names carry this
# instance's `path_id` marker -- '*docbook.xml', '*transform[0-9].xml' and
# '*raw.xml' -- and logs the resulting list at DEBUG level.
# NOTE(review): '[0-9]' matches a single digit only, so transform files
# numbered 10 and above are not matched; the 'docbook.fo' name produced by
# _make_temp('fo') is not globbed either.
# NOTE(review): no removal of the collected files is visible in this excerpt
# -- confirm against the omitted lines that follow.
224 def clean(self
, the_dir
):
225 pattern
= os
.path
.join(the_dir
, '*{0}docbook.xml'.format(self
.path_id
))
226 files
= glob
.glob(pattern
)
227 pattern
= os
.path
.join(the_dir
, '*{0}transform[0-9].xml'.format(self
.path_id
))
228 files
+= glob
.glob(pattern
)
229 pattern
= os
.path
.join(the_dir
, '*{0}raw.xml'.format(self
.path_id
))
230 files
+= glob
.glob(pattern
)
231 self
.logger
.debug(files
)
# Pipeline body (the enclosing `def` line is not visible in this excerpt):
#   1. rst_to_xml() and insert_math_elements() produce the XML string;
#   2. the string is written to the 'rst' temp path from _make_temp();
#   3. to_docbook() transforms that file; if self.validate_docbook is set,
#      the result is passed to validate_docutils_rng();
#   4. if self.convert_to_fo is set: to_fo(), validate_fo_xsl(), then the
#      tree is serialised with etree.tostring() and written to the 'fo'
#      temp path;
#   5. a self.convert_to_pdf branch follows (its body is not visible here).
# NOTE(review): the condition that chooses between the pretty-printed and
# the plain etree.tostring() call is not visible in this excerpt, and the
# values assigned to `valid` are not used in the visible lines.
237 xml_string
= self
.rst_to_xml(in_file
=self
.in_file
, in_encoding
=self
.in_encoding
)
238 xml_string
= self
.insert_math_elements(xml_string
)
239 raw_path
= self
._make
_temp
(the_type
= 'rst')
240 with
open(raw_path
, 'w') as write_obj
:
241 write_obj
.write(xml_string
)
242 result_obj
= self
.to_docbook(raw_path
)
243 if self
.validate_docbook
:
244 valid
= self
.validate_docutils_rng(result_obj
)
245 if self
.convert_to_fo
:
246 result_obj
= self
.to_fo(result_obj
)
247 valid
= self
.validate_fo_xsl(result_obj
)
248 fo_file
= self
._make
_temp
(the_type
= 'fo')
250 root
= etree
.tostring(result_obj
, pretty_print
=True)
252 root
= etree
.tostring(result_obj
)
253 with
open(fo_file
, 'w') as write_obj
:
254 write_obj
.write(root
)
255 if self
.convert_to_pdf
:
# Script entry point: builds a ToXml object from the first command-line
# argument.
# NOTE(review): no further call on `to_xml_obj` is visible in this excerpt --
# confirm whether the pipeline is started in lines that follow.
260 if __name__
== '__main__':
261 to_xml_obj
= ToXml(sys
.argv
[1])