Fix #338: re.sub() flag argument at wrong position.
[docutils.git] / sandbox / docbook / scripts / docutils_to_docbook.py
blob18225150734e519916c654ac94c81359a1c1d9f3
1 import os, sys, subprocess, argparse, tempfile, logging, glob
2 import asciitomathml.asciitomathml
3 import validate_docbook, validate_fo
4 import fop
5 import logging
6 import xsl_process
7 import lxml
8 from lxml import etree as etree
# Best-effort: adopt the user's default locale settings.  A missing locale
# module or an unsupported locale must not stop the script, but unrelated
# errors (and KeyboardInterrupt/SystemExit) are no longer swallowed.
try:
    import locale
except ImportError:
    pass
else:
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        pass
class NoRunException(Exception):
    """Exception type declared by this script; it carries no extra state."""
class ToXml(object):
    """Drive the reStructuredText -> Docutils XML -> DocBook -> FO -> PDF chain.

    The pipeline is run by :meth:`convert`.  Intermediate and final file names
    are derived from ``in_file`` by :meth:`_make_temp`.
    """

    def __init__(self, in_file, in_encoding='utf8', to_docbook=True,
                 validate_docbook=True, convert_to_fo=True,
                 convert_to_pdf=True, debug=False):
        """Store the conversion options and set up logging.

        in_file -- path of the reStructuredText source.
        in_encoding -- encoding handed to Docutils as ``--input-encoding``.
        to_docbook -- accepted for backward compatibility; it is not used
            (and is deliberately not stored, since an attribute of that name
            would shadow the ``to_docbook`` method).
        validate_docbook -- validate the DocBook result against docbook.rng.
        convert_to_fo -- run the DocBook-to-FO stylesheet and write the FO file.
        convert_to_pdf -- run FOP on the FO file (only when FO is produced).
        debug -- pretty-print the FO output when true.
        """
        self.path_id = '__rst__'
        self._transform_num = 0
        self.in_file = in_file
        self.in_encoding = in_encoding
        self.validate_docbook = validate_docbook
        self.convert_to_fo = convert_to_fo
        self.convert_to_pdf = convert_to_pdf
        # The flag used to be overwritten with True right after make_logging(),
        # which made the ``debug`` argument meaningless.
        self.debug = debug
        self.make_logging()

    def make_logging(self, ch_level=logging.ERROR, fh_level=logging.INFO):
        """Attach a console and a file handler to the module logger.

        ch_level -- threshold of the stream (console) handler.
        fh_level -- threshold of the ``docutils_to_xml.log`` file handler.

        The logger is shared by every instance, so handlers are only added
        the first time; otherwise each new instance would duplicate every
        log record.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)
        if not logger.handlers:
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh = logging.FileHandler('docutils_to_xml.log')
            fh.setLevel(fh_level)
            ch = logging.StreamHandler()
            ch.setLevel(ch_level)
            for handler in (ch, fh):
                handler.setFormatter(formatter)
                logger.addHandler(handler)
        self.logger = logger

    @staticmethod
    def _is_blank(text):
        """Return True when *text* is None, empty or whitespace only."""
        return not text or not text.strip()

    def pretty_print(self, elem, level=0):
        """Indent *elem* and its descendants in place.

        Whitespace-only ``text``/``tail`` values are replaced by a newline
        plus one space per nesting level; text that has content is left
        untouched.
        """
        indent = "\n" + level * " "
        children = list(elem)
        if not children:
            # A leaf only gets a tail when it is not the root element.
            if level and self._is_blank(elem.tail):
                elem.tail = indent
            return
        if self._is_blank(elem.text):
            elem.text = indent + " "
        if self._is_blank(elem.tail):
            elem.tail = indent
        for child in children:
            self.pretty_print(child, level + 1)
        # The last child's tail precedes the parent's closing tag, so it is
        # pulled back to the parent's own indentation.
        last_child = children[-1]
        if self._is_blank(last_child.tail):
            last_child.tail = indent

    def rst_to_xml(self, in_file, base=None, in_encoding='utf8'):
        """Run the Docutils XML writer on *in_file* and return its output.

        in_file -- path of the reStructuredText source.
        base -- leading ``sys.argv`` entries (program name and any extra
            options); defaults to ``[sys.argv[0]]``.
        in_encoding -- value for ``--input-encoding``.

        Docutils is driven through ``publish_cmdline``, which reads
        ``sys.argv`` and writes to ``sys.stdout``; both are swapped for the
        duration of the call and restored afterwards, even when publishing
        fails.  The output is returned exactly as it was written to disk
        (read in binary mode, so it is a byte string on Python 3).
        """
        from docutils.core import publish_cmdline, default_description
        needed_opts = [
            '--traceback', '--strip-comments',
            '--trim-footnote-reference-space', '--no-doctype',
            '--input-encoding={0}'.format(in_encoding),
            '--output-encoding=ascii', '--no-generator',
        ]
        if not base:
            base = [sys.argv[0]]
        description = ('Generates Docutils-native XML from standalone '
                       'reStructuredText sources. ' + default_description)
        fd, temp_file = tempfile.mkstemp()
        # Only the name is needed; the file is reopened through open() below.
        os.close(fd)
        saved_argv, saved_stdout = sys.argv, sys.stdout
        try:
            sys.argv = base + needed_opts + [in_file]
            with open(temp_file, 'w') as write_obj:
                sys.stdout = write_obj
                try:
                    publish_cmdline(writer_name='xml', description=description)
                finally:
                    sys.stdout = saved_stdout
            # Bytes, not text: lxml refuses unicode strings that carry an
            # XML encoding declaration.
            with open(temp_file, 'rb') as read_obj:
                return read_obj.read()
        finally:
            sys.argv = saved_argv
            os.remove(temp_file)

    def insert_math_elements(self, xml_string, out_encoding='ASCII'):
        """Replace the text of math elements by MathML markup.

        The text of every ``math_block`` and ``math`` element is parsed with
        ``asciitomathml`` and the resulting tree is inserted as the element's
        first child; ``math_block`` elements get ``displaystyle='true'``.

        Returns the modified document serialised with *out_encoding*.
        """
        tree = etree.XML(xml_string)
        for tag in ('math_block', 'math'):
            # Snapshot the matches first: the loop body adds children to the
            # tree that is being walked.
            for element in list(tree.iter(tag)):
                math_text = element.text
                element.text = ''
                if tag == 'math_block':
                    math_obj = asciitomathml.asciitomathml.AsciiMathML(
                        mstyle={'displaystyle': 'true'})
                else:
                    math_obj = asciitomathml.asciitomathml.AsciiMathML()
                math_obj.parse_string(math_text)
                # Serialise and re-parse so that the inserted node is an lxml
                # element (presumably get_tree() returns another tree type;
                # verify against asciitomathml).
                math_string = etree.tostring(math_obj.get_tree(),
                                             encoding='ascii')
                element.insert(0, etree.XML(math_string))
        return etree.tostring(tree, encoding=out_encoding)

    def _make_temp(self, the_type):
        """Return the file name used for *the_type*.

        The name is ``in_file`` without its extension, followed by
        ``path_id`` and a type-specific suffix.  *the_type* is one of
        ``'transform'`` (numbered; each call increments the counter),
        ``'docbook'``, ``'rst'`` or ``'fo'``.

        Raises ValueError for any other value (this used to return None
        silently).
        """
        stem = os.path.splitext(self.in_file)[0]
        if the_type == 'transform':
            self._transform_num += 1
            return '{0}{1}transform{2}.xml'.format(
                stem, self.path_id, self._transform_num)
        suffixes = {'docbook': 'docbook.xml', 'rst': 'raw.xml',
                    'fo': 'docbook.fo'}
        if the_type not in suffixes:
            raise ValueError('unknown temp file type "{0}"'.format(the_type))
        return '{0}{1}{2}'.format(stem, self.path_id, suffixes[the_type])

    def _run_transforms(self, source, xsl_files, final_type):
        """Apply *xsl_files* in order to *source*; return the last result.

        Each stage receives the result object of the previous one.  The old
        code passed the next stage a temp file name that was never written.
        """
        current = source
        xml_obj = None
        last_index = len(xsl_files) - 1
        for index, xsl in enumerate(xsl_files):
            the_type = final_type if index == last_index else 'transform'
            # The name is only reported; nothing is written to it here.
            out_file = self._make_temp(the_type=the_type)
            self.logger.debug('out file from temp is "{0}"'.format(out_file))
            # NOTE(review): the first item returned by transform_lxml is
            # ignored, as before; confirm its meaning in xsl_process.
            error, xml_obj = xsl_process.transform_lxml(xsl, current)
            current = xml_obj
        return xml_obj

    def to_docbook(self, raw_path, xsl_files=None):
        """Transform the Docutils XML file *raw_path* to DocBook.

        xsl_files -- stylesheets to apply in order; when empty or omitted,
            ``$RST_DOCBOOK_HOME/docutils_to_docbook.xsl`` is used.

        Returns the result object of the last transformation.
        Raises OSError when RST_DOCBOOK_HOME is unset and IOError when the
        default stylesheet is missing.
        """
        doc_home = os.environ.get('RST_DOCBOOK_HOME')
        if not doc_home:
            raise OSError('You must set RST_DOCBOOK_HOME')
        if not xsl_files:
            xsl_file = os.path.join(doc_home, 'docutils_to_docbook.xsl')
            if not os.path.isfile(xsl_file):
                raise IOError('cannot find "{0}"'.format(xsl_file))
            xsl_files = [xsl_file]
        return self._run_transforms(raw_path, xsl_files, 'docbook')

    def to_fo(self, result_tree_obj, xsl_files=None):
        """Transform the DocBook result *result_tree_obj* to XSL-FO.

        xsl_files -- stylesheets to apply in order; when empty or omitted,
            ``$DOCBOOK_HOME/fo/docbook.xsl`` is used.

        Returns the result object of the last transformation.
        Raises OSError when DOCBOOK_HOME is unset and IOError when the
        default stylesheet is missing.
        """
        doc_home = os.environ.get('DOCBOOK_HOME')
        if not doc_home:
            raise OSError('You must set DOCBOOK_HOME')
        if not xsl_files:
            xsl_file = os.path.join(doc_home, 'fo', 'docbook.xsl')
            if not os.path.isfile(xsl_file):
                raise IOError('cannot find "{0}"'.format(xsl_file))
            xsl_files = [xsl_file]
        return self._run_transforms(result_tree_obj, xsl_files, 'fo')

    def to_pdf(self, fo_file):
        """Run ``fop.Fop().to_pdf`` on *fo_file* and return its result."""
        return fop.Fop().to_pdf(fo_file)

    def report_xsl_error(self, transform_error_obj):
        """Write every entry of an lxml error log to stderr.

        The line and column of an entry are additionally logged as critical
        when both are non-zero.
        """
        for error_obj in transform_error_obj:
            sys.stderr.write(error_obj.message)
            sys.stderr.write('\n')
            if error_obj.line != 0 and error_obj.column != 0:
                self.logger.critical(str(error_obj.line))
                self.logger.critical(str(error_obj.column))

    def validate_docutils_rng(self, xml_obj):
        """Validate *xml_obj* against ``$VALIDATE_HOME/relax/docbook.rng``.

        Returns 1 when the document is invalid (the errors are reported via
        report_xsl_error) and 0 when it is valid.
        Raises IOError when VALIDATE_HOME is unset or the schema is missing.
        """
        valid_home = os.environ.get('VALIDATE_HOME')
        if valid_home is None:
            raise IOError('You need to set the variable "VALIDATE_HOME"')
        docbook_rng = os.path.join(valid_home, 'relax', 'docbook.rng')
        if not os.path.isfile(docbook_rng):
            msg = 'cannot find "{0}"'.format(docbook_rng)
            msg += '\nYou need download docbook.rng'
            raise IOError(msg)
        # Parse by path so that no file handle is left open.
        relaxng = etree.RelaxNG(etree.parse(docbook_rng))
        if relaxng.validate(xml_obj):
            return 0
        self.logger.critical('Not valid docbook\n')
        self.report_xsl_error(relaxng.error_log)
        return 1

    def validate_fo_xsl(self, result_tree_obj):
        """Check the FO result with ``$VALIDATE_HOME/xslt/folint.xsl``.

        Returns the number of entries in the transformation's error log, or
        1 when applying the stylesheet itself fails.  The entries are
        reported via report_xsl_error in both cases.
        Raises IOError when VALIDATE_HOME is unset or folint.xsl is missing.
        """
        valid_home = os.environ.get('VALIDATE_HOME')
        if valid_home is None:
            raise IOError('You need to set the variable "VALIDATE_HOME"')
        xsl_ss = os.path.join(valid_home, 'xslt', 'folint.xsl')
        if not os.path.isfile(xsl_ss):
            # The message used to reference an undefined name (docbook_rng)
            # and therefore raised NameError instead of IOError.
            msg = 'cannot find "{0}"'.format(xsl_ss)
            msg += '\nYou need download folint.xsl'
            raise IOError(msg)
        transform = etree.XSLT(etree.parse(xsl_ss))
        try:
            transform(result_tree_obj)
        except etree.XSLTApplyError as error:
            # The old message formatted three undefined names and could
            # never be built.
            msg = 'error applying "{0}":\n{1}\n'.format(xsl_ss, error)
            self.logger.critical(msg)
            self.report_xsl_error(transform.error_log)
            return 1
        self.report_xsl_error(transform.error_log)
        return len(transform.error_log)

    def clean(self, the_dir):
        """Delete the intermediate XML files found in *the_dir*.

        Removes ``*<path_id>docbook.xml``, ``*<path_id>transformN.xml`` (N
        being any run of digits, not just a single digit) and
        ``*<path_id>raw.xml``.  The FO file is not touched.
        """
        def matches(suffix):
            pattern = '*{0}{1}'.format(self.path_id, suffix)
            return glob.glob(os.path.join(the_dir, pattern))

        files = matches('docbook.xml')
        marker = '{0}transform'.format(self.path_id)
        for path in matches('transform[0-9]*.xml'):
            # The glob also accepts non-digits after the first digit; keep
            # only names whose counter is purely numeric.
            counter = os.path.basename(path).rsplit(marker, 1)[1][:-len('.xml')]
            if counter.isdigit():
                files.append(path)
        files += matches('raw.xml')
        self.logger.debug(files)
        for path in files:
            os.remove(path)

    def convert(self):
        """Run the whole pipeline for ``self.in_file``.

        Writes the math-enriched Docutils XML, transforms it to DocBook,
        optionally validates it, and - when ``convert_to_fo`` is set -
        writes the FO file and optionally converts it to PDF.  Validation
        results are reported by the validators but do not stop the run.
        """
        xml_string = self.rst_to_xml(in_file=self.in_file,
                                     in_encoding=self.in_encoding)
        xml_string = self.insert_math_elements(xml_string)
        raw_path = self._make_temp(the_type='rst')
        # etree.tostring() with an explicit encoding yields bytes.
        with open(raw_path, 'wb') as write_obj:
            write_obj.write(xml_string)
        result_obj = self.to_docbook(raw_path)
        if self.validate_docbook:
            self.validate_docutils_rng(result_obj)
        if not self.convert_to_fo:
            # No FO file means there is nothing for the PDF step to read.
            return
        result_obj = self.to_fo(result_obj)
        self.validate_fo_xsl(result_obj)
        fo_file = self._make_temp(the_type='fo')
        root = etree.tostring(result_obj, pretty_print=bool(self.debug))
        with open(fo_file, 'wb') as write_obj:
            write_obj.write(root)
        if self.convert_to_pdf:
            self.to_pdf(fo_file)
def main(argv=None):
    """Convert the reStructuredText file named on the command line.

    argv -- argument list including the program name; defaults to sys.argv.

    Returns 0 after a conversion and 2 (after printing a usage line to
    stderr) when no input file was given; previously a missing argument
    ended in an IndexError traceback.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'docutils_to_docbook.py'
        sys.stderr.write('usage: {0} RST_FILE\n'.format(prog))
        return 2
    to_xml_obj = ToXml(argv[1])
    to_xml_obj.convert()
    return 0


if __name__ == '__main__':
    sys.exit(main())