[Jan Gerber] fix unicode filenames in wikibooks2epub (from https://code.launchpad...
[objavi2.git] / tests / epub2booki.py
blobcaef6480c21f53242adee8fafd8d0625120616f7
1 #!/usr/bin/python
3 """tests for epub.py"""
5 import os, sys
7 sys.path.extend(('.', '..'))
8 #print sys.path
10 import tempfile
11 from pprint import pprint, pformat
12 from objavi import epub
14 from lxml.etree import Element
15 import lxml
def _xhtml_parse(*args, **kwargs):
    """Parse a document with lxml.html using a UTF-8 XHTML parser.

    All positional and keyword arguments are forwarded unchanged to
    lxml.html.parse(), except that any 'parser' keyword supplied by the
    caller is replaced by a fresh lxml.html.XHTMLParser.

    Returns whatever lxml.html.parse() returns.
    """
    # A bare "import lxml" does not load the lxml.html submodule, so
    # import it here instead of relying on some other module having
    # already done so.
    import lxml.html

    kwargs['parser'] = lxml.html.XHTMLParser(encoding="utf-8")
    return lxml.html.parse(*args, **kwargs)
def _html_parse(*args, **kwargs):
    """Parse a document with lxml.html using a UTF-8 HTML parser.

    All positional and keyword arguments are forwarded unchanged to
    lxml.html.parse(), except that any 'parser' keyword supplied by the
    caller is replaced by a fresh lxml.etree.HTMLParser.

    Returns whatever lxml.html.parse() returns.
    """
    # A bare "import lxml" does not load submodules; import the two used
    # here explicitly so the function does not depend on import side
    # effects of other modules.
    import lxml.etree
    import lxml.html

    kwargs['parser'] = lxml.etree.HTMLParser(encoding="utf-8")
    return lxml.html.parse(*args, **kwargs)
def _find_tag(doc, tag):
    """Return the first element found by doc.iter() for the given tag.

    The name prefixed with epub.XHTMLNS is tried first; if nothing
    matches, the bare tag name is tried.  StopIteration propagates to
    the caller when neither form matches.
    """
    try:
        # The next() builtin works on Python 2.6+ and 3.x, unlike the
        # Python-2-only iterator method .next().
        return next(doc.iter(epub.XHTMLNS + tag))
    except StopIteration:
        return next(doc.iter(tag))
def add_guts(src, dest):
    """Append the contents of the <body> of one tree onto that of
    another.

    The child elements of the source body are moved to the end of the
    destination body, so the source body ends up with no children.  The
    source body's leading text and tail are concatenated onto the
    destination (they are copied, not cleared from the source).
    """
    sbody = _find_tag(src, 'body')
    dbody = _find_tag(dest, 'body')

    leading_text = sbody.text or ''
    if len(dbody):
        # Text that precedes the first moved child belongs after the
        # destination's current last child, i.e. in that child's tail.
        last = dbody[-1]
        last.tail = ((last.tail or '') + leading_text) or None
    else:
        # An empty destination body has no last child to carry the
        # text; attach it to the body itself rather than failing on
        # dbody[-1].
        dbody.text = ((dbody.text or '') + leading_text) or None

    # Iterate over a snapshot: append() re-parents each child, which
    # removes it from sbody while the loop is running.
    for x in list(sbody):
        dbody.append(x)

    dbody.tail = ((dbody.tail or '') +
                  (sbody.tail or '')) or None
def add_marker(doc, ID, title=None, klass="espri-marker"):
    """Append an <hr> marker element to the end of doc's <body>.

    The marker gets ID as its 'id' attribute and klass as its 'class'
    attribute; a 'title' attribute is added only when title is not None.
    """
    # Keep the attributes in an ordered list so they are set in the
    # sequence id, class, title.
    attributes = [('id', ID), ('class', klass)]
    if title is not None:
        attributes.append(('title', title))

    hr = lxml.etree.Element('hr')
    for name, value in attributes:
        hr.set(name, value)

    _find_tag(doc, 'body').append(hr)
def concat_chapters(fn):
    """Load the epub file named fn and concatenate its documents.

    For every ID in the loaded epub's 'order' list, the manifest entry
    is looked up, a marker <hr> (id 'espri-new-page-<ID>', title set to
    the item's file name) is appended to a new document, and then the
    item's body contents are appended after it.  Items whose mimetype
    starts with 'image' are wrapped in a small document containing a
    single <img> tag; everything else is parsed with _html_parse.

    Each item's file name is printed as it is processed.

    Returns the combined document created by epub.new_doc().
    """
    e = epub.Epub()
    # Epub containers are zip archives, so read the file as bytes, and
    # use a context manager so the handle is closed promptly.
    with open(fn, 'rb') as f:
        e.load(f.read())

    e.parse_meta()
    e.parse_opf()
    e.parse_ncx()

    lang = e.find_language() or 'UND'
    # The result is not used here; the call is kept in case it has
    # side effects on e -- TODO confirm against objavi.epub.
    e.find_probable_chapters()

    doc = epub.new_doc(lang=lang)
    for ID in e.order:
        # Distinct name so the fn parameter is not shadowed.
        item_fn, mimetype = e.manifest[ID]
        print(item_fn)
        if mimetype.startswith('image'):
            # NOTE(review): item_fn is interpolated into markup
            # unescaped; a name containing '"' or '&' would produce
            # broken markup.
            tree = epub.new_doc(guts='<img src="%s" alt="" />' % item_fn)
        else:
            tree = e.gettree(item_fn, parse=_html_parse)

        add_marker(doc, 'espri-new-page-%s' % ID, item_fn)
        add_guts(tree, doc)

    return doc
if __name__ == '__main__':
    # Script entry point: expects the path of an epub file as the
    # first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError
        # traceback when the argument is missing.
        sys.exit("usage: %s EPUB_FILE" % sys.argv[0])
    concat_chapters(sys.argv[1])