check for kill failure on first round as well as second
[objavi2.git] / tests / _test_content.py
bloba8ca179224dc5a1c208f2626d59a426668b78153
1 #!/usr/bin/python
3 """tests for epub.py"""
5 import os, sys
7 sys.path.extend(('.', '..'))
8 #print sys.path
10 import tempfile
11 from pprint import pprint, pformat
13 from objavi import epub
15 from lxml.etree import Element
16 import lxml
18 from _epub import _get_elements, TEST_FILES, _load_epub
# Element names that test_tags() counts as acceptable; any other tag name
# it meets is tallied as a "bad" tag.
OK_TAGS = [
    "body", "head", "html", "title", "abbr", "acronym", "address",
    "blockquote", "br", "cite", "code", "dfn", "div", "em", "h1", "h2",
    "h3", "h4", "h5", "h6", "kbd", "p", "pre", "q", "samp", "span",
    "strong", "var", "a", "dl", "dt", "dd", "ol", "ul", "li", "object",
    "param", "b", "big", "hr", "i", "small", "sub", "sup", "tt", "del",
    "ins", "bdo", "caption", "col", "colgroup", "table", "tbody", "td",
    "tfoot", "th", "thead", "tr", "img", "area", "map", "meta", "style",
    "link", "base",
]
def _xhtml_parse(*args, **kwargs):
    """Parse a document with lxml.html.parse() using a UTF-8 XHTMLParser.

    All positional and keyword arguments are handed straight on to
    lxml.html.parse(); a 'parser' keyword supplied by the caller is
    replaced.
    """
    # A bare "import lxml" does not load the html subpackage, so import it
    # here instead of relying on some other module having done so already.
    import lxml.html
    kwargs['parser'] = lxml.html.XHTMLParser(encoding="utf-8")
    return lxml.html.parse(*args, **kwargs)
def _html_parse(*args, **kwargs):
    """Parse a document with lxml.html.parse() using a UTF-8 etree HTMLParser.

    All positional and keyword arguments are handed straight on to
    lxml.html.parse(); a 'parser' keyword supplied by the caller is
    replaced.
    """
    # A bare "import lxml" does not load its subpackages, so import the two
    # that are used here instead of relying on another module's imports.
    import lxml.etree
    import lxml.html
    kwargs['parser'] = lxml.etree.HTMLParser(encoding="utf-8")
    return lxml.html.parse(*args, **kwargs)
def test_tags(parse=_html_parse):
    """Count the element names used in the spine documents of each test book.

    Every file in TEST_FILES is loaded, and each item in its spine is parsed
    with `parse`.  Tag names (with the XHTML namespace prefix removed) that
    appear in OK_TAGS are counted as good, all others as bad, and both
    tallies are printed at the end.

    parse -- callable passed to gettree() to do the actual parsing.
    """
    #XXX not testing that the tags are correctly used or nested!
    good_tags = dict((x, 0) for x in OK_TAGS)
    bad_tags = {}
    for book in TEST_FILES:
        e = _load_epub(book, verbose=True)
        e.parse_meta()
        e.parse_opf()
        for ID in e.spine:
            try:
                tree = e.gettree(id=ID, parse=parse)
            except Exception as exc:
                print("%s %s" % (ID, exc))
                # There is no tree for this item.  Falling through would
                # either hit an unbound name or count the previous
                # document's tags a second time.
                continue
            for x in tree.iter(Element):
                t = x.tag.replace(epub.XHTMLNS, '')
                if t in good_tags:
                    good_tags[t] += 1
                else:
                    bad_tags[t] = bad_tags.get(t, 0) + 1

    print("GOOD TAGS")
    # Sorted by count (then name), least used first.
    for n, t in sorted((v, k) for k, v in good_tags.items()):
        print("%20s:%s" % (t, n))
    print("BAD TAGS")
    for t, n in bad_tags.items():
        print("%20s:%s" % (t, n))
def _find_body(tree):
    """Return the first <body> in tree, trying the XHTML namespace first."""
    try:
        return next(tree.iter(epub.XHTMLNS + 'body'))
    except StopIteration:
        return next(tree.iter('body'))


def _append_text(body, text):
    """Add text to the end of body's content, after its last child if any."""
    if not text:
        return
    if len(body):
        last = body[-1]
        last.tail = (last.tail or '') + text
    else:
        body.text = (body.text or '') + text


def add_guts(src, dest):
    """Append the contents of the <body> of one tree onto that of
    another.

    The leading text of the source body is added to the end of the
    destination body's content, then every child of the source body is
    appended to the destination body.  With lxml, appending re-parents an
    element, so the source body loses its children.  Any tail text after
    the source </body> is added to the destination body's tail.

    Raises StopIteration if either tree has no <body> element.
    """
    sbody = _find_body(src)
    dbody = _find_body(dest)
    # Text goes inside the destination body.  It used to be added to
    # dbody.tail, which sits after </body>, and was silently dropped
    # whenever that tail was None.
    _append_text(dbody, sbody.text)
    # Snapshot the children first: appending moves them out of sbody.
    for x in list(sbody):
        dbody.append(x)
    if sbody.tail:
        dbody.tail = (dbody.tail or '') + sbody.tail
def add_marker(doc, ID, title=None, klass="espri-marker"):
    """Append an <hr> marker element to the end of doc's <body>.

    The marker gets `ID` as its id and `klass` as its class; a title
    attribute is set only when `title` is not None.  The body is looked up
    in the XHTML namespace first, then without a namespace.
    """
    attributes = [('id', ID), ('class', klass)]
    if title is not None:
        attributes.append(('title', title))

    marker = lxml.etree.Element('hr')
    for key, value in attributes:
        marker.set(key, value)

    try:
        target = next(doc.iter(epub.XHTMLNS + 'body'))
    except StopIteration:
        target = next(doc.iter('body'))
    target.append(marker)
def concat_books():
    """Join the spine items of each test book into one HTML file.

    For every file in TEST_FILES a new document is created with
    epub.new_doc().  Each spine item is preceded by a marker (see
    add_marker) and then has its body contents appended (see add_guts).
    Image items are wrapped in a small document containing a single <img>.
    The result is written to tests/xhtml/<book basename>.html.
    """
    for book in TEST_FILES:
        print(book)
        e = _load_epub(book, verbose=True)
        e.parse_meta()
        e.parse_opf()
        e.parse_ncx()
        lang = e.find_language() or 'UND'
        doc = epub.new_doc(lang=lang)
        for ID in e.spine:
            fn, mimetype = e.manifest[ID]
            print(fn)
            if mimetype.startswith('image'):
                tree = epub.new_doc(guts='<img src="%s" alt="" />' % fn)
            else:
                tree = e.gettree(fn, parse=_html_parse)

            add_marker(doc, 'espri-new-page-%s' % ID, fn)
            add_guts(tree, doc)

        # Serialise before opening the file so that a failure here does not
        # leave a truncated file behind; '&#13;' references are stripped.
        html = lxml.etree.tostring(doc, encoding='utf-8', method='html')
        html = html.replace('&#13;', '')
        # The with-block closes the file even if the write raises.
        with open('tests/xhtml/' + os.path.basename(book) + '.html', 'w') as f:
            f.write(html + '\n')
def test_concat():
    """Write the result of concat_document() for each test book as HTML.

    Every file in TEST_FILES is loaded and parsed (meta, opf, ncx), and the
    document returned by its concat_document() method is serialised to
    tests/xhtml/<book basename>.html.
    """
    for book in TEST_FILES:
        e = _load_epub(book, verbose=True)
        e.parse_meta()
        e.parse_opf()
        e.parse_ncx()

        doc = e.concat_document()

        # Serialise before opening the file so that a failure here does not
        # leave a truncated file behind; '&#13;' references are stripped.
        html = lxml.etree.tostring(doc, encoding='utf-8', method='html')
        html = html.replace('&#13;', '')
        # The with-block closes the file even if the write raises.
        with open('tests/xhtml/' + os.path.basename(book) + '.html', 'w') as f:
            f.write(html + '\n')
def test_bookizip():
    """Run make_bookizip() on every test book.

    Each file in TEST_FILES is loaded and parsed (meta, opf, ncx), then
    make_bookizip() is called with tests/booki_books/<book basename>.zip
    as its argument.
    """
    # To try a single book, iterate over a one-item list instead, e.g.
    # ['tests/epub-examples/pg829.epub'].
    for book in TEST_FILES:
        e = _load_epub(book, verbose=True)
        for step in ('parse_meta', 'parse_opf', 'parse_ncx'):
            getattr(e, step)()
        zip_path = 'tests/booki_books/%s.zip' % os.path.basename(book)
        e.make_bookizip(zip_path)
def test_objavi_epub():
    """Make an epub from every .zip in tests/booki_books.

    config.BOOKI_BOOK_DIR is pointed at the source directory, a ZipBook is
    created for each zip file (named by the file name minus its extension),
    make_epub() is called on it, and the resulting epub file is moved into
    tests/generated-epubs.
    """
    src = 'tests/booki_books'
    dest = 'tests/generated-epubs'

    from objavi.fmbook import log, ZipBook
    from objavi import config
    config.BOOKI_BOOK_DIR = src

    for zfn in os.listdir(src):
        name, ext = os.path.splitext(zfn)
        if ext != '.zip':
            # Stray files used to have their last four characters chopped
            # off and be treated as book names; skip them instead.
            continue
        log('starting %s' % zfn)
        book = ZipBook(config.LOCALHOST, name)
        book.make_epub()
        target = '%s/%s' % (dest, os.path.basename(book.epubfile))
        log(zfn, (book.epubfile, target))
        os.rename(book.epubfile, target)
if __name__ == "__main__":
    # Only test_bookizip() runs by default; uncomment one of the other
    # calls to run it instead of (or as well as) the bookizip pass.
    #test_tags()
    #concat_books()
    test_bookizip()
    #test_objavi_epub()