3 """tests for epub.py"""
7 sys
.path
.extend(('.', '..'))
11 from pprint
import pprint
, pformat
13 from objavi
import epub
15 from lxml
.etree
import Element
18 from _epub
import _get_elements
, TEST_FILES
, _load_epub
21 "body", "head", "html", "title", "abbr", "acronym", "address",
22 "blockquote", "br", "cite", "code", "dfn", "div", "em", "h1", "h2",
23 "h3", "h4", "h5", "h6", "kbd", "p", "pre", "q", "samp", "span",
24 "strong", "var", "a", "dl", "dt", "dd", "ol", "ul", "li", "object",
25 "param", "b", "big", "hr", "i", "small", "sub", "sup", "tt", "del",
26 "ins", "bdo", "caption", "col", "colgroup", "table", "tbody", "td",
27 "tfoot", "th", "thead", "tr", "img", "area", "map", "meta", "style",
def _xhtml_parse(*args, **kwargs):
    """Parse a document via ``lxml.html.parse`` with a UTF-8 XHTML parser.

    All positional and keyword arguments are forwarded unchanged to
    ``lxml.html.parse``, except that the ``parser`` keyword is always
    replaced by a fresh ``lxml.html.XHTMLParser(encoding="utf-8")``.
    """
    xhtml_parser = lxml.html.XHTMLParser(encoding="utf-8")
    # Any caller-supplied 'parser' is deliberately overridden.
    kwargs.update(parser=xhtml_parser)
    return lxml.html.parse(*args, **kwargs)
def _html_parse(*args, **kwargs):
    """Parse a document via ``lxml.html.parse`` with a UTF-8 HTML parser.

    All positional and keyword arguments are forwarded unchanged to
    ``lxml.html.parse``, except that the ``parser`` keyword is always
    replaced by a fresh ``lxml.etree.HTMLParser(encoding="utf-8")``.
    """
    html_parser = lxml.etree.HTMLParser(encoding="utf-8")
    # Merge into a new dict; the forced 'parser' wins over any caller value.
    options = dict(kwargs, parser=html_parser)
    return lxml.html.parse(*args, **options)
42 def test_tags(parse
=_html_parse
):
# Tally element tag names found in the books listed in TEST_FILES and print
# the totals.  `parse` is the parser callable handed to e.gettree(); it
# defaults to _html_parse.
# NOTE(review): several statements of this function are not visible in this
# view (e.g. the `try:` that pairs with the `except` below, and the
# definitions of `ID`, `t` and `bad_tags`) -- confirm against the full file.
# NOTE(review): Python 2 only syntax is used here (`except X, name`, the
# `print` statement, dict.iteritems()).
43 #XXX not testing that the tags are correctly used or nested!
# Start every name in OK_TAGS with a count of zero.
44 good_tags
= dict((x
, 0) for x
in OK_TAGS
)
46 for book
in TEST_FILES
:
48 e
= _load_epub(book
, verbose
=True)
# Parse the item identified by `ID` with the caller-selected parser.
54 tree
= e
.gettree(id=ID
, parse
=parse
)
55 except Exception, exc
:
# Visit the nodes yielded by tree.getiterator(Element).
57 for x
in tree
.getiterator(Element
):
60 #if not t.startswith(epub.XHTMLNS):
61 # t = '{No namespace}' + t
62 # bad_tags[t] = bad_tags.get(t, 0) + 1
# Remove the epub.XHTMLNS string from the tag name.
64 t
= t
.replace(epub
.XHTMLNS
, '')
# Count one more occurrence of tag `t` in bad_tags.
68 bad_tags
[t
] = bad_tags
.get(t
, 0) + 1
# Print the good_tags table; pairs are sorted as (count, tag), i.e. in
# ascending order of count.
72 for n
, t
in sorted((v
, k
) for k
, v
in good_tags
.iteritems()):
73 print "%20s:%s" % (t
, n
)
# Print every tag recorded in bad_tags with its count (dict iteration order).
75 for t
, n
in bad_tags
.iteritems():
76 print "%20s:%s" % (t
, n
)
81 def add_guts(src
, dest
):
82 """Append the contents of the <body> of one tree onto that of
83 another. The source tree will be emptied."""
# NOTE(review): the try/except lines that presumably separate the paired
# lookups below, and the statements between the two `+=` lines, are not
# visible in this view -- confirm against the full file.
84 #print lxml.etree.tostring(src)
# Source body: first match for the epub.XHTMLNS-qualified 'body' tag ...
86 sbody
= src
.iter(epub
.XHTMLNS
+ 'body').next()
# ... or the first match for the un-namespaced 'body' tag.
88 sbody
= src
.iter('body').next()
# Destination body: same two lookups, namespaced then plain.
90 dbody
= dest
.iter(epub
.XHTMLNS
+ 'body').next()
92 dbody
= dest
.iter('body').next()
# Append the source body's leading text to the destination body's tail.
# NOTE(review): `+=` raises TypeError if either attribute is None --
# confirm whether the lines not visible here guard against that.
94 dbody
.tail
+= sbody
.text
# Append the source body's tail text to the destination body's tail.
100 dbody
.tail
+= sbody
.tail
105 def add_marker(doc
, ID
, title
=None, klass
="espri-marker"):
# Build an <hr> marker element carrying class `klass` and, when `title` is
# given, a title attribute, then look up the <body> of `doc`.
# NOTE(review): the use of `ID`, the `try:` that pairs with the `except`
# below, and whatever attaches the marker to the body are not visible in
# this view -- confirm against the full file.
106 marker
= lxml
.etree
.Element('hr')
108 marker
.set('class', klass
)
# The title attribute is only set when a title was supplied.
109 if title
is not None:
110 marker
.set('title', title
)
# Prefer the epub.XHTMLNS-qualified 'body' tag ...
112 dbody
= doc
.iter(epub
.XHTMLNS
+ 'body').next()
# ... and fall back to the plain 'body' tag when that iterator is empty.
113 except StopIteration:
114 dbody
= doc
.iter('body').next()
118 for book
in TEST_FILES
:
120 e
= _load_epub(book
, verbose
=True)
124 lang
= e
.find_language() or 'UND'
125 doc
= epub
.new_doc(lang
=lang
)
127 fn
, mimetype
= e
.manifest
[ID
]
129 if mimetype
.startswith('image'):
130 tree
= epub
.new_doc(guts
='<img src="%s" alt="" />' % fn
)
132 tree
= e
.gettree(fn
, parse
=_html_parse
)
134 add_marker(doc
, 'espri-new-page-%s' % ID
, fn
)
137 f
= open('tests/xhtml/' + os
.path
.basename(book
) + '.html', 'w')
138 print >> f
, lxml
.etree
.tostring(doc
, encoding
='utf-8', method
='html').replace(' ', '')#.encode('utf-8')
143 for book
in TEST_FILES
:
145 e
= _load_epub(book
, verbose
=True)
150 doc
= e
.concat_document()
152 f
= open('tests/xhtml/' + os
.path
.basename(book
) + '.html', 'w')
153 print >> f
, lxml
.etree
.tostring(doc
, encoding
='utf-8', method
='html').replace(' ', '')#.encode('utf-8')
157 for book
in TEST_FILES
:
158 #for book in ['tests/epub-examples/Doctorow - I, Robot.epub']:
159 #for book in ['tests/epub-examples/pg829.epub']:
160 #for book in ['tests/epub-examples/ia-huckfin.epub']:
162 e
= _load_epub(book
, verbose
=True)
166 e
.make_bookizip('tests/booki_books/%s.zip' % os
.path
.basename(book
))
170 def test_objavi_epub():
# For each entry in the booki book directory, build a ZipBook and move its
# epub file into the generated-epubs directory.
# NOTE(review): some statements of this function are not visible in this
# view (nothing shown here actually produces book.epubfile) -- confirm
# against the full file.
171 src
= 'tests/booki_books'
172 dest
= 'tests/generated-epubs'
# Imported inside the function rather than at module level.
174 from objavi
.fmbook
import log
, ZipBook
, make_book_name
175 from objavi
import config
# Point objavi's book directory setting at the test input directory.
176 config
.BOOKI_BOOK_DIR
= src
178 for zfn
in os
.listdir(src
):
179 log('starting %s' % zfn
)
# zfn[:-4] drops the last four characters of the file name
# (presumably the ".zip" extension -- TODO confirm).
180 book
= ZipBook(config
.LOCALHOST
, zfn
[:-4])
# Log the file name together with the (current path, destination path) pair.
182 log( zfn
, (book
.epubfile
, '%s/%s' % (dest
, os
.path
.basename(book
.epubfile
))))
# Move the epub file into `dest`, keeping its base name.
184 os
.rename(book
.epubfile
, '%s/%s' % (dest
, os
.path
.basename(book
.epubfile
)))
187 if __name__
== '__main__':