tests/_test_content.py

   1 #!/usr/bin/python
   2
   3 """tests for epub.py"""
   4
   5 import os, sys
   6
   7 sys.path.extend(('.', '..'))
   8 #print sys.path
   9
  10 import tempfile
  11 from pprint import pprint, pformat
  12
  13 from objavi import epub
  14
  15 from lxml.etree import Element
  16 import lxml
  17
  18 from _epub import _get_elements, TEST_FILES, _load_epub
  19
  20 OK_TAGS = [
  21     "body", "head", "html", "title", "abbr", "acronym", "address",
  22     "blockquote", "br", "cite", "code", "dfn", "div", "em", "h1", "h2",
  23     "h3", "h4", "h5", "h6", "kbd", "p", "pre", "q", "samp", "span",
  24     "strong", "var", "a", "dl", "dt", "dd", "ol", "ul", "li", "object",
  25     "param", "b", "big", "hr", "i", "small", "sub", "sup", "tt", "del",
  26     "ins", "bdo", "caption", "col", "colgroup", "table", "tbody", "td",
  27     "tfoot", "th", "thead", "tr", "img", "area", "map", "meta", "style",
  28     "link", "base"
  29     ]
  30
  31
  32 def _xhtml_parse(*args, **kwargs):
  33     kwargs['parser'] = lxml.html.XHTMLParser(encoding="utf-8")
  34
  35     return lxml.html.parse(*args, **kwargs)
  36
  37 def _html_parse(*args, **kwargs):
  38     kwargs['parser'] = lxml.etree.HTMLParser(encoding="utf-8")
  39     return lxml.html.parse(*args, **kwargs)
  40
  41
  42 def test_tags(parse=_html_parse):
  43     #XXX not testing that the tags are correctly used or nested!
  44     good_tags = dict((x, 0) for x in OK_TAGS)
  45     bad_tags = {}
  46     for book in TEST_FILES:
  47         #print book
  48         e = _load_epub(book, verbose=True)
  49         e.parse_meta()
  50         e.parse_opf()
  51         #e.parse_ncx()
  52         for ID in e.spine:
  53             try:
  54                 tree = e.gettree(id=ID, parse=parse)
  55             except Exception, exc:
  56                 print ID, exc
  57             for x in tree.getiterator(Element):
  58                 t = x.tag
  59                 #print t
  60                 #if not t.startswith(epub.XHTMLNS):
  61                 #    t = '{No namespace}' + t
  62                 #    bad_tags[t] = bad_tags.get(t, 0) + 1
  63                 #    continue
  64                 t = t.replace(epub.XHTMLNS, '')
  65                 if t in good_tags:
  66                     good_tags[t] += 1
  67                 else:
  68                     bad_tags[t] = bad_tags.get(t, 0) + 1
  69
  70     print "GOOD TAGS"
  71
  72     for n, t in sorted((v, k) for k, v in good_tags.iteritems()):
  73         print "%20s:%s" % (t, n)
  74     print "BAD TAGS"
  75     for t, n in bad_tags.iteritems():
  76         print "%20s:%s" % (t, n)
  77
  78
  79
  80
  81 def add_guts(src, dest):
  82     """Append the contents of the <body> of one tree onto that of
  83     another.  The source tree will be emptied."""
  84     #print  lxml.etree.tostring(src)
  85     try:
  86         sbody = src.iter(epub.XHTMLNS + 'body').next()
  87     except StopIteration:
  88         sbody = src.iter('body').next()
  89     try:
  90         dbody = dest.iter(epub.XHTMLNS + 'body').next()
  91     except StopIteration:
  92         dbody = dest.iter('body').next()
  93     try:
  94         dbody.tail += sbody.text
  95     except TypeError:
  96         pass
  97     for x in sbody:
  98         dbody.append(x)
  99     try:
 100         dbody.tail += sbody.tail
 101     except TypeError:
 102         pass
 103
 104
 105 def add_marker(doc, ID, title=None, klass="espri-marker"):
 106     marker = lxml.etree.Element('hr')
 107     marker.set('id', ID)
 108     marker.set('class', klass)
 109     if title is not None:
 110         marker.set('title', title)
 111     try:
 112         dbody = doc.iter(epub.XHTMLNS + 'body').next()
 113     except StopIteration:
 114         dbody = doc.iter('body').next()
 115     dbody.append(marker)
 116
 117 def concat_books():
 118     for book in TEST_FILES:
 119         print book
 120         e = _load_epub(book, verbose=True)
 121         e.parse_meta()
 122         e.parse_opf()
 123         e.parse_ncx()
 124         lang = e.find_language() or 'UND'
 125         doc = epub.new_doc(lang=lang)
 126         for ID in e.spine:
 127             fn, mimetype = e.manifest[ID]
 128             print fn
 129             if mimetype.startswith('image'):
 130                 tree = epub.new_doc(guts='<img src="%s" alt="" />' % fn)
 131             else:
 132                 tree = e.gettree(fn, parse=_html_parse)
 133
 134             add_marker(doc, 'espri-new-page-%s' % ID, fn)
 135             add_guts(tree, doc)
 136
 137         f = open('tests/xhtml/' + os.path.basename(book) + '.html', 'w')
 138         print >> f, lxml.etree.tostring(doc, encoding='utf-8', method='html').replace('&#13;', '')#.encode('utf-8')
 139         f.close()
 140
 141
 142 def test_concat():
 143     for book in TEST_FILES:
 144         #print book
 145         e = _load_epub(book, verbose=True)
 146         e.parse_meta()
 147         e.parse_opf()
 148         e.parse_ncx()
 149
 150         doc = e.concat_document()
 151
 152         f = open('tests/xhtml/' + os.path.basename(book) + '.html', 'w')
 153         print >> f, lxml.etree.tostring(doc, encoding='utf-8', method='html').replace('&#13;', '')#.encode('utf-8')
 154         f.close()
 155
 156 def test_bookizip():
 157     for book in TEST_FILES:
 158     #for book in ['tests/epub-examples/Doctorow - I, Robot.epub']:
 159     #for book in ['tests/epub-examples/pg829.epub']:
 160     #for book in ['tests/epub-examples/ia-huckfin.epub']:
 161         #print book
 162         e = _load_epub(book, verbose=True)
 163         e.parse_meta()
 164         e.parse_opf()
 165         e.parse_ncx()
 166         e.make_bookizip('tests/booki_books/%s.zip' % os.path.basename(book))
 167         #if 'Gimp' in book:
 168         #    sys.exit()
 169
 170 def test_objavi_epub():
 171     src = 'tests/booki_books'
 172     dest = 'tests/generated-epubs'
 173
 174     from objavi.fmbook import log, ZipBook, make_book_name
 175     from objavi import config
 176     config.BOOKI_BOOK_DIR = src
 177
 178     for zfn in os.listdir(src):
 179         log('starting %s' % zfn)
 180         book = ZipBook(config.LOCALHOST, zfn[:-4])
 181         book.make_epub()
 182         log( zfn, (book.epubfile, '%s/%s' % (dest, os.path.basename(book.epubfile))))
 183         #break
 184         os.rename(book.epubfile, '%s/%s' % (dest, os.path.basename(book.epubfile)))
 185
 186
if __name__ == '__main__':
    # When run as a script only test_bookizip is enabled; uncomment one of
    # the other calls to run it instead.
    #test_tags()
    #concat_books()
    test_bookizip()
    #test_objavi_epub()
 192