spell server, 'server', not 'book'
[objavi2.git] / objavi / fmbook.py
blob62a9992dd51d6f0fef8b6919cb6b5ef3aa6341e1
1 # Part of Objavi2, which turns html manuals into books.
2 # This contains classes representing books and coordinates their processing.
4 # Copyright (C) 2009 Douglas Bagnall
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 """Library module representing a complete FM book being turned into a
21 PDF"""
23 import os, sys
24 import tempfile
25 import re, time
26 import random
27 from subprocess import Popen, check_call, PIPE
28 from cStringIO import StringIO
29 import zipfile
30 import traceback
31 try:
32 import simplejson as json
33 except ImportError:
34 import json
36 import lxml, lxml.html, lxml.etree
38 from objavi import config, twiki_wrapper, epub_utils
39 from objavi.cgi_utils import log, run, shift_file
40 from objavi.pdf import PageSettings, count_pdf_pages, concat_pdfs, rotate_pdf, parse_outline
42 from iarchive import epub as ia_epub
43 from booki.xhtml_utils import EpubChapter
# Absolute form of the configured temp directory; each Book creates its
# working directory underneath it.
TMPDIR = os.path.abspath(config.TMPDIR)
# Document root taken from the environment, falling back to the current directory.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')
# Host name taken from the environment (empty if unset); used to build the
# TWiki gateway URL in fetch_zip.
HTTP_HOST = os.environ.get('HTTP_HOST', '')
# Directory that finished books are published into.
PUBLISH_PATH = "%s/books/" % DOC_ROOT
def make_book_name(book, server, suffix='.pdf'):
    """Return a timestamped file name for a book.

    The name is made of the alphanumeric characters of book, the
    language configured for server (or for the default server if
    server has no configuration), the current local time and suffix.
    """
    fallback = config.SERVER_DEFAULTS[config.DEFAULT_SERVER]
    lang = config.SERVER_DEFAULTS.get(server, fallback)['lang']
    safe_name = ''.join([c for c in book if c.isalnum()])
    timestamp = time.strftime('%Y.%m.%d-%H.%M.%S')
    return '%s-%s-%s%s' % (safe_name, lang, timestamp, suffix)
def _add_initial_number(e, n):
    """Put a styled chapter number n at the beginning of element e.

    The number goes in a new <strong Class="initial"> first child; any
    text e had before its first child moves into that child's tail.
    """
    number = e.makeelement("strong", Class="initial")
    number.text = "%s." % n
    e.insert(0, number)
    leading_text = e.text
    if leading_text is None:
        number.tail = ' '
    else:
        number.tail = ' ' + leading_text
    # NOTE(review): the source listing has lost its indentation; this
    # assumes e.text is cleared unconditionally -- confirm.
    e.text = ''
class TocItem(object):
    """One entry from a TOC.txt file: a (status, chapter, title) triple.

    status is a string code:
      '0' - section heading with no chapter
      '1' - chapter heading
      '2' - book title

    chapter is the twiki name of the chapter.
    title is a human readable name of the chapter.
    """
    _SECTION = '0'
    _CHAPTER = '1'
    _TITLE = '2'

    def __init__(self, status, chapter, title):
        self.status, self.chapter, self.title = status, chapter, title

    def is_chapter(self):
        """True if this entry is a chapter heading."""
        return self.status == self._CHAPTER

    def is_section(self):
        """True if this entry is a section heading with no chapter."""
        return self.status == self._SECTION

    def is_title(self):
        """True if this entry is the book title."""
        return self.status == self._TITLE

    def __str__(self):
        pairs = ['%s: %s' % (key, value)
                 for key, value in self.__dict__.iteritems()]
        return '<toc: %s>' % ', '.join(pairs)
class Book(object):
    """A book being fetched, laid out and turned into a PDF (or other
    output) inside its own temporary working directory."""
    # Numbering style handed to the page numbering step for the body pages.
    page_numbers = 'latin'
    # Numbering style handed to the page numbering step for the preamble pages.
    preamble_page_numbers = 'roman'
def notify_watcher(self, message=None):
    """Pass a progress message to the watcher callback, if there is one.

    When no message is given, the name of the calling function is used.
    """
    if not self.watcher:
        return
    if message is None:
        # Of the last two stack entries, the first is our caller;
        # index 2 of an entry is the function name.
        caller = traceback.extract_stack(None, 2)[0]
        message = caller[2]
    log("notify_watcher called with '%s'" % message)
    self.watcher(message)
def __enter__(self):
    """Context manager entry: the book itself is the managed object."""
    return self

def __exit__(self, exc_type, exc_value, traceback):
    """Context manager exit: always clean up.  Nothing is returned, so
    an exception raised inside the with block still propagates."""
    self.cleanup()
    #could deal with exceptions here and return true
def __init__(self, book, server, bookname,
             page_settings=None, watcher=None, isbn=None,
             license=config.DEFAULT_LICENSE):
    """Create the working directory and work out all the file names.

    book and server identify the book; server must be a key of
    config.SERVER_DEFAULTS.
    bookname is used as the working directory prefix and as the name
    of the published file.
    page_settings, if given, is a dict of keyword arguments for
    PageSettings; when it is None, self.maker is not set.
    watcher, if given, is called with progress messages (see
    notify_watcher).
    isbn and license are stored for use in the cover pages.
    """
    log("*** Starting new book %s ***" % bookname)
    self.book = book
    self.server = server
    self.watcher = watcher
    self.isbn = isbn
    self.license = license
    # A fresh private directory under TMPDIR, made world readable/searchable.
    self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
    os.chmod(self.workdir, 0755)
    defaults = config.SERVER_DEFAULTS[server]
    self.lang = defaults['lang']
    self.dir = defaults['dir']

    # Intermediate files, all inside the working directory.
    self.body_html_file = self.filepath('body.html')
    self.body_pdf_file = self.filepath('body.pdf')
    self.preamble_html_file = self.filepath('preamble.html')
    self.preamble_pdf_file = self.filepath('preamble.pdf')
    self.tail_html_file = self.filepath('tail.html')
    self.tail_pdf_file = self.filepath('tail.pdf')
    # Only set to a real path by make_end_matter_pdf when there is an isbn.
    self.isbn_pdf_file = None
    self.pdf_file = self.filepath('final.pdf')
    self.body_odt_file = self.filepath('body.odt')

    # Where the finished file ends up, on disk and as a URL.
    self.publish_name = bookname
    self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
    self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)

    if page_settings is not None:
        self.maker = PageSettings(**page_settings)

    self.notify_watcher()
if config.TRY_BOOK_CLEANUP_ON_DEL:
    #Don't even define __del__ if it is not used.
    # Class-level default; cleared on the instance once cleanup has started.
    _try_cleanup_on_del = True
    def __del__(self):
        """Clean up when the object is collected, but only if the
        working directory still exists and cleanup was not already
        started from here."""
        if self._try_cleanup_on_del and os.path.exists(self.workdir):
            self._try_cleanup_on_del = False #or else you can get in bad cycles
            self.cleanup()
def filepath(self, fn):
    """Return the path of the file named fn inside the working directory."""
    workdir = self.workdir
    return os.path.join(workdir, fn)
def save_data(self, fn, data):
    """Save without tripping up on unicode.

    fn is the path to write (any existing content is replaced).
    data is the content; a unicode string is encoded as UTF-8 first,
    silently dropping anything that cannot be encoded.
    """
    if isinstance(data, unicode):
        data = data.encode('utf8', 'ignore')
    f = open(fn, 'w')
    try:
        f.write(data)
    finally:
        # Close even when the write fails, so the handle is not leaked.
        f.close()
def save_tempfile(self, fn, data):
    """Write data to a file named fn in the working directory (which
    is removed by cleanup unless temp files are kept) and return the
    full path of the file."""
    path = self.filepath(fn)
    self.save_data(path, data)
    return path
def make_oo_doc(self):
    """Make an openoffice document, using the html2odt script, and
    move it to the publish location."""
    # Local import, in the style of the other function-level imports here.
    import shutil
    self.wait_for_xvfb()
    html_text = lxml.etree.tostring(self.tree, method="html")
    self.save_data(self.body_html_file, html_text)
    run([config.HTML2ODT, self.workdir, self.body_html_file, self.body_odt_file])
    log("Publishing %r as %r" % (self.body_odt_file, self.publish_file))
    # shutil.move falls back to copy-and-delete when the working and
    # publish directories are on different filesystems, where a plain
    # os.rename would fail.
    shutil.move(self.body_odt_file, self.publish_file)
    self.notify_watcher()
def extract_pdf_outline(self):
    """Run parse_outline on the body PDF, keep the outline entries and
    outline text on self, log each entry, and return the page count it
    reported."""
    contents, text, number_of_pages = parse_outline(self.body_pdf_file, 1)
    self.outline_contents = contents
    self.outline_text = text
    for entry in self.outline_contents:
        log(entry)
    self.notify_watcher()
    return number_of_pages
def make_body_pdf(self):
    """Make a pdf of the HTML, using webkit.

    Needs self.tree (see load_book) and self.maker (a PageSettings).
    Leaves the reshaped, numbered PDF in self.body_pdf_file and the
    parsed outline on self (see extract_pdf_outline).
    """
    #1. Save the html
    html_text = lxml.etree.tostring(self.tree, method="html")
    self.save_data(self.body_html_file, html_text)

    #2. Make a pdf of it
    self.maker.make_raw_pdf(self.body_html_file, self.body_pdf_file, outline=True)
    self.notify_watcher('generate_pdf')

    #3. Read the outline, which also gives the page count
    n_pages = self.extract_pdf_outline()

    log ("found %s pages in pdf" % n_pages)
    #4. resize pages, shift gutters, even pages
    self.maker.reshape_pdf(self.body_pdf_file, self.dir, centre_end=True)
    self.notify_watcher('reshape_pdf')

    #5 add page numbers
    self.maker.number_pdf(self.body_pdf_file, n_pages, dir=self.dir,
                          numbers=self.page_numbers)
    self.notify_watcher("number_pdf")
    self.notify_watcher()
def make_preamble_pdf(self):
    """Make the preamble PDF: title heading, inside cover and table
    of contents.

    Needs self.css_url (see add_css), self.title (see set_title) and
    the outline gathered by make_body_pdf (used by make_contents).
    """
    contents = self.make_contents()
    inside_cover_html = self.compose_inside_cover()
    # NOTE(review): self.title is interpolated into the markup (and into
    # an HTML comment) without escaping -- confirm titles are trusted.
    html = ('<html dir="%s"><head>\n'
            '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
            '<link rel="stylesheet" href="%s" />\n'
            '</head>\n<body>\n'
            '<h1 class="frontpage">%s</h1>'
            '%s\n'
            '<div class="contents">%s</div>\n'
            '<div style="page-break-after: always; color:#fff" class="unseen">.'
            '<!--%s--></div></body></html>'
            ) % (self.dir, self.css_url, self.title, inside_cover_html,
                 contents, self.title)
    self.save_data(self.preamble_html_file, html)

    # Render, reshape, then number with the preamble numbering style.
    self.maker.make_raw_pdf(self.preamble_html_file, self.preamble_pdf_file)

    self.maker.reshape_pdf(self.preamble_pdf_file, self.dir, centre_start=True)

    self.maker.number_pdf(self.preamble_pdf_file, None, dir=self.dir,
                          numbers=self.preamble_page_numbers,
                          number_start=-2)

    self.notify_watcher()
def make_end_matter_pdf(self):
    """Make an inside back cover and a back cover.  If there is an
    isbn number its barcode will be put on the back cover.

    Leaves the result in self.tail_pdf_file and, when there is an
    isbn, the barcode in self.isbn_pdf_file.
    """
    if self.isbn:
        self.isbn_pdf_file = self.filepath('isbn.pdf')
        self.maker.make_barcode_pdf(self.isbn, self.isbn_pdf_file)
        self.notify_watcher('make_barcode_pdf')

    self.save_data(self.tail_html_file, self.compose_end_matter())
    self.maker.make_raw_pdf(self.tail_html_file, self.tail_pdf_file)

    self.maker.reshape_pdf(self.tail_pdf_file, self.dir, centre_start=True,
                           centre_end=True, even_pages=False)
    self.notify_watcher()
def make_book_pdf(self):
    """A convenient wrapper of a few necessary steps: make the body,
    preamble and end matter PDFs and join them into self.pdf_file."""
    # now the Xvfb server is needed. make sure it has had long enough to get going
    self.wait_for_xvfb()
    # The body comes first: the preamble's contents page uses the
    # outline that make_body_pdf extracts.
    self.make_body_pdf()
    self.make_preamble_pdf()
    self.make_end_matter_pdf()

    # self.isbn_pdf_file is None when the book has no isbn.
    concat_pdfs(self.pdf_file, self.preamble_pdf_file,
                self.body_pdf_file, self.tail_pdf_file,
                self.isbn_pdf_file)

    self.notify_watcher('concatenated_pdfs')
def make_simple_pdf(self, mode):
    """Make a simple pdf document without contents or separate
    title page.  This is used for multicolumn newspapers and for
    web-destined pdfs.

    mode is compared with 'web'; for 'web' the gutter is set to zero
    and the reshaping and numbering steps are skipped.
    """
    self.wait_for_xvfb()
    #0. Add heading to beginning of html
    body = list(self.tree.cssselect('body'))[0]
    e = body.makeelement('h1', {'id': 'book-title'})
    e.text = self.title
    body.insert(0, e)
    # The inside cover markup goes straight after the new title heading.
    intro = lxml.html.fragment_fromstring(self.compose_inside_cover())
    e.addnext(intro)

    #0.5 adjust parameters to suit the particular kind of output
    if mode == 'web':
        self.maker.gutter = 0

    #1. Save the html
    html_text = lxml.etree.tostring(self.tree, method="html")
    self.save_data(self.body_html_file, html_text)

    #2. Make a pdf of it (direct to final pdf)
    self.maker.make_raw_pdf(self.body_html_file, self.pdf_file, outline=True)
    self.notify_watcher('generate_pdf')
    n_pages = count_pdf_pages(self.pdf_file)

    # NOTE(review): the source listing has lost its indentation; this
    # assumes steps 3 and 4 are both skipped for 'web' -- confirm.
    if mode != 'web':
        #3. resize pages and shift gutters.
        self.maker.reshape_pdf(self.pdf_file, self.dir, centre_end=True)
        self.notify_watcher('reshape_pdf')

        #4. add page numbers
        self.maker.number_pdf(self.pdf_file, n_pages,
                              dir=self.dir, numbers=self.page_numbers)
        self.notify_watcher("number_pdf")
    self.notify_watcher()
def rotate180(self):
    """Rotate the pdf 180 degrees so an RTL book can print on LTR
    presses.

    The rotated file replaces self.pdf_file; the unrotated one is kept
    in the working directory as final-pre-rotate.pdf.
    """
    rotated = self.filepath('final-rotate.pdf')
    unrotated = self.filepath('final-pre-rotate.pdf')
    #leave the unrotated pdf intact at first, in case of error.
    rotate_pdf(self.pdf_file, rotated)
    # Only swap the files once the rotation has succeeded.
    os.rename(self.pdf_file, unrotated)
    os.rename(rotated, self.pdf_file)
    self.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF to its final resting place
    (self.publish_file)."""
    # Local import, in the style of the other function-level imports here.
    import shutil
    log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
    # shutil.move falls back to copy-and-delete when the working and
    # publish directories are on different filesystems, where a plain
    # os.rename would fail.
    shutil.move(self.pdf_file, self.publish_file)
    self.notify_watcher()
def get_twiki_metadata(self):
    """Get information about a twiki book (as much as is easy and useful).

    Loads the TOC first if that has not been done.  Returns a dict
    with the keys:
      'metadata'  - language, identifier, publisher, date, server,
                    book and title
      'TOC'       - (title, chapter) pairs for chapters outside any
                    section, and [[title, None], [chapter pairs]]
                    for each section
      'spine'     - chapter names in TOC order
      'copyright' - the first value returned by
                    twiki_wrapper.get_book_copyright
    """
    if not hasattr(self, 'toc'):
        self.load_toc()

    # Read the clock once so the identifier and the date always agree.
    now = time.localtime()
    title_map = {}
    meta = {
        'language': self.lang,
        'identifier': 'http://%s/epub/%s/%s' % (self.server, self.book,
                                                time.strftime('%Y.%m.%d-%H.%M.%S', now)),
        'publisher': 'FLOSS Manuals http://flossmanuals.net',
        'date': time.strftime('%Y-%m-%d', now),
        'fm:server': self.server,
        'fm:book': self.book,
        # Replaced below if the TOC has a book title entry.
        'title': self.book,
        }
    spine = []
    toc = []
    # Chapters are appended to the current section's list, which is the
    # top level list until the first section heading turns up.
    section = toc
    for t in self.toc:
        if t.is_chapter():
            spine.append(t.chapter)
            section.append((t.title, t.chapter))
            title_map[t.title] = t.chapter
        elif t.is_section():
            section = []
            toc.append([[t.title, None], section])
        elif t.is_title():
            meta['title'] = t.title

    author_copyright, chapter_copyright = twiki_wrapper.get_book_copyright(
        self.server, self.book, title_map)

    return {
        'metadata': meta,
        'TOC': toc,
        'spine': spine,
        'copyright': author_copyright,
        #'chapter_copyright': chapter_copyright,
        }
def load_toc(self):
    """Build self.toc, a list of TocItems, from the book's TOC.txt.

    Each TocItem has the attributes <status>, <chapter> and <title>.

    <status> is a number, with the following meaning:

          0 - section heading with no chapter
          1 - chapter heading
          2 - book title

    The TocItem object has convenience functions <is_chapter> and
    <is_section>.

    <chapter> is twiki name of the chapter.

    <title> is a human readable title for the chapter.  It is likely to
    differ from the title given in the chapter's <h1> heading.
    """
    self.toc = toc = []
    rows = twiki_wrapper.toc_iterator(self.server, self.book)
    toc.extend(TocItem(status, chapter, title)
               for status, chapter, title in rows)
    self.notify_watcher()
def load_book(self):
    """Fetch the book's raw html, keep a copy as raw.html in the
    working directory, and parse it into self.tree with its links made
    absolute.  The <h1> elements are collected in self.headings, each
    given a title attribute holding its stripped text; the first one
    gets the class "first-heading"."""
    html = twiki_wrapper.get_book_html(self.server, self.book, self.dir)
    self.save_tempfile('raw.html', html)

    tree = lxml.html.document_fromstring(html)
    self.tree = tree
    base_url = config.BOOK_URL % (self.server, self.book)
    tree.make_links_absolute(base_url)
    headings = list(tree.cssselect('h1'))
    self.headings = headings
    if headings:
        headings[0].set('class', "first-heading")
    for heading in headings:
        heading.title = heading.text_content().strip()
    self.notify_watcher()
def load(self):
    """Wrapper around all necessary load methods: fetches and parses
    the book html, then the table of contents."""
    self.load_book()
    self.load_toc()
def make_contents(self):
    """Generate HTML containing the table of contents.  This can
    only be done after the main PDF has been made, because the page
    numbers come from its outline (self.outline_contents).

    TOC chapters are paired, in order, with the <h1> headings and with
    the outline entries.  If either runs out first, the problem is
    logged and the table is cut short at that point.

    Returns the HTML as a string.
    """
    header = '<h1>Table of Contents</h1><table class="toc">\n'
    row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
                '<td class="pagenumber">%s</td></tr>\n')
    section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
    footer = '\n</table>'

    contents = []

    chapter = 1

    outline_contents = iter(self.outline_contents)
    headings = iter(self.headings)

    for t in self.toc:
        if t.is_chapter():
            try:
                h1 = headings.next()
            except StopIteration:
                log("heading not found for %s (previous h1 missing?). Stopping" % t)
                break
            try:
                h1_text, level, page_num = outline_contents.next()
            except StopIteration:
                # Same treatment as a missing heading, rather than letting
                # StopIteration escape and abort the whole book.
                log("pdf outline entry not found for %s. Stopping" % t)
                break
            log("%r %r" % (h1.title, h1_text))
            contents.append(row_tmpl % (chapter, h1.title, page_num))
            chapter += 1
        elif t.is_section():
            contents.append(section_tmpl % t.title)
        else:
            log("mystery TOC item: %s" % t)

    doc = header + '\n'.join(contents) + footer
    self.notify_watcher()
    return doc
def add_section_titles(self):
    """Add any section heading pages that the TOC.txt file
    specifies.  These are sub-book, super-chapter groupings.

    Also add initial numbers to chapters.
    """
    headings = iter(self.headings)
    chapter = 1
    section = None

    for t in self.toc:
        # NOTE(review): a chapter that comes before the first section
        # heading is skipped here without consuming an <h1> or a chapter
        # number, so later chapters would pair with earlier headings.
        # Confirm that books always start with a section.
        if t.is_chapter() and section is not None:
            try:
                h1 = headings.next()
            except StopIteration:
                log("heading not found for %s (previous h1 missing?)" % t)
                break
            # List this chapter, numbered, on the section's heading page.
            item = h1.makeelement('div', Class='chapter')
            log(h1.title, debug='HTMLGEN')
            item.text = h1.title
            _add_initial_number(item, chapter)

            section.append(item)

            # The section page goes in the document just before the
            # section's first chapter heading.
            if not section_placed:
                log("placing section", debug='HTMLGEN')
                h1.addprevious(section)
                section_placed = True
            else:
                log("NOT placing section", debug='HTMLGEN')

            #put a bold number at the beginning of the h1.
            _add_initial_number(h1, chapter)
            chapter += 1

        elif t.is_section():
            section = self.tree.makeelement('div', Class="subsection")
            # section Element complains when you try to ask it whether it
            # has been placed (though it does know)
            section_placed = False
            heading = lxml.html.fragment_fromstring(t.title, create_parent='div')
            heading.set("Class", "subsection-heading")
            section.append(heading)

    self.notify_watcher()
def add_css(self, css=None, mode='book'):
    """If css looks like a url, use it as a stylesheet link.
    Otherwise it is the CSS itself, which is saved to a temporary file
    and linked to.

    With no css (None or blank), the server's default stylesheet for
    mode (the 'css-<mode>' entry of its defaults) is used.

    The link is appended to the document's <head>, which is created if
    missing.  The url is stored in self.css_url and returned.
    """
    log("css is %r" % css)
    htmltree = self.tree
    if css is None or not css.strip():
        defaults = config.SERVER_DEFAULTS[self.server]
        url = 'file://' + os.path.abspath(defaults['css-%s' % mode])
    elif not re.match(r'^https?://\S+$', css):
        fn = self.save_tempfile('objavi.css', css)
        url = 'file://' + fn
    else:
        # An http or https address; https used to fall through to the
        # branch above and be saved as if it were stylesheet text.
        url = css
    #XXX for debugging and perhaps sensible anyway
    #url = url.replace('file:///home/douglas/objavi2', '')

    #find the head -- it's probably first child but lets not assume.
    for child in htmltree:
        if child.tag == 'head':
            head = child
            break
    else:
        head = htmltree.makeelement('head')
        htmltree.insert(0, head)

    lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
    self.css_url = url
    self.notify_watcher()
    return url
def set_title(self, title=None):
    """Set self.title and return it.

    A non-empty title argument is used as is.  Otherwise the text of
    the document's first <title> element is used, or failing that
    'A Manual About ' followed by the book name.
    """
    if not title:
        found = [x.text_content() for x in self.tree.cssselect('title')]
        if found and found[0]:
            title = found[0]
        else:
            #oh well
            title = 'A Manual About ' + self.book
    self.title = title
    return self.title
def _read_localised_template(self, template, fallbacks=('en',)):
    """Try to get the template in the appropriate language, otherwise
    in english.

    template is a file name pattern with one %s for the language code.
    fallbacks are the language codes to try, in order, after self.lang.

    Returns the content of the first file that opens.  Raises IOError
    if none of them can be opened.
    """
    languages = [self.lang] + list(fallbacks)
    for lang in languages:
        fn = template % (lang)
        try:
            f = open(fn)
        except IOError:
            log("couldn't open template for lang %s (filename %s)" % (lang, fn))
            # sys.exc_info avoids version-specific "except ... as" syntax.
            log(sys.exc_info()[1])
            continue
        try:
            return f.read()
        finally:
            f.close()
    # Previously this case fell through to read from an unbound name.
    raise IOError("no template found for %r in languages %r"
                  % (template, languages))
def compose_inside_cover(self):
    """Create the markup for the preamble inside cover by filling the
    localised inside front cover template with today's date, the isbn
    line (empty when there is no isbn) and the license."""
    template = self._read_localised_template(config.INSIDE_FRONT_COVER_TEMPLATE)

    isbn_text = ''
    if self.isbn:
        isbn_text = '<b>ISBN :</b> %s <br>' % self.isbn

    fields = {
        'date': time.strftime('%Y-%m-%d'),
        'isbn': isbn_text,
        'license': self.license,
        }
    return template % fields
def compose_end_matter(self):
    """Create the markup for the end matter inside cover.  If
    self.isbn is not set, the html will result in a pdf that
    spills onto two pages.
    """
    template = self._read_localised_template(config.END_MATTER_TEMPLATE)

    # Without an isbn a page break is forced after the inside cover.
    if self.isbn:
        cover_style = ''
    else:
        cover_style = 'page-break-after: always'

    fields = {
        'css_url': self.css_url,
        'title': self.title,
        'inside_cover_style': cover_style,
        }
    return template % fields
def spawn_x(self):
    """Start an Xvfb instance, using a new server number.  A
    reference to it is stored in self.xvfb, which is used to kill
    it when the pdf is done.

    Also sets XAUTHORITY and DISPLAY in os.environ, so they apply to
    everything started from this process afterwards.

    Note that Xvfb doesn't interact well with dbus which is
    present on modern desktops.
    """
    #Find an unused server number (in case two cgis are running at once)
    # NOTE(review): the lock file check and the Xvfb start are separate
    # steps, so two processes could still pick the same number.
    while True:
        servernum = random.randrange(50, 500)
        if not os.path.exists('/tmp/.X%s-lock' % servernum):
            break

    self.xserver_no = ':%s' % servernum

    # The X authority file lives in the working directory.
    authfile = self.filepath('Xauthority')
    os.environ['XAUTHORITY'] = authfile

    #mcookie(1) eats into /dev/random, so avoid that
    from hashlib import md5
    m = md5("%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32)))
    mcookie = m.hexdigest()

    check_call(['xauth', 'add', self.xserver_no, '.', mcookie])

    self.xvfb = Popen(['Xvfb', self.xserver_no,
                       '-screen', '0', '1024x768x24',
                       '-pixdepths', '32',
                       #'-blackpixel', '0',
                       #'-whitepixel', str(2 ** 24 -1),
                       #'+extension', 'Composite',
                       '-dpi', '96',
                       '-kb',
                       '-nolisten', 'tcp',
                       ])

    # We need to wait a bit before the Xvfb is ready.  but the
    # downloads are so slow that that probably doesn't matter
    # (wait_for_xvfb sleeps until this time if it is reached early).

    self.xvfb_ready_time = time.time() + 2

    os.environ['DISPLAY'] = self.xserver_no
    log(self.xserver_no)
def wait_for_xvfb(self):
    """Sleep until the time recorded by spawn_x, so that Xvfb has had
    time to start properly.  Does nothing if no Xvfb was spawned."""
    if not hasattr(self, 'xvfb'):
        return
    remaining = self.xvfb_ready_time - time.time()
    if remaining > 0:
        time.sleep(remaining)
        # NOTE(review): the source listing has lost its indentation; this
        # assumes the watcher is only told when a wait happened -- confirm.
        self.notify_watcher()
def cleanup_x(self):
    """Try very hard to kill off Xvfb.  In addition to killing
    this instance's xvfb, occasionally (randomly) search for
    escaped Xvfb instances and kill those too.

    Does nothing if no Xvfb was spawned or it was already cleaned up.
    An error from `xauth remove` still propagates, but only after the
    Xvfb process has been dealt with.
    """
    if not hasattr(self, 'xvfb'):
        return
    p = self.xvfb
    # Forget the process straight away, so that a second cleanup (for
    # example from __del__) cannot signal a pid that has been reused.
    del self.xvfb
    try:
        check_call(['xauth', 'remove', self.xserver_no])
    finally:
        # Kill Xvfb even if the xauth entry could not be removed.
        log("trying to kill Xvfb %s" % p.pid)
        os.kill(p.pid, 15)
        for i in range(10):
            if p.poll() is not None:
                log("%s died with %s" % (p.pid, p.poll()))
                break
            log("%s not dead yet" % p.pid)
            time.sleep(0.2)
        else:
            log("Xvfb would not die! kill -9! kill -9!")
            os.kill(p.pid, 9)

    if random.random() < 0.1:
        # occasionally kill old xvfbs and soffices, if there are any.
        self.kill_old_processes()
def kill_old_processes(self):
    """Sometimes, despite everything, Xvfb or soffice instances
    hang around well after they are wanted -- for example if the
    cgi process dies particularly badly.  So kill them if they have
    been running for a long time.

    Any matching process whose elapsed time shows a days or hours
    part, or more than 50 minutes, is sent SIGTERM and then, half a
    second later, SIGKILL.
    """
    log("running kill_old_processes")
    # '-C' and the command list are deliberately a single argument.
    p = Popen(['ps', '-C' + 'Xvfb soffice soffice.bin html2odt ooffice wkhtmltopdf',
               '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
    data = p.communicate()[0].strip()
    if data:
        # Each line is a pid followed by [[dd-]hh:]mm:ss
        etime_re = re.compile(r'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$')
        for line in data.split('\n'):
            log('dealing with ps output "%s"' % line)
            # Columns may be padded with leading spaces; strip them or
            # the pattern cannot match.
            m = etime_re.match(line.strip())
            if m is None:
                log("Couldn't parse that line!")
                # Without this the kill below would use stale (or
                # undefined) values from a previous line.
                continue
            pid, days, hours, minutes, seconds = m.groups()
            # 50 minutes should be enough xvfb time for anyone
            if days or hours or int(minutes) > 50:
                log("going to kill pid %s" % pid)
                try:
                    os.kill(int(pid), 15)
                    time.sleep(0.5)
                    os.kill(int(pid), 9)
                    log('killing %s with -9' % pid)
                except OSError:
                    # The process went away by itself or after the
                    # first signal; that is what was wanted.
                    pass
    self.notify_watcher()
def cleanup(self):
    """Shut down Xvfb and, unless config.KEEP_TEMP_FILES is set, remove
    every file in the working directory and then the directory itself.
    When temp files are kept, their names are logged instead."""
    self.cleanup_x()
    if config.KEEP_TEMP_FILES:
        log("NOT removing '%s', containing the following files:" % self.workdir)
        log(*os.listdir(self.workdir))
    else:
        for name in os.listdir(self.workdir):
            os.remove(os.path.join(self.workdir, name))
        os.rmdir(self.workdir)

    self.notify_watcher()
def fetch_zip(server, book, project):
    """Return the raw bytes of the booki-zip for a book.

    Where the zip comes from depends on the 'interface' setting of
    server in config.SERVER_DEFAULTS:
      'Booki' - fetched from config.BOOKI_ZIP_URL
      'TWiki' - fetched from config.TWIKI_GATEWAY_URL
      'local' - read from <book>.zip in config.BOOKI_BOOK_DIR

    Raises NotImplementedError for any other interface.
    """
    from urllib2 import urlopen
    settings = config.SERVER_DEFAULTS[server]
    interface = settings['interface']
    if interface == 'Booki':
        url = config.BOOKI_ZIP_URL % {'server': server, 'project': project, 'book':book}
        f = urlopen(url)
    elif interface == 'TWiki':
        url = config.TWIKI_GATEWAY_URL % (HTTP_HOST, server, book)
        f = urlopen(url)
    elif interface == 'local':
        # A zip is binary data, so don't let text mode touch it.
        f = open('%s/%s.zip' % (config.BOOKI_BOOK_DIR, book), 'rb')
    else:
        raise NotImplementedError("Can't handle '%s' interface" % interface)
    try:
        # Only url responses have geturl; log where the zip came from.
        if hasattr(f, 'geturl'):
            log(f.geturl())
        blob = f.read()
    finally:
        # Close even if the read fails.
        f.close()
    return blob
class ZipBook(Book):
    """A Book based on a booki-zip file.  Depending how out-of-date
    this docstring is, some of the parent's methods will not work.
    """
def __init__(self, server, book, project=None, **kwargs):
    """Fetch the book's zip, read its info.json, and set up the Book.

    Note the argument order (server, book) is the reverse of
    Book.__init__.  Remaining keyword arguments are passed on to
    Book.__init__.
    """
    blob = fetch_zip(server, book, project)
    # The zip is kept in memory and read from there.
    f = StringIO(blob)
    self.store = zipfile.ZipFile(f, 'r')
    self.info = json.loads(self.store.read('info.json'))
    metadata = self.info['metadata']

    # For a local zip, prefer the server and book recorded in the zip's metadata.
    if server == config.LOCALHOST:
        server = metadata.get('fm:server', server)
        book = metadata.get('fm:book', book)

    bookname = make_book_name(book, server)

    Book.__init__(self, book, server, bookname, **kwargs)
    self.set_title(metadata['title'])
    self.project = project
    self.epubfile = self.filepath('%s.epub' % self.book)
def make_epub(self, use_cache=False):
    """Make an epub version of the book, using Mike McCabe's
    epub module for the Internet Archive.

    The manifest, metadata, TOC and spine come from the zip's
    info.json.  Items of type text/html are converted to xhtml on the
    way in.  The epub is written to self.epubfile.

    use_cache is passed on to EpubChapter.
    """
    ebook = ia_epub.Book(self.epubfile, content_dir='')
    manifest = self.info['manifest']
    metadata = self.info['metadata']
    toc = self.info['TOC']
    spine = self.info['spine']

    #manifest
    filemap = {} #reformulated manifest for NCX
    for ID in manifest:
        fn, mediatype = manifest[ID]
        oldfn = fn
        log(ID, fn, mediatype)
        content = self.store.read(fn)
        if mediatype == 'text/html':
            log('CONVERTING')
            #convert to application/xhtml+xml
            c = EpubChapter(self.server, self.book, ID, content,
                            use_cache=use_cache)
            c.remove_bad_tags()
            c.prepare_for_epub()
            content = c.as_xhtml()
            # Swap whatever extension there is (.html, .htm, none) for
            # .xhtml, rather than assuming five characters to chop.
            fn = os.path.splitext(fn)[0] + '.xhtml'
            mediatype = 'application/xhtml+xml'
        if mediatype == 'application/xhtml+xml':
            # Map the name used in the zip to the name used in the epub.
            filemap[oldfn] = fn
        #log(fn, mediatype)

        info = {'id': ID.encode('utf-8'),
                'href': fn.encode('utf-8'),
                'media-type': mediatype.encode('utf-8')}
        ebook.add_content(info, content)

    #toc
    ncx = epub_utils.make_ncx(toc, metadata, filemap)
    ebook.add(ebook.content_dir + 'toc.ncx', ncx)

    #spine
    for ID in spine:
        ebook.add_spine_item({'idref': ID})

    #metadata -- no use of attributes (yet)
    # and fm: metadata disappears for now
    dcns = config.DCNS
    meta_info_items = [{'item': dcns + 'creator',
                        'text': 'The Contributors'}
                       ]
    for k, v in metadata.iteritems():
        if k.startswith('fm:'):
            continue
        meta_info_items.append({'item': dcns + k,
                                'text': v}
                               )

    #copyright
    authors = sorted(self.info['copyright'])
    for a in authors:
        meta_info_items.append({'item': dcns + 'contributor',
                                'text': a}
                               )
    meta_info_items.append({'item': dcns + 'rights',
                            'text': 'This book is free. Copyright %s' % (', '.join(authors))}
                           )

    tree_str = ia_epub.make_opf(meta_info_items,
                                ebook.manifest_items,
                                ebook.spine_items,
                                ebook.guide_items,
                                ebook.cover_id)
    ebook.add(ebook.content_dir + 'content.opf', tree_str)
    ebook.z.close()
def publish_s3(self):
    """Push the book's epub to archive.org, using S3.

    The credentials are read from the files named by config.S3_SECRET
    and config.S3_ACCESSKEY, and the upload is done with curl.

    Returns the archive.org details url for the book.  Raises
    subprocess.CalledProcessError if curl fails.
    """
    #XXX why only epub?
    secrets = {}
    for x in ('S3_SECRET', 'S3_ACCESSKEY'):
        fn = getattr(config, x)
        f = open(fn)
        try:
            secrets[x] = f.read().strip()
        finally:
            f.close()

    # The secrets are deliberately never logged.
    # '%Y-%m-%d' is the portable spelling of the '%F' used previously.
    now = time.strftime('%Y-%m-%d')
    s3url = 'http://s3.us.archive.org/booki-%s-%s/%s-%s.epub' % (self.project, self.book, self.book, now)
    detailsurl = 'http://archive.org/details/booki-%s-%s' % (self.project, self.book)
    headers = [
        'x-amz-auto-make-bucket:1',
        "authorization: LOW %(S3_ACCESSKEY)s:%(S3_SECRET)s" % secrets,
        'x-archive-meta-mediatype:texts',
        'x-archive-meta-collection:opensource',
        'x-archive-meta-title:%s' %(self.book,),
        'x-archive-meta-date:%s' % (now,),
        'x-archive-meta-creator:FLOSS Manuals Contributors',
        ]

    if self.license in config.LICENSES:
        # NOTE(review): this used to index config.licenses (lower case),
        # which does not match the membership test above.  Assumes
        # config.LICENSES maps license names to urls -- confirm.
        headers.append('x-archive-meta-licenseurl:%s' % config.LICENSES[self.license])

    argv = ['curl', '--location',]
    for h in headers:
        argv.extend(('--header', h))
    argv.extend(('--upload-file', self.epubfile, s3url,))

    # Log the command with the credentials blanked out.
    # NOTE(review): the credentials are still visible on curl's command line.
    loggable = []
    for a in argv:
        if a.startswith('authorization:'):
            a = 'authorization: <redacted>'
        loggable.append(a)
    log(loggable)
    check_call(argv)
    return detailsurl
def publish_epub(self):
    """Hand the epub to shift_file with config.EPUB_DIR (presumably
    moving it there -- verify against cgi_utils), then remember and
    return the path it gives back."""
    new_path = shift_file(self.epubfile, config.EPUB_DIR)
    self.epubfile = new_path
    return self.epubfile