clarifications and cleanups: debug noise, comments, variable unpacking
[objavi2.git] / fmbook.py
blobeaca0adaedaf15f35271c0d944e4080de15be964
1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
3 # their interactions.
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
22 PDF"""
24 import os, sys
25 import tempfile
26 import re, time
27 import random
28 from urllib2 import urlopen
29 from subprocess import Popen, check_call, PIPE
31 import lxml.etree, lxml.html
32 import lxml, lxml.html, lxml.etree
34 import config
35 from config import SERVER_DEFAULTS, DEFAULT_SERVER, POINT_2_MM, PDFEDIT_MAX_PAGES
# Absolute form of the configured temporary directory.
TMPDIR = os.path.abspath(config.TMPDIR)
# Web server document root, or the current directory if the
# DOCUMENT_ROOT environment variable is not set.
DOC_ROOT = os.getenv('DOCUMENT_ROOT', '.')
# Directory (with trailing slash) under the document root for books.
PUBLISH_PATH = "%s/books/" % (DOC_ROOT,)
def log(*messages, **kwargs):
    """Print each message on its own line to stderr.

    If a <debug> keyword is specified, the messages are only printed
    if config.DEBUG_ALL is set or the keyword's value is in
    config.DEBUG_MODES.  A message that cannot be printed as it is
    gets printed as its repr() instead.
    """
    if 'debug' in kwargs:
        enabled = config.DEBUG_ALL or kwargs['debug'] in config.DEBUG_MODES
        if not enabled:
            return
    for message in messages:
        try:
            print >> sys.stderr, message
        except Exception:
            # e.g. text that the stderr stream cannot encode
            print >> sys.stderr, repr(message)
def _add_initial_number(e, n):
    """Put a styled chapter number n at the beginning of element e.

    The number goes in a new <strong Class="initial"> child; the text
    that e started with is moved into that child's tail, after a space.
    """
    number = e.makeelement("strong", Class="initial")
    e.insert(0, number)
    number.text = "%s." % n
    leading_text = e.text
    if leading_text is None:
        number.tail = ' '
    else:
        number.tail = ' ' + leading_text
    e.text = ''
def _add_chapter_cookie(e):
    """Add magic hidden text to help with contents generation.

    A <span> holding 8 random characters from
    config.CHAPTER_COOKIE_CHARS is added as the next sibling of e, and
    the same string is stored as e.cookie.
    """
    hidden_style = ("font-size:6pt; line-height: 6pt; color: #fff; width:0;"
                    " float:left; margin:-2em; z-index: -67; display: block;")
    cookie = e.makeelement("span", Class="heading-cookie", dir="ltr",
                           style=hidden_style)
    token = ''.join(random.choice(config.CHAPTER_COOKIE_CHARS)
                    for x in range(8))
    cookie.text = token
    e.cookie = token
    e.addnext(cookie)
class TocItem(object):
    """This makes sense of the tuples from TOC.txt files.

    <status> is a string:
       '0' - section heading with no chapter
       '1' - chapter heading
       '2' - book title
    <chapter> is the twiki name of the chapter.
    <title> is a human readable name of the chapter.
    """
    def __init__(self, status, chapter, title):
        self.title = title
        self.chapter = chapter
        self.status = status

    def is_chapter(self):
        """True if this item is a chapter heading (status '1')."""
        return '1' == self.status

    def is_section(self):
        """True if this item is a section heading (status '0')."""
        return '0' == self.status

    def __str__(self):
        fields = ['%s: %s' % pair for pair in self.__dict__.items()]
        return '<toc: %s>' % ', '.join(fields)
def run(cmd):
    """Run cmd (an argument list), wait for it to finish, and log its
    return code, stdout and stderr.

    If the command cannot be started or communicated with, that is
    logged and the exception is re-raised.  The return code is only
    logged, not checked.
    """
    try:
        process = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
    except Exception:
        log("Failed on command: %r" % cmd)
        raise
    report = "%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" % (
        ' '.join(cmd), cmd[0], process.poll(), out, err)
    log(report)
def find_containing_paper(w, h):
    """Find the first paper in config.PAPER_SIZES that is at least w
    wide and h high.

    Returns (name, mw, mh), where mw and mh are half the leftover
    width and height.  Raises ValueError if no listed paper is big
    enough.
    """
    for name, pw, ph in config.PAPER_SIZES:
        if not (pw >= w and ph >= h):
            continue
        return (name, (pw - w) * 0.5, (ph - h) * 0.5)

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * POINT_2_MM, h * POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    Page dimensions, gutter and margins are kept in points;
    self.margins holds the browser margins converted to millimetres.
    """
    def __init__(self, pointsize, **kwargs):
        """<pointsize> is a (width, height) pair in points.

        Optional keyword arguments override the calculated defaults:
        gutter, top_margin, side_margin, bottom_margin, moz_printer,
        columns, column_margin.

        Raises ValueError (from find_containing_paper) if the page is
        too big for any known paper.
        """
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.

        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)

        self.gutter = kwargs.get('gutter', (config.BASE_GUTTER +
                                            config.PROPORTIONAL_GUTTER * self.width))

        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))
        # Each margin has its own keyword.  (side_margin and
        # bottom_margin used to be read from 'top_margin', which made
        # the values passed by make_raw_pdf for columns ineffective.)
        self.top_margin = kwargs.get('top_margin', default_margin)
        self.side_margin = kwargs.get('side_margin', default_margin)
        self.bottom_margin = kwargs.get('bottom_margin', default_margin)
        self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))
        self.columns = kwargs.get('columns', 1)

        self.column_margin = kwargs.get('column_margin', default_margin * 2 / (4.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers (top, right, bottom, left
        # as used by _webkit_command).  The clip values are the extra
        # paper around the page; the gutter is split between both sides.
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            if m is None:
                m = default_margin
            self.margins.append((m + clip) * POINT_2_MM)

        for x in locals().items():
            log("%s: %s" % x, debug='PDFGEN')
        for x in dir(self):
            log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf, outline=False):
        """Return (and log) the wkhtmltopdf command line that turns
        <html> into <pdf>, optionally with a PDF outline."""
        m = [str(x) for x in self.margins]
        # a list times a bool is either the list or an empty list
        outline_args = ['--outline'] * outline
        cmd = ([config.WKHTMLTOPDF, '-q', '-s', self.papersize,
                '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
                ] + outline_args +
               config.WKHTMLTOPDF_EXTRA_COMMANDS + [html, pdf])
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf, outline=False):
        """Return (and log) the firefox command line that prints
        <html> to the printer named self.moz_printer.  <pdf> and
        <outline> are accepted for symmetry with _webkit_command but
        are not used."""
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def make_raw_pdf(self, html, pdf, engine='webkit', outline=False):
        """Render <html> to <pdf> with the named engine ('webkit' or
        'gecko').

        With more than one column, a narrow single column PDF is made
        and reshaped first, then pdfnup places the columns side by
        side (<outline> is not used in that case).
        """
        func = getattr(self, '_%s_command' % engine)
        if self.columns == 1:
            cmd = func(html, pdf, outline=outline)
            run(cmd)
        else:
            printable_width = self.width - 2.0 * self.side_margin - self.gutter
            column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
            page_width = column_width + self.column_margin

            columnmaker = PageSettings((page_width, self.height), moz_printer=self.moz_printer,
                                       gutter=0, top_margin=self.top_margin,
                                       side_margin=self.column_margin * 0.5,
                                       bottom_margin=self.bottom_margin)

            # assumes <pdf> ends with a four character extension like '.pdf'
            column_pdf = pdf[:-4] + '-single-column.pdf'
            columnmaker.make_raw_pdf(html, column_pdf, engine=engine)
            columnmaker.reshape_pdf(column_pdf)

            cmd = ['pdfnup',
                   '--nup', '%sx1' % int(self.columns),
                   '--paper', self.papersize.lower() + 'paper',
                   '--outfile', pdf,
                   '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
                   '--noautoscale', 'true',
                   '--orient', 'portrait',
                   #'--tidy', 'false',
                   column_pdf
                   ]
            run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.  The pdf is modified in
        place by the wk_objavi.qs pdfedit script."""
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to <pdf> in place, in one pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to <pdf>.  <pages> is the (possibly
        inexact) page count, or None if unknown.  Keyword arguments
        are passed on to _number_pdf."""
        # if there are too many pages for pdfedit to handle in one go,
        # split the job into bits.  <pages> may not be exact
        if pages is None or pages <= PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
        else:
            # section_size must be even
            sections = pages // PDFEDIT_MAX_PAGES + 1
            section_size = (pages // sections + 2) & ~1

            pdf_sections = []
            s = kwargs.pop('number_start', 1)
            while s < pages:
                e = s + section_size - 1
                pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], s, e)
                if e < pages - 1:
                    page_range = '%s-%s' % (s, e)
                else:
                    # the last section takes whatever is left
                    page_range = '%s-end' % s
                run(['pdftk',
                     pdf,
                     'cat',
                     page_range,
                     'output',
                     pdf_section,
                     ])
                self._number_pdf(pdf_section, number_start=s, **kwargs)
                pdf_sections.append(pdf_section)
                s = e + 1

            concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page."""

        position = '%s,%s,%s,%s,%s' % (corner, self.width, self.height,
                                       self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # The parent must drop its copy of the pipe, so that p1 gets
        # SIGPIPE if p2 exits early instead of blocking forever.
        p1.stdout.close()
        out, err = p2.communicate()
        # reap p1 so that a real return code is logged
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def concat_pdfs(name, *args):
    """Join all the named pdfs together into one and save it as <name>.
    Arguments that are None are skipped."""
    pdfs = [x for x in args if x is not None]
    run(['pdftk'] + pdfs + ['cat', 'output', name])
def index_pdf(pdf, text=None):
    """Use pdftotext to extract utf-8 text from a pdf, using ^L to
    separate pages.  The text goes in the file <text>, which defaults
    to the pdf's name plus '.index.txt'.  Returns the text file's name."""
    if text is None:
        text = pdf + '.index.txt'
    #'-layout' would keep more of the original formatting
    run(['pdftotext', pdf, text])
    return text
def rotate_pdf(pdfin, pdfout):
    """Turn the PDF on its head: every page of <pdfin> is written to
    <pdfout> using pdftk's '1-endD' page range."""
    run(['pdftk', pdfin, 'cat', '1-endD', 'output', pdfout])
class Book(object):
    """A book being turned from HTML into a PDF.

    The book's HTML and TOC are fetched on demand (see __getattr__),
    intermediate files are kept in a private working directory, and
    the PageSettings object in self.maker runs the PDF tools.  A Book
    can be used as a context manager, in which case cleanup() is
    called on exit.
    """
    page_numbers = 'latin'
    preamble_page_numbers = 'roman'
    engine = 'webkit'
    _try_cleanup_on_del = True

    def notify_watcher(self, message=None):
        """Pass <message> to the watcher callback, if there is one.
        Without a message, the name of the calling function is used."""
        if self.watcher:
            if message is None:
                #message is the name of the caller
                #XXX look at using inspect module
                import traceback
                message = traceback.extract_stack(None, 2)[0][2]
            log("notify_watcher called with '%s'" % message)
            self.watcher(message)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.cleanup()
        #could deal with exceptions here and return true

    def __init__(self, book, server, bookname,
                 page_settings=None, engine=None, watcher=None, isbn=None,
                 license=config.DEFAULT_LICENSE):
        """<book> and <server> identify the source; <bookname> is used
        for the working directory prefix and the published file name.
        <page_settings> is a dictionary of PageSettings arguments and
        is effectively required: it is unpacked straight into
        PageSettings().  <watcher>, if set, is called with progress
        messages."""
        log("*** Starting new book %s ***" % bookname)
        self.book = book
        self.server = server
        self.watcher = watcher
        self.isbn = isbn
        self.license = license
        self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
        os.chmod(self.workdir, 0o755)
        defaults = SERVER_DEFAULTS.get(server, SERVER_DEFAULTS[DEFAULT_SERVER])
        self.default_css = defaults['css']
        self.lang = defaults['lang']
        self.dir = defaults['dir']

        self.body_html_file = self.filepath('body.html')
        self.body_pdf_file = self.filepath('body.pdf')
        self.body_index_file = self.filepath('body.txt')
        self.preamble_html_file = self.filepath('preamble.html')
        self.preamble_pdf_file = self.filepath('preamble.pdf')
        self.tail_html_file = self.filepath('tail.html')
        self.tail_pdf_file = self.filepath('tail.pdf')
        self.isbn_pdf_file = None
        self.pdf_file = self.filepath('final.pdf')

        self.publish_name = bookname
        self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
        self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)

        self.book_url = config.BOOK_URL % (self.server, self.book)
        self.toc_url = config.TOC_URL % (self.server, self.book)

        self.set_page_dimensions(page_settings)

        if engine is not None:
            self.engine = engine
        self.notify_watcher()

    def __del__(self):
        if os.path.exists(self.workdir) and self._try_cleanup_on_del:
            self._try_cleanup_on_del = False #or else you can get in bad cycles
            self.cleanup()

    def __getattr__(self, attr):
        """catch unloaded books and load them"""
        #log('looking for missing attribute "%s"' % (attr))
        if attr == 'tree':
            self.load_book()
            return self.tree
        if attr == 'toc':
            self.load_toc()
            return self.toc
        raise AttributeError("no such member: '%s'" % attr)

    def filepath(self, fn):
        """Return the path of <fn> inside the working directory."""
        return os.path.join(self.workdir, fn)

    def save_data(self, fn, data):
        """Save without tripping up on unicode"""
        if isinstance(data, unicode):
            data = data.encode('utf8', 'ignore')
        with open(fn, 'w') as f:
            f.write(data)

    def save_tempfile(self, fn, data):
        """Save the data in a temporary directory that will be cleaned
        up when all is done.  Return the absolute file path."""
        fn = self.filepath(fn)
        self.save_data(fn, data)
        return fn

    def set_page_dimensions(self, dimensions):
        """Make the PageSettings object from a dictionary of its
        arguments (which must include 'pointsize')."""
        self.maker = PageSettings(**dimensions)

    def extract_pdf_text(self):
        """Extract the text from the body pdf, split into pages, so
        that the correct page can be found to generate the table of
        contents.  Returns the number of pieces found, which can be
        one more than the number of pages."""
        index_pdf(self.body_pdf_file, self.body_index_file)
        with open(self.body_index_file) as f:
            s = unicode(f.read(), 'utf8')
        #pages are separated by formfeed character "^L", "\f" or chr(12)
        self.text_pages = s.split("\f")
        #there is sometimes (probably always) an unwanted ^L at the end
        return len(self.text_pages)

    def make_body_pdf(self):
        """Make a pdf of the HTML, using webkit"""
        #1. Save the html
        html_text = lxml.etree.tostring(self.tree, method="html")
        self.save_data(self.body_html_file, html_text)

        #2. Make a pdf of it
        self.maker.make_raw_pdf(self.body_html_file, self.body_pdf_file,
                                engine=self.engine)
        self.notify_watcher('generate_pdf')

        #3. extract the text for finding contents.
        n_pages = self.extract_pdf_text()
        log("found %s pages in pdf" % n_pages)
        #4. resize pages, shift gutters, even pages
        self.maker.reshape_pdf(self.body_pdf_file, self.dir, centre_end=True)
        self.notify_watcher('reshape_pdf')

        #5 add page numbers
        self.maker.number_pdf(self.body_pdf_file, n_pages, dir=self.dir,
                              numbers=self.page_numbers)
        self.notify_watcher("number_pdf")
        self.notify_watcher()

    def make_preamble_pdf(self):
        """Make the front matter pdf: title page, inside cover and
        table of contents.  This needs the body pdf to exist already
        (see make_contents), and the css url and title to be set."""
        contents = self.make_contents()
        inside_cover_html = self.compose_inside_cover()
        html = ('<html dir="%s"><head>\n'
                '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
                '<link rel="stylesheet" href="%s" />\n'
                '</head>\n<body>\n'
                '<h1 class="frontpage">%s</h1>'
                '%s\n'
                '<div class="contents">%s</div>\n'
                '<div style="page-break-after: always; color:#fff" class="unseen">.'
                '<!--%s--></div></body></html>'
                ) % (self.dir, self.css_url, self.title, inside_cover_html,
                     contents, self.title)
        self.save_data(self.preamble_html_file, html)

        self.maker.make_raw_pdf(self.preamble_html_file, self.preamble_pdf_file,
                                engine=self.engine)

        self.maker.reshape_pdf(self.preamble_pdf_file, self.dir, centre_start=True)

        self.maker.number_pdf(self.preamble_pdf_file, None, dir=self.dir,
                              numbers=self.preamble_page_numbers,
                              number_start=-2)

        self.notify_watcher()

    def make_end_matter_pdf(self):
        """Make an inside back cover and a back cover.  If there is an
        isbn number its barcode will be put on the back cover."""
        if self.isbn:
            self.isbn_pdf_file = self.filepath('isbn.pdf')
            self.maker.make_barcode_pdf(self.isbn, self.isbn_pdf_file)
            self.notify_watcher('make_barcode_pdf')

        self.save_data(self.tail_html_file, self.compose_end_matter())
        self.maker.make_raw_pdf(self.tail_html_file, self.tail_pdf_file,
                                engine=self.engine)

        self.maker.reshape_pdf(self.tail_pdf_file, self.dir, centre_start=True,
                               centre_end=True, even_pages=False)
        self.notify_watcher()

    def make_book_pdf(self):
        """A convenient wrapper of a few necessary steps"""
        # now the Xvfb server is needed. make sure it has had long enough to get going
        self.wait_for_xvfb()
        self.make_body_pdf()
        self.make_preamble_pdf()
        self.make_end_matter_pdf()

        # isbn_pdf_file is None without an isbn; concat_pdfs skips it
        concat_pdfs(self.pdf_file, self.preamble_pdf_file,
                    self.body_pdf_file, self.tail_pdf_file,
                    self.isbn_pdf_file)

        self.notify_watcher('concatenated_pdfs')

    def make_simple_pdf(self, mode):
        """Make a simple pdf document without contents or separate
        title page.  This is used for multicolumn newspapers and for
        web-destined pdfs."""
        self.wait_for_xvfb()
        #0. Add heading to beginning of html
        body = list(self.tree.cssselect('body'))[0]
        e = body.makeelement('h1', {'id': 'book-title'})
        e.text = self.title
        body.insert(0, e)
        intro = lxml.html.fragment_fromstring(self.compose_inside_cover())
        e.addnext(intro)

        #0.5 adjust parameters to suit the particular kind of output
        if mode == 'web':
            self.maker.gutter = 0

        #1. Save the html
        html_text = lxml.etree.tostring(self.tree, method="html")
        self.save_data(self.body_html_file, html_text)

        #2. Make a pdf of it (direct to final pdf)
        self.maker.make_raw_pdf(self.body_html_file, self.pdf_file,
                                engine=self.engine, outline=True)
        self.notify_watcher('generate_pdf')

        if mode != 'web':
            #3. resize pages and shift gutters.
            self.maker.reshape_pdf(self.pdf_file, self.dir, centre_end=True)
            self.notify_watcher('reshape_pdf')

            #4. add page numbers
            self.maker.number_pdf(self.pdf_file, None, dir=self.dir,
                                  numbers=self.page_numbers)
            self.notify_watcher("number_pdf")
        self.notify_watcher()

    def rotate180(self):
        """Rotate the pdf 180 degrees so an RTL book can print on LTR
        presses."""
        rotated = self.filepath('final-rotate.pdf')
        unrotated = self.filepath('final-pre-rotate.pdf')
        #leave the unrotated pdf intact at first, in case of error.
        rotate_pdf(self.pdf_file, rotated)
        os.rename(self.pdf_file, unrotated)
        os.rename(rotated, self.pdf_file)
        self.notify_watcher()

    def publish_pdf(self):
        """Move the finished PDF to its final resting place"""
        log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
        os.rename(self.pdf_file, self.publish_file)
        self.notify_watcher()

    def load_toc(self):
        """From the TOC.txt file create a list of TocItems with
        the attributes <status>, <chapter>, and <title>.

        <status> is a number, with the following meaning:

              0 - section heading with no chapter
              1 - chapter heading
              2 - book title

        The TocItem object has convenience functions <is_chapter> and
        <is_section>.

        <chapter> is twiki name of the chapter.

        <title> is a human readable title for the chapter.  It is likely to
        differ from the title given in the chapter's <h1> heading.
        """
        f = urlopen(self.toc_url)
        self.toc = []
        try:
            # Items are groups of three lines; an incomplete group at
            # the end is dropped.
            while True:
                try:
                    self.toc.append(TocItem(f.next().strip(),
                                            f.next().strip(),
                                            f.next().strip()))
                except StopIteration:
                    break
        finally:
            f.close()
        self.notify_watcher()

    def load_book(self, tidy=True):
        """Fetch and parse the raw html of the book.  If tidy is true
        (default) links in the document will be made absolute."""
        f = urlopen(self.book_url)
        try:
            html = f.read()
        finally:
            f.close()
        html = ('<html dir="%s"><head>\n<title>%s</title>\n'
                '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
                '</head>\n<body>\n'
                '%s\n'
                '<div style="page-break-before: always; color:#fff;" class="unseen">'
                'A FLOSSManuals book</div>\n</body></html>'
                ) % (self.dir, self.book, html)

        self.save_tempfile('raw.html', html)

        tree = lxml.html.document_fromstring(html)
        if tidy:
            tree.make_links_absolute(self.book_url)
        self.tree = tree
        self.headings = list(tree.cssselect('h1'))
        if self.headings:
            self.headings[0].set('class', "first-heading")
        #self.heading_texts = [x.textcontent() for x in self.headings]
        for h1 in self.headings:
            h1.title = h1.text_content().strip()
        self.notify_watcher()

    def load(self):
        """Wrapper around all necessary load methods."""
        self.load_book()
        self.load_toc()

    def find_page(self, element, start_page=1):
        """Search through the extracted pages, beginning at
        <start_page>, for the element's cookie.  Returns a
        (page number, found) pair; if the cookie is not found (or the
        element never got one) the page number is <start_page>."""
        text = getattr(element, 'cookie', None)
        if text is None:
            # headings skipped by add_section_titles have no cookie
            return start_page, False
        for i, content in enumerate(self.text_pages[start_page - 1:]):
            log("looking for '%s' in page %s below:\n%s[...]" %
                (text, i + start_page, content[:160]), debug='INDEX')
            #remove spaces: they can appear spuriously
            content = ''.join(content.split())
            if text in content:
                return i + start_page, True
        #If it isn't found, return the start page so the next chapter has a chance
        return start_page, False

    def make_contents(self):
        """Generate HTML containing the table of contents.  This can
        only be done after the main PDF has been made."""
        header = '<h1>Table of Contents</h1><table class="toc">\n'
        row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
                    '<td class="pagenumber">%s</td></tr>\n')
        section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
        footer = '\n</table>'

        contents = []

        chapter = 1
        page_num = 1

        headings = iter(self.headings)

        for t in self.toc:
            if t.is_chapter():
                try:
                    h1 = next(headings)
                except StopIteration:
                    log("heading not found for %s (previous h1 missing?). Stopping" % t)
                    break
                page_num, found = self.find_page(h1, page_num)
                # sometimes the heading isn't found, which is shown as a frown
                if found:
                    contents.append(row_tmpl % (chapter, h1.title, page_num))
                else:
                    contents.append(row_tmpl % (chapter, h1.title, ':-('))
                chapter += 1
            elif t.is_section():
                contents.append(section_tmpl % t.title)
            else:
                log("mystery TOC item: %s" % t)

        doc = header + '\n'.join(contents) + footer
        self.notify_watcher()
        return doc

    def add_section_titles(self):
        """Add any section heading pages that the TOC.txt file
        specifies.  These are sub-book, super-chapter groupings.

        Also add initial numbers to chapters.
        """
        headings = iter(self.headings)
        chapter = 1
        section = None
        section_placed = False

        for t in self.toc:
            # NOTE(review): chapters listed before the first section
            # are skipped here without using up a heading, but
            # make_contents uses one for every chapter -- confirm that
            # TOCs always begin with a section.
            if t.is_chapter() and section is not None:
                try:
                    h1 = next(headings)
                except StopIteration:
                    log("heading not found for %s (previous h1 missing?)" % t)
                    break
                item = h1.makeelement('div', Class='chapter')
                log(h1.title, debug='HTMLGEN')
                item.text = h1.title
                _add_initial_number(item, chapter)

                section.append(item)

                if not section_placed:
                    log("placing section", debug='HTMLGEN')
                    h1.addprevious(section)
                    section_placed = True
                else:
                    log("NOT placing section", debug='HTMLGEN')

                #put a bold number at the beginning of the h1, and a hidden cookie at the end.
                _add_initial_number(h1, chapter)
                _add_chapter_cookie(h1)
                chapter += 1

            elif t.is_section():
                section = self.tree.makeelement('div', Class="subsection")
                # section Element complains when you try to ask it whether it
                # has been placed (though it does know)
                section_placed = False
                heading = lxml.html.fragment_fromstring(t.title, create_parent='div')
                heading.set("Class", "subsection-heading")
                section.append(heading)

        self.notify_watcher()

    def add_css(self, css=None, mode='book'):
        """If css looks like a url, use it as a stylesheet link.
        Otherwise it is the CSS itself, which is saved to a temporary file
        and linked to.  With no css, the server's default stylesheet
        for <mode> is used.  Returns the stylesheet url."""
        log("css is %r" % css)
        htmltree = self.tree
        if css is None or not css.strip():
            # fall back to the default server, as __init__ does
            defaults = SERVER_DEFAULTS.get(self.server,
                                           SERVER_DEFAULTS[DEFAULT_SERVER])
            url = 'file://' + os.path.abspath(defaults['css-%s' % mode])
        elif not re.match(r'^https?://\S+$', css):
            fn = self.save_tempfile('objavi.css', css)
            url = 'file://' + fn
        else:
            url = css
        #XXX for debugging and perhaps sensible anyway
        #url = url.replace('file:///home/douglas/objavi2', '')

        #find the head -- it's probably first child but lets not assume.
        for child in htmltree:
            if child.tag == 'head':
                head = child
                break
        else:
            head = htmltree.makeelement('head')
            htmltree.insert(0, head)

        lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
        self.css_url = url
        self.notify_watcher()
        return url

    def set_title(self, title=None):
        """If a string is supplied, it becomes the book's title.
        Otherwise a guess is made."""
        if title:
            self.title = title
        else:
            titles = [x.text_content() for x in self.tree.cssselect('title')]
            if titles and titles[0]:
                self.title = titles[0]
            else:
                #oh well
                self.title = 'A Manual About ' + self.book
        return self.title

    def _read_localised_template(self, template, fallbacks=('en',)):
        """Try to get the template in the appropriate language, otherwise
        in english.

        <template> is a filename pattern with one %s for the language
        code.  Returns the contents of the first file that can be
        read; raises IOError if none can.
        """
        languages = [self.lang] + list(fallbacks)
        for lang in languages:
            fn = template % (lang)
            try:
                with open(fn) as f:
                    return f.read()
            except IOError as e:
                log("couldn't open template for lang %s (filename %s)" % (lang, fn))
                log(e)
        raise IOError("no readable template %r for any of the languages %r"
                      % (template, languages))

    def compose_inside_cover(self):
        """create the markup for the preamble inside cover."""
        template = self._read_localised_template(config.INSIDE_FRONT_COVER_TEMPLATE)

        if self.isbn:
            isbn_text = '<b>ISBN :</b> %s <br>' % self.isbn
        else:
            isbn_text = ''

        return template % {'date': time.strftime('%Y-%m-%d'),
                           'isbn': isbn_text,
                           'license': self.license,
                           }

    def compose_end_matter(self):
        """create the markup for the end_matter inside cover.  If
        self.isbn is not set, the html will result in a pdf that
        spills onto two pages.
        """
        template = self._read_localised_template(config.END_MATTER_TEMPLATE)

        d = {'css_url': self.css_url,
             'title': self.title
             }

        if self.isbn:
            d['inside_cover_style'] = ''
        else:
            d['inside_cover_style'] = 'page-break-after: always'

        return template % d

    def spawn_x(self):
        """Start an Xvfb instance, using a new server number.  A
        reference to it is stored in self.xvfb, which is used to kill
        it when the pdf is done.

        Note that Xvfb doesn't interact well with dbus which is
        present on modern desktops.
        """
        #Find an unused server number (in case two cgis are running at once)
        while True:
            servernum = random.randrange(50, 500)
            if not os.path.exists('/tmp/.X%s-lock' % servernum):
                break

        self.xserver_no = ':%s' % servernum

        authfile = self.filepath('Xauthority')
        os.environ['XAUTHORITY'] = authfile

        #mcookie(1) eats into /dev/random, so avoid that
        from hashlib import md5
        m = md5("%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32)))
        mcookie = m.hexdigest()

        check_call(['xauth', 'add', self.xserver_no, '.', mcookie])

        self.xvfb = Popen(['Xvfb', self.xserver_no,
                           '-screen', '0', '1024x768x24',
                           '-pixdepths', '32',
                           #'-blackpixel', '0',
                           #'-whitepixel', str(2 ** 24 -1),
                           #'+extension', 'Composite',
                           '-dpi', '96',
                           '-kb',
                           '-nolisten', 'tcp',
                           ])

        # We need to wait a bit before the Xvfb is ready.  but the
        # downloads are so slow that that probably doesn't matter

        self.xvfb_ready_time = time.time() + 2

        os.environ['DISPLAY'] = self.xserver_no
        log(self.xserver_no)

    def wait_for_xvfb(self):
        """wait until a previously set time before continuing.  This
        is so Xvfb has time to properly start."""
        if hasattr(self, 'xvfb'):
            d = self.xvfb_ready_time - time.time()
            if d > 0:
                time.sleep(d)
                self.notify_watcher()

    def cleanup_x(self):
        """Try very hard to kill off Xvfb.  In addition to killing
        this instance's xvfb, occasionally (randomly) search for
        escaped Xvfb instances and kill those too."""
        if not hasattr(self, 'xvfb'):
            return
        check_call(['xauth', 'remove', self.xserver_no])
        p = self.xvfb
        if p.poll() is not None:
            # Already reaped (cleanup can run twice); signalling the
            # old pid now would fail or hit an unrelated process.
            log("Xvfb %s has already exited with %s" % (p.pid, p.poll()))
        else:
            log("trying to kill Xvfb %s" % p.pid)
            os.kill(p.pid, 15)
            for i in range(10):
                if p.poll() is not None:
                    log("%s died with %s" % (p.pid, p.poll()))
                    break
                log("%s not dead yet" % p.pid)
                time.sleep(0.2)
            else:
                log("Xvfb would not die! kill -9! kill -9!")
                os.kill(p.pid, 9)

        if random.random() < 0.05:
            #kill old xvfbs occasionally, if there are any.
            self.kill_old_xvfbs()

    def kill_old_xvfbs(self):
        """Sometimes, despite everything, Xvfb instances hang around
        well after they are wanted -- for example if the cgi process
        dies particularly badly.  So kill them if they have been
        running for a long time."""
        log("running kill_old_xvfbs")
        p = Popen(['ps', '-C', 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
        data = p.communicate()[0].strip()
        # each line is a pid and an elapsed time like [[dd-]hh:]mm:ss
        line_re = re.compile(r'^(\d+)\s+(?:(\d+)-)?(?:(\d+):)?(\d{2}):(\d{2})$')
        if data:
            for line in data.split('\n'):
                log('dealing with ps output "%s"' % line)
                match = line_re.match(line.strip())
                if match is None:
                    log("Couldn't parse that line!")
                    continue
                pid, days, hours, minutes, seconds = match.groups()
                # 50 minutes should be enough xvfb time for anyone
                if days or hours or int(minutes) > 50:
                    log("going to kill pid %s" % pid)
                    try:
                        os.kill(int(pid), 15)
                        time.sleep(0.5)
                        os.kill(int(pid), 9)
                    except OSError as e:
                        # it has died already, or it isn't ours to kill
                        log("kill of pid %s stopped by: %s" % (pid, e))
        self.notify_watcher()

    def cleanup(self):
        """Kill Xvfb and, unless config.KEEP_TEMP_FILES is set, remove
        the working directory and the files in it."""
        self.cleanup_x()
        if not config.KEEP_TEMP_FILES:
            for fn in os.listdir(self.workdir):
                os.remove(os.path.join(self.workdir, fn))
            os.rmdir(self.workdir)
        else:
            log("NOT removing '%s', containing the following files:" % self.workdir)
            log(*os.listdir(self.workdir))

        self.notify_watcher()