Make separate default CSS files for different modes
[objavi2.git] / fmbook.py
blob976820a89162ae8680b8d22fded8388e5e12d133
1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
3 # their interactions.
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
22 PDF"""
24 import os, sys
25 import tempfile
26 import re, time
27 import random
28 from urllib2 import urlopen
29 from subprocess import Popen, check_call, PIPE
31 import lxml.etree, lxml.html
32 import lxml, lxml.html, lxml.etree
34 import config
35 from config import SERVER_DEFAULTS, DEFAULT_SERVER, POINT_2_MM, PDFEDIT_MAX_PAGES
# Absolute path of the scratch directory; each Book makes its own
# working directory inside it.
TMPDIR = os.path.abspath(config.TMPDIR)
# The web server's document root, or the current directory when
# DOCUMENT_ROOT is not in the environment.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')
# Directory under the document root that finished books are published to.
PUBLISH_PATH = "%s/books/" % DOC_ROOT
def log(*messages, **kwargs):
    """Write each message to stderr, one per line.

    If a <debug> keyword is specified, the messages are only printed
    when config.DEBUG_ALL is set or the keyword's value is in
    config.DEBUG_MODES.  A message that cannot be printed as it is
    gets printed as its repr() instead."""
    if 'debug' in kwargs:
        wanted = config.DEBUG_ALL or kwargs['debug'] in config.DEBUG_MODES
        if not wanted:
            return
    for message in messages:
        try:
            print >> sys.stderr, message
        except Exception:
            print >> sys.stderr, repr(message)
def _add_initial_number(e, n):
    """Put a styled chapter number n at the beginning of element e.

    The number goes in a new <strong Class="initial"> first child.
    Whatever text e started with is moved into that child's tail,
    after a single space, so it still follows the number."""
    number = e.makeelement("strong", Class="initial")
    number.text = "%s." % n
    e.insert(0, number)
    leading_text = e.text
    if leading_text is None:
        number.tail = ' '
    else:
        number.tail = ' ' + leading_text
    e.text = ''
def _add_chapter_cookie(e):
    """Add magic hidden text after element e to help with contents
    generation.

    The text is 8 characters picked at random from
    config.CHAPTER_COOKIE_CHARS, wrapped in a tiny span.  It is also
    remembered as e.cookie so it can be searched for later."""
    hidden_style = ("font-size:6pt; line-height: 6pt; color: #fff; width:0;"
                    " float:left; margin:-2em; z-index: -67; display: block;")
    letters = [random.choice(config.CHAPTER_COOKIE_CHARS) for _ in range(8)]
    marker = e.makeelement("span", Class="heading-cookie", dir="ltr",
                           style=hidden_style)
    marker.text = ''.join(letters)
    e.cookie = marker.text
    # the marker becomes the next sibling of e, not a child of it
    e.addnext(marker)
class TocItem(object):
    """One (status, chapter, title) record from a TOC.txt file.

    status is compared as a string:
      '0' - section heading with no chapter
      '1' - chapter heading
      '2' - book title

    chapter is the twiki name of the chapter, and title is a human
    readable name for it."""

    CHAPTER_STATUS = '1'
    SECTION_STATUS = '0'

    def __init__(self, status, chapter, title):
        self.status, self.chapter, self.title = status, chapter, title

    def is_chapter(self):
        """True if this record is a chapter heading."""
        return self.status == self.CHAPTER_STATUS

    def is_section(self):
        """True if this record is a section heading."""
        return self.status == self.SECTION_STATUS

    def __str__(self):
        pairs = ['%s: %s' % pair for pair in self.__dict__.items()]
        return '<toc: %s>' % ', '.join(pairs)
def run(cmd):
    """Run cmd (a list of arguments) to completion and log the outcome.

    The command's stdout and stderr are captured and logged along
    with its exit status.  If the process can't be started or
    communicated with, that is logged and the exception re-raised.
    Nothing is returned, and a non-zero exit status is not treated as
    an error."""
    try:
        process = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout_data, stderr_data = process.communicate()
    except Exception:
        log("Failed on command: %r" % cmd)
        raise
    report = (' '.join(cmd), cmd[0], process.poll(), stdout_data, stderr_data)
    log("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" % report)
def find_containing_paper(w, h):
    """Return the first paper in config.PAPER_SIZES that can hold a
    page of width w and height h.

    The result is (name, x_margin, y_margin), each margin being half
    of the spare space in that direction.  A ValueError is raised if
    nothing in the list is big enough."""
    for paper_name, paper_w, paper_h in config.PAPER_SIZES:
        if paper_w < w or paper_h < h:
            continue
        spare_w = paper_w - w
        spare_h = paper_h - h
        return (paper_name, spare_w * 0.5, spare_h * 0.5)

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * POINT_2_MM, h * POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    <pointsize> is a (width, height) pair.  Sizes are multiplied by
    POINT_2_MM where millimetres are wanted, so they are presumably in
    points.  Recognised keyword arguments are gutter, top_margin,
    side_margin, bottom_margin, moz_printer, columns and
    column_margin; each has a calculated default."""

    def __init__(self, pointsize, **kwargs):
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.

        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)

        self.gutter = kwargs.get('gutter', (config.BASE_GUTTER +
                                            config.PROPORTIONAL_GUTTER * self.width))

        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))
        # Each margin is looked up under its own name (side and bottom
        # used to be read from 'top_margin').  A missing or None value
        # means the default.
        for name in ('top_margin', 'side_margin', 'bottom_margin'):
            value = kwargs.get(name)
            if value is None:
                value = default_margin
            setattr(self, name, value)

        self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))
        self.columns = kwargs.get('columns', 1)

        self.column_margin = kwargs.get('column_margin', default_margin * 2 / (4.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers, in the order
        # top, right, bottom, left (as used by _webkit_command)
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            self.margins.append((m + clip) * POINT_2_MM)

        for x in locals().items():
            log("%s: %s" % x, debug='PDFGEN')
        for x in dir(self):
            log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf):
        """Return the wkhtmltopdf argument list that renders <html>
        into <pdf> on this paper size with these margins."""
        m = [str(x) for x in self.margins]
        cmd = [config.WKHTMLTOPDF, '-q', '-s', self.papersize,
               '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
               ] + config.WKHTMLTOPDF_EXTRA_COMMANDS + [
               html, pdf]
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf):
        """Return the firefox argument list that prints <html> via the
        printer named self.moz_printer.  <pdf> is not used here."""
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        # NOTE(review): this used a bare, undefined FIREFOX name; it
        # is assumed that config defines FIREFOX next to WKHTMLTOPDF.
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def make_raw_pdf(self, html, pdf, engine='webkit'):
        """Render <html> to <pdf> using the named engine.

        With more than one column, a narrow single column pdf is made
        and reshaped first, then pdfnup puts self.columns of its pages
        side by side on each page of <pdf>."""
        func = getattr(self, '_%s_command' % engine)
        if self.columns == 1:
            cmd = func(html, pdf)
            run(cmd)
        else:
            printable_width = self.width - 2.0 * self.side_margin - self.gutter
            column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
            page_width = column_width + self.column_margin

            columnmaker = PageSettings((page_width, self.height), moz_printer=self.moz_printer,
                                       gutter=0, top_margin=self.top_margin,
                                       side_margin=self.column_margin * 0.5,
                                       bottom_margin=self.bottom_margin)

            column_pdf = pdf[:-4] + '-single-column.pdf'
            columnmaker.make_raw_pdf(html, column_pdf, engine=engine)
            columnmaker.reshape_pdf(column_pdf)

            cmd = ['pdfnup',
                   '--nup', '%sx1' % int(self.columns),
                   '--paper', self.papersize.lower() + 'paper',
                   '--outfile', pdf,
                   '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
                   '--noautoscale', 'true',
                   '--orient', 'portrait',
                   #'--tidy', 'false',
                   column_pdf
                   ]
            run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.  The pdf is rewritten in
        place."""
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to the whole of <pdf>, in place, in one
        pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to <pdf>; keyword arguments are passed on
        to _number_pdf.

        <pages> is the (possibly inexact) page count, or None if it is
        not known."""
        # if there are too many pages for pdfedit to handle in one go,
        # split the job into bits.  <pages> may not be exact
        if pages is None or pages <= PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
            return

        # section_size must be even
        sections = pages // PDFEDIT_MAX_PAGES + 1
        section_size = (pages // sections + 2) & ~1

        # The position in the pdf and the number to print are tracked
        # separately, so a number_start other than 1 does not select
        # the wrong pages.
        first_number = kwargs.pop('number_start', 1)
        pdf_sections = []
        start = 1
        while start < pages:
            end = start + section_size - 1
            pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], start, end)
            if end < pages - 1:
                page_range = '%s-%s' % (start, end)
            else:
                # the last section takes whatever is left
                page_range = '%s-end' % start
            run(['pdftk',
                 pdf,
                 'cat',
                 page_range,
                 'output',
                 pdf_section,
                 ])
            self._number_pdf(pdf_section,
                             number_start=first_number + start - 1,
                             **kwargs)
            pdf_sections.append(pdf_section)
            start = end + 1

        concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page."""

        position = '%s,%s,%s,%s,%s' %(corner, self.width, self.height, self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # drop our copy of the pipe so p1 gets SIGPIPE if p2 exits early
        p1.stdout.close()
        out, err = p2.communicate()
        # reap p1 so that its exit status is known for the log
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def concat_pdfs(name, *args):
    """Join all the named pdfs together, in the order given, into one
    and save it as <name>.  Arguments that are None are skipped."""
    inputs = [x for x in args if x is not None]
    run(['pdftk'] + inputs + ['cat', 'output', name])
def index_pdf(pdf, text=None):
    """Use pdftotext to extract utf-8 text from a pdf, using ^L to
    separate pages.

    The text is written to the file <text>, which defaults to the pdf
    name with '.index.txt' appended.  The text file's name is
    returned."""
    output = text
    if output is None:
        output = pdf + '.index.txt'
    # adding '-layout' would keep more of the original formatting
    run(['pdftotext', pdf, output])
    return output
def rotate_pdf(pdfin, pdfout):
    """Turn the PDF on its head: pdftk is asked for pages '1-endD' of
    <pdfin>, and the result is saved as <pdfout>."""
    pdftk_args = ['cat', '1-endD', 'output', pdfout]
    run(['pdftk', pdfin] + pdftk_args)
class Book(object):
    # page number style handed to the page numberer for the body
    page_numbers = 'latin'
    # page number style handed to the page numberer for the preamble
    preamble_page_numbers = 'roman'
    # rendering engine used unless __init__ is given another
    engine= 'webkit'
    # __del__ turns this off just before it calls cleanup()
    _try_cleanup_on_del = True
def notify_watcher(self, message=None):
    """Pass a progress message to the watcher callback, if there is
    one.  With no message, the name of the calling function is used."""
    if not self.watcher:
        return
    if message is None:
        #message is the name of the caller
        #XXX look at using inspect module
        import traceback
        # of the two innermost stack entries the first is the caller;
        # item 2 of an entry is the function name
        caller = traceback.extract_stack(None, 2)[0]
        message = caller[2]
    log("notify_watcher called with '%s'" % message)
    self.watcher(message)
def __enter__(self):
    """Context manager entry: the book itself is the managed object."""
    return self
def __exit__(self, exc_type, exc_value, traceback):
    """Context manager exit: run cleanup().  Nothing is returned, so
    an exception raised inside the with block is not suppressed."""
    self.cleanup()
    #could deal with exceptions here and return true
363 def __init__(self, book, server, bookname,
364 page_settings=None, engine=None, watcher=None, isbn=None,
365 license=config.DEFAULT_LICENSE):
366 log("*** Starting new book %s ***" % bookname)
367 self.book = book
368 self.server = server
369 self.watcher = watcher
370 self.isbn = isbn
371 self.license = license
372 self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
373 os.chmod(self.workdir, 0755)
374 defaults = SERVER_DEFAULTS.get(server, SERVER_DEFAULTS[DEFAULT_SERVER])
375 self.default_css = defaults['css']
376 self.lang = defaults['lang']
377 self.dir = defaults['dir']
379 self.body_html_file = self.filepath('body.html')
380 self.body_pdf_file = self.filepath('body.pdf')
381 self.body_index_file = self.filepath('body.txt')
382 self.preamble_html_file = self.filepath('preamble.html')
383 self.preamble_pdf_file = self.filepath('preamble.pdf')
384 self.tail_html_file = self.filepath('tail.html')
385 self.tail_pdf_file = self.filepath('tail.pdf')
386 self.isbn_pdf_file = None
387 self.pdf_file = self.filepath('final.pdf')
389 self.publish_name = bookname
390 self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
391 self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)
393 self.book_url = config.BOOK_URL % (self.server, self.book)
394 self.toc_url = config.TOC_URL % (self.server, self.book)
396 self.set_page_dimensions(page_settings)
398 if engine is not None:
399 self.engine = engine
400 self.notify_watcher()
def __del__(self):
    """Clean up on deletion if the working directory still exists and
    cleanup has not already been tried from here."""
    if not (os.path.exists(self.workdir) and self._try_cleanup_on_del):
        return
    #or else you can get in bad cycles
    self._try_cleanup_on_del = False
    self.cleanup()
def __getattr__(self, attr):
    """catch unloaded books and load them.

    Looking up <tree> or <toc> before it exists runs load_book() or
    load_toc() respectively and then returns the attribute.  Any other
    missing name raises AttributeError."""
    #log('looking for missing attribute "%s"' % (attr))
    if attr == 'tree':
        loader = self.load_book
    elif attr == 'toc':
        loader = self.load_toc
    else:
        raise AttributeError("no such member: '%s'" % attr)
    loader()
    return getattr(self, attr)
def filepath(self, fn):
    """Return the path of the file named <fn> inside this book's
    working directory."""
    return os.path.join(self.workdir, fn)
def save_data(self, fn, data):
    """Save without tripping up on unicode.

    <data> is written to the file <fn>, replacing anything already
    there.  Unicode data is encoded as utf-8 first, leaving out
    anything that can't be encoded."""
    if isinstance(data, unicode):
        data = data.encode('utf8', 'ignore')
    f = open(fn, 'w')
    try:
        f.write(data)
    finally:
        # close the file even if the write fails
        f.close()
def save_tempfile(self, fn, data):
    """Save the data under the name <fn> in the book's working
    directory, which is cleaned up when all is done.  Return the
    path of the saved file."""
    path = self.filepath(fn)
    self.save_data(path, data)
    return path
def set_page_dimensions(self, dimensions):
    """Make the PageSettings object, self.maker, that will lay out
    this book.  <dimensions> is a mapping that is unpacked into
    PageSettings' keyword arguments, so it can't be None."""
    self.maker = PageSettings(**dimensions)
def extract_pdf_text(self):
    """Extract the text from the body pdf, split into pages, so
    that the correct page can be found to generate the table of
    contents.

    The pages are kept in self.text_pages, and how many there are is
    returned."""
    index_pdf(self.body_pdf_file, self.body_index_file)
    f = open(self.body_index_file)
    raw = f.read()
    f.close()
    #pages are separated by formfeed character "^L", "\f" or chr(12)
    #there is sometimes (probably always) an unwanted ^L at the end
    self.text_pages = raw.decode('utf8').split("\f")
    return len(self.text_pages)
def make_body_pdf(self):
    """Make a pdf of the book's HTML with the chosen engine, then
    reshape and number its pages.  The watcher is told as each step
    finishes."""
    html_file = self.body_html_file
    pdf_file = self.body_pdf_file

    #1. Save the html
    self.save_data(html_file, lxml.etree.tostring(self.tree, method="html"))

    #2. Make a pdf of it
    self.maker.make_raw_pdf(html_file, pdf_file, engine=self.engine)
    self.notify_watcher('generate_pdf')

    #3. extract the text for finding contents.
    page_count = self.extract_pdf_text()
    log ("found %s pages in pdf" % page_count)

    #4. resize pages, shift gutters, and rotate 180 degrees for RTL
    self.maker.reshape_pdf(pdf_file, self.dir, centre_end=True)
    self.notify_watcher('reshape_pdf')

    #5 add page numbers
    self.maker.number_pdf(pdf_file, page_count, dir=self.dir,
                          numbers=self.page_numbers)
    self.notify_watcher("number_pdf")
    self.notify_watcher()
def make_preamble_pdf(self):
    """Make the front matter pdf: a title page, the inside cover and
    the table of contents.  It is then reshaped and numbered in the
    preamble style, starting from -2."""
    page_tmpl = ('<html dir="%s"><head>\n'
                 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
                 '<link rel="stylesheet" href="%s" />\n'
                 '</head>\n<body>\n'
                 '<h1 class="frontpage">%s</h1>'
                 '%s\n'
                 '<div class="contents">%s</div>\n'
                 '<div style="page-break-after: always; color:#fff" class="unseen">.'
                 '<!--%s--></div></body></html>'
                 )
    contents = self.make_contents()
    inside_cover_html = self.compose_inside_cover()
    fields = (self.dir, self.css_url, self.title, inside_cover_html,
              contents, self.title)
    self.save_data(self.preamble_html_file, page_tmpl % fields)

    maker = self.maker
    pdf_file = self.preamble_pdf_file
    maker.make_raw_pdf(self.preamble_html_file, pdf_file, engine=self.engine)
    maker.reshape_pdf(pdf_file, self.dir, centre_start=True)
    maker.number_pdf(pdf_file, None, dir=self.dir,
                     numbers=self.preamble_page_numbers,
                     number_start=-2)

    self.notify_watcher()
def make_end_matter_pdf(self):
    """Make an inside back cover and a back cover.  If there is an
    isbn number its barcode will be put on the back cover."""
    maker = self.maker
    if self.isbn:
        self.isbn_pdf_file = self.filepath('isbn.pdf')
        maker.make_barcode_pdf(self.isbn, self.isbn_pdf_file)
        self.notify_watcher('make_barcode_pdf')

    html_file, pdf_file = self.tail_html_file, self.tail_pdf_file
    self.save_data(html_file, self.compose_end_matter())
    maker.make_raw_pdf(html_file, pdf_file, engine=self.engine)
    maker.reshape_pdf(pdf_file, self.dir, centre_start=True,
                      centre_end=True, even_pages=False)
    self.notify_watcher()
def make_book_pdf(self):
    """A convenient wrapper of a few necessary steps: make the body,
    preamble and end matter pdfs, then join them into the final pdf."""
    # now the Xvfb server is needed. make sure it has had long enough to get going
    self.wait_for_xvfb()
    for make_part in (self.make_body_pdf,
                      self.make_preamble_pdf,
                      self.make_end_matter_pdf):
        make_part()

    # isbn_pdf_file is None unless a barcode page was made
    parts = (self.preamble_pdf_file, self.body_pdf_file,
             self.tail_pdf_file, self.isbn_pdf_file)
    concat_pdfs(self.pdf_file, *parts)

    self.notify_watcher('concatenated_pdfs')
    #and move it into place (what place?)
def rotate180(self):
    """Rotate the pdf 180 degrees so an RTL book can print on LTR
    presses.  The unrotated pdf is kept as final-pre-rotate.pdf."""
    final = self.pdf_file
    rotated = self.filepath('final-rotate.pdf')
    unrotated = self.filepath('final-pre-rotate.pdf')
    #leave the unrotated pdf intact at first, in case of error.
    rotate_pdf(final, rotated)
    os.rename(final, unrotated)
    os.rename(rotated, final)
    self.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF to its final resting place"""
    # os.rename can't move a file between filesystems, and the working
    # directory and the publishing directory may not share one.
    # shutil.move copes with that.
    import shutil
    log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
    shutil.move(self.pdf_file, self.publish_file)
    self.notify_watcher()
def load_toc(self):
    """From the TOC.txt file create a list of TocItems with
    the attributes <status>, <chapter>, and <title>.

    <status> is a number, with the following meaning:

          0 - section heading with no chapter
          1 - chapter heading
          2 - book title

    The TocItem object has convenience functions <is_chapter> and
    <is_section>.

    <chapter> is twiki name of the chapter.

    <title> is a human readable title for the chapter.  It is likely to
    differ from the title given in the chapter's <h1> heading.

    The file is read three lines at a time; an incomplete group at the
    end is dropped.
    """
    f = urlopen(self.toc_url)
    self.toc = []
    while True:
        try:
            status = f.next().strip()
            chapter = f.next().strip()
            title = f.next().strip()
        except StopIteration:
            break
        self.toc.append(TocItem(status, chapter, title))
    f.close()
    self.notify_watcher()
def load_book(self, tidy=True):
    """Fetch and parse the raw html of the book.  If tidy is true
    (default) links in the document will be made absolute.

    The parsed document ends up in self.tree and its <h1> elements in
    self.headings.  Each heading gets a .title attribute holding its
    stripped text."""
    response = urlopen(self.book_url)
    body = response.read()
    response.close()

    page_tmpl = ('<html dir="%s"><head>\n<title>%s</title>\n'
                 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
                 '</head>\n<body>\n'
                 '%s\n'
                 '<div style="page-break-before: always; color:#fff;" class="unseen">'
                 'A FLOSSManuals book</div>\n</body></html>'
                 )
    html = page_tmpl % (self.dir, self.book, body)

    self.save_tempfile('raw.html', html)

    tree = lxml.html.document_fromstring(html)
    if tidy:
        tree.make_links_absolute(self.book_url)
    self.tree = tree
    self.headings = list(tree.cssselect('h1'))
    if self.headings:
        self.headings[0].set('class', "first-heading")
    #self.heading_texts = [x.textcontent() for x in self.headings]
    for h1 in self.headings:
        h1.title = h1.text_content().strip()
    self.notify_watcher()
def load(self):
    """Wrapper around all necessary load methods: fetch the book's
    html, then its table of contents."""
    self.load_book()
    self.load_toc()
def find_page(self, element, start_page=1):
    """Search the extracted page texts, from <start_page> on, for the
    element's cookie, and return the page number on which the
    element probably occurs.

    The result is a (page number, found) pair.  Page numbers count
    from 1."""
    text = element.cookie
    for offset, page_text in enumerate(self.text_pages[start_page - 1:]):
        page_no = offset + start_page
        log("looking for '%s' in page %s below:\n%s[...]" %
            (text, page_no, page_text[:160]), debug='INDEX')
        #remove spaces: they can appear spuriously
        squashed = ''.join(page_text.split())
        if text in squashed:
            return page_no, True
    #If it isn't found, return the start page so the next chapter has a chance
    return start_page, False
def make_contents(self):
    """Generate HTML containing the table of contents.  This can
    only be done after the main PDF has been made.

    Each chapter in the TOC is paired with the next <h1> of the
    book.  Generation stops early if the headings run out."""
    header = '<h1>Table of Contents</h1><table class="toc">\n'
    row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
                '<td class="pagenumber">%s</td></tr>\n')
    section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
    footer = '\n</table>'

    rows = []
    chapter_no = 1
    page_num = 1
    h1s = iter(self.headings)

    for item in self.toc:
        if item.is_section():
            rows.append(section_tmpl % item.title)
            continue
        if not item.is_chapter():
            log("mystery TOC item: %s" % item)
            continue
        try:
            h1 = h1s.next()
        except StopIteration:
            log("heading not found for %s (previous h1 missing?). Stopping" % item)
            break
        # the search for each chapter starts where the last one ended
        page_num, found = self.find_page(h1, page_num)
        # sometimes the heading isn't found, which is shown as a frown
        if found:
            shown = page_num
        else:
            shown = ':-('
        rows.append(row_tmpl % (chapter_no, h1.title, shown))
        chapter_no += 1

    doc = header + '\n'.join(rows) + footer
    self.notify_watcher()
    return doc
def add_section_titles(self):
    """Add any section heading pages that the TOC.txt file
    specifies.  These are sub-book, super-chapter groupings.

    Also add initial numbers to chapters.

    Every chapter in the TOC uses up one <h1>, and gets its number and
    its hidden cookie, whether or not a section has been seen yet.
    That way headings stay in step with the TOC (as make_contents
    expects) even when the book begins with chapters that belong to no
    section.
    """
    log(self.headings)
    headings = iter(self.headings)
    chapter = 1
    section = None
    section_placed = False

    for t in self.toc:
        if t.is_chapter():
            try:
                h1 = headings.next()
            except StopIteration:
                log("heading not found for %s (previous h1 missing?)" % t)
                break

            if section is not None:
                # list the chapter on the current section's page
                item = h1.makeelement('div', Class='chapter')
                log(h1.title, debug='HTMLGEN')
                item.text = h1.title
                _add_initial_number(item, chapter)

                section.append(item)

                if not section_placed:
                    # the section page goes just before its first chapter
                    log("placing section", debug='HTMLGEN')
                    h1.addprevious(section)
                    section_placed = True
                else:
                    log("NOT placing section", debug='HTMLGEN')

            #put a bold number at the beginning of the h1, and a hidden cookie at the end.
            _add_initial_number(h1, chapter)
            _add_chapter_cookie(h1)
            chapter += 1

        elif t.is_section():
            section = self.tree.makeelement('div', Class="subsection")
            # section Element complains when you try to ask it whether it
            # has been placed (though it does know)
            section_placed = False
            heading = lxml.html.fragment_fromstring(t.title, create_parent='div')
            heading.set("Class", "subsection-heading")
            section.append(heading)

    self.notify_watcher()
def add_css(self, css=None, mode='book'):
    """If css looks like a url, use it as a stylesheet link.
    Otherwise it is the CSS itself, which is saved to a temporary file
    and linked to.

    If css is None or blank, the server's default stylesheet for
    <mode> is used (the 'css-<mode>' entry of its defaults).  A server
    with no defaults of its own gets those of DEFAULT_SERVER, as in
    __init__.  Both http and https urls are recognised.

    The stylesheet's url is stored as self.css_url and returned."""
    log("css is %r" % css)
    htmltree = self.tree
    if css is None or not css.strip():
        defaults = SERVER_DEFAULTS.get(self.server, SERVER_DEFAULTS[DEFAULT_SERVER])
        url = 'file://' + os.path.abspath(defaults['css-%s' % mode])
    elif re.match(r'^https?://\S+$', css):
        url = css
    else:
        fn = self.save_tempfile('objavi.css', css)
        url = 'file://' + fn
    #XXX for debugging and perhaps sensible anyway
    #url = url.replace('file:///home/douglas/objavi2', '')

    #find the head -- it's probably first child but lets not assume.
    for child in htmltree:
        if child.tag == 'head':
            head = child
            break
    else:
        # no head at all, so make one
        head = htmltree.makeelement('head')
        htmltree.insert(0, head)

    lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
    self.css_url = url
    self.notify_watcher()
    return url
def set_title(self, title=None):
    """If a string is supplied, it becomes the book's title.
    Otherwise a guess is made: the text of the document's first
    <title> element if it has any, or else 'A Manual About ' followed
    by the book's name.  The title is returned."""
    if not title:
        candidates = [x.text_content() for x in self.tree.cssselect('title')]
        if candidates and candidates[0]:
            title = candidates[0]
        else:
            #oh well
            title = 'A Manual About ' + self.book
    self.title = title
    return self.title
def _read_localised_template(self, template, fallbacks=['en']):
    """Try to get the template in the appropriate language, otherwise
    in english.

    <template> is a filename pattern with one %s in it for the
    language code.  self.lang is tried first, then each of
    <fallbacks>.  The contents of the first file that can be opened
    are returned.  IOError is raised if none of them can be."""
    languages = [self.lang] + fallbacks
    for lang in languages:
        fn = template % (lang)
        try:
            f = open(fn)
        except IOError:
            e = sys.exc_info()[1]
            log("couldn't open template for lang %s (filename %s)" % (lang, fn))
            log(e)
            continue
        try:
            return f.read()
        finally:
            f.close()
    # nothing could be opened.  Say so, rather than tripping over an
    # unset file variable.
    raise IOError("no template %r found for any of the languages %r" %
                  (template, languages))
def compose_inside_cover(self):
    """create the markup for the preamble inside cover, by filling
    in the localised template with today's date, the ISBN line (empty
    if there is no isbn), and the license."""
    template = self._read_localised_template(config.INSIDE_FRONT_COVER_TEMPLATE)

    isbn_text = ''
    if self.isbn:
        isbn_text = '<b>ISBN :</b> %s <br>' % self.isbn

    fields = {'date': time.strftime('%Y-%m-%d'),
              'isbn': isbn_text,
              'license': self.license,
              }
    return template % fields
def compose_end_matter(self):
    """create the markup for the end_matter inside cover.  If
    self.isbn is not set, the html will result in a pdf that
    spills onto two pages.
    """
    template = self._read_localised_template(config.END_MATTER_TEMPLATE)

    # with no isbn, a page break is forced after the inside cover
    if self.isbn:
        inside_cover_style = ''
    else:
        inside_cover_style = 'page-break-after: always'

    return template % {'css_url': self.css_url,
                       'title': self.title,
                       'inside_cover_style': inside_cover_style,
                       }
def spawn_x(self):
    """Start an Xvfb instance, using a new server number.  A
    reference to it is stored in self.xvfb, which is used to kill
    it when the pdf is done.

    Note that Xvfb doesn't interact well with dbus which is
    present on modern desktops.

    The XAUTHORITY and DISPLAY environment variables of this process
    are changed to point at the new server.
    """
    #Find an unused server number (in case two cgis are running at once)
    servernum = random.randrange(50, 500)
    while os.path.exists('/tmp/.X%s-lock' % servernum):
        servernum = random.randrange(50, 500)

    self.xserver_no = ':%s' % servernum

    os.environ['XAUTHORITY'] = self.filepath('Xauthority')

    #mcookie(1) eats into /dev/random, so avoid that
    from hashlib import md5
    seed = "%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32))
    mcookie = md5(seed).hexdigest()

    check_call(['xauth', 'add', self.xserver_no, '.', mcookie])

    xvfb_cmd = ['Xvfb', self.xserver_no,
                '-screen', '0', '1024x768x24',
                '-pixdepths', '32',
                #'-blackpixel', '0',
                #'-whitepixel', str(2 ** 24 -1),
                #'+extension', 'Composite',
                '-dpi', '96',
                '-kb',
                '-nolisten', 'tcp',
                ]
    self.xvfb = Popen(xvfb_cmd)

    # We need to wait a bit before the Xvfb is ready.  but the
    # downloads are so slow that that probably doesn't matter

    self.xvfb_ready_time = time.time() + 2

    os.environ['DISPLAY'] = self.xserver_no
    log(self.xserver_no)
def wait_for_xvfb(self):
    """wait until a previously set time before continuing.  This
    is so Xvfb has time to properly start.

    Nothing happens if spawn_x() has not set self.xvfb."""
    if hasattr(self, 'xvfb'):
        # time still to go before Xvfb is expected to be ready
        d = self.xvfb_ready_time - time.time()
        if d > 0:
            time.sleep(d)
            self.notify_watcher()
def cleanup_x(self):
    """Try very hard to kill off Xvfb.  In addition to killing
    this instance's xvfb, occasionally (randomly) search for
    escaped Xvfb instances and kill those too.

    Nothing happens if this instance never started an Xvfb."""
    if not hasattr(self, 'xvfb'):
        return
    from subprocess import CalledProcessError
    try:
        check_call(['xauth', 'remove', self.xserver_no])
    except (OSError, CalledProcessError):
        # not being able to tidy up the xauth entry is no reason to
        # leave the Xvfb running
        log("couldn't remove the xauth entry for %s: %s" %
            (self.xserver_no, sys.exc_info()[1]))
    p = self.xvfb
    log("trying to kill Xvfb %s" % p.pid)
    os.kill(p.pid, 15)
    # give it up to 10 x 0.2 seconds to go quietly
    for i in range(10):
        if p.poll() is not None:
            log("%s died with %s" % (p.pid, p.poll()))
            break
        log("%s not dead yet" % p.pid)
        time.sleep(0.2)
    else:
        log("Xvfb would not die! kill -9! kill -9!")
        os.kill(p.pid, 9)

    if random.random() < 0.05:
        #kill old xvfbs occasionally, if there are any.
        self.kill_old_xvfbs()
def kill_old_xvfbs(self):
    """Sometimes, despite everything, Xvfb instances hang around
    well after they are wanted -- for example if the cgi process
    dies particularly badly.  So kill them if they have been
    running for a long time.

    ps is asked for the pid and elapsed time of every Xvfb.  Any that
    has been going for more than 50 minutes is sent signal 15 and,
    half a second later, signal 9."""
    log("running kill_old_xvfbs")
    p = Popen(['ps', '-C', 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
    data = p.communicate()[0].strip()
    if data:
        for line in data.split('\n'):
            log('dealing with ps output "%s"' % line)
            # ps pads its columns, so strip the line before matching
            m = re.match(r'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$', line.strip())
            if m is None:
                log("Couldn't parse that line!")
                continue
            pid, days, hours, minutes, seconds = m.groups()
            # 50 minutes should be enough xvfb time for anyone
            if days or hours or int(minutes) > 50:
                log("going to kill pid %s" % pid)
                try:
                    os.kill(int(pid), 15)
                    time.sleep(0.5)
                    os.kill(int(pid), 9)
                except OSError:
                    # most likely it has already gone
                    log("couldn't signal pid %s: %s" % (pid, sys.exc_info()[1]))
    self.notify_watcher()
def cleanup(self):
    """Shut down Xvfb and, unless config.KEEP_TEMP_FILES is set,
    remove every file in the working directory and then the
    directory itself."""
    self.cleanup_x()
    workdir = self.workdir
    if config.KEEP_TEMP_FILES:
        log("NOT removing '%s', containing the following files:" % workdir)
        log(*os.listdir(workdir))
    else:
        for fn in os.listdir(workdir):
            os.remove(os.path.join(workdir, fn))
        os.rmdir(workdir)

    self.notify_watcher()