Use correct input for side and bottom margins
[objavi2.git] / fmbook.py
blobe006eb156c674ba9db4c81aab3decdcb813d58f9
1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
3 # their interactions.
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
22 PDF"""
24 import os, sys
25 import tempfile
26 import re, time
27 import random
28 from urllib2 import urlopen
29 from subprocess import Popen, check_call, PIPE
31 import lxml.etree, lxml.html
32 import lxml, lxml.html, lxml.etree
34 import config
35 from config import SERVER_DEFAULTS, DEFAULT_SERVER, POINT_2_MM, PDFEDIT_MAX_PAGES
# Scratch area in which each book gets its own working directory;
# stored as an absolute path.
TMPDIR = os.path.abspath(config.TMPDIR)

# Web server document root, taken from the environment, falling back to
# the current directory when DOCUMENT_ROOT is not set.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')

# Directory (with trailing slash) under the document root where finished
# books are published.
PUBLISH_PATH = "%s/books/" % (DOC_ROOT,)
def log(*messages, **kwargs):
    """Write each message to stderr, one per line.

    If a <debug> keyword is given, the messages are only printed when
    its value is in config.DEBUG_MODES or config.DEBUG_ALL is true.
    Without a <debug> keyword the messages are always printed."""
    if 'debug' in kwargs:
        enabled = config.DEBUG_ALL or kwargs['debug'] in config.DEBUG_MODES
        if not enabled:
            return

    for m in messages:
        try:
            print >> sys.stderr, m
        except Exception:
            # the message couldn't be printed as is; fall back to its repr.
            print >> sys.stderr, repr(m)
53 def _add_initial_number(e, n):
54 """Put a styled chapter number n at the beginning of element e."""
55 initial = e.makeelement("strong", Class="initial")
56 e.insert(0, initial)
57 initial.tail = ' '
58 if e.text is not None:
59 initial.tail += e.text
60 e.text = ''
61 initial.text = "%s." % n
class TocItem(object):
    """One (status, chapter, title) entry from a TOC.txt file.

    <status> is kept exactly as given and compared as a string:
        '0' - section heading with no chapter
        '1' - chapter heading
        '2' - book title
    <chapter> is the twiki name of the chapter.
    <title> is a human readable name of the chapter.
    """
    def __init__(self, status, chapter, title):
        self.status = status
        self.chapter = chapter
        self.title = title

    def is_chapter(self):
        """True if this entry is a chapter heading (status '1')."""
        return self.status == '1'

    def is_section(self):
        """True if this entry is a section heading (status '0')."""
        return self.status == '0'

    def __str__(self):
        pairs = ['%s: %s' % (k, v) for k, v in self.__dict__.items()]
        return '<toc: %s>' % ', '.join(pairs)
def run(cmd):
    """Run cmd (a sequence of argument strings), then log what happened.

    The command's stdout and stderr are captured and logged along with
    its return code.  If the process can't be started or communicated
    with, the command is logged and the exception is re-raised.
    Nothing is returned, and a non-zero exit status is not an error."""
    try:
        process = Popen(cmd, stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
    except Exception:
        log("Failed on command: %r" % cmd)
        raise

    report = "%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" % (
        ' '.join(cmd), cmd[0], process.poll(), stdout, stderr)
    log(report)
def find_containing_paper(w, h):
    """Pick the first entry in config.PAPER_SIZES that a w x h page fits on.

    Returns a tuple (name, x_margin, y_margin), where the margins are
    half of the space left over in each dimension (i.e. the page is
    centred on the paper).  Raises ValueError if no paper is big enough.

    w and h are presumably in points -- the error message converts them
    with POINT_2_MM -- and are compared directly against the paper sizes.
    """
    for name, paper_w, paper_h in config.PAPER_SIZES:
        if paper_w < w or paper_h < h:
            continue
        return (name, (paper_w - w) * 0.5, (paper_h - h) * 0.5)

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * POINT_2_MM, h * POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    An instance remembers the page size, gutter, margins and column
    layout, and knows how to build the command lines that render html
    to PDF, reshape and number the result, and make a barcode page."""

    def __init__(self, pointsize, **kwargs):
        """<pointsize> is a (width, height) pair.

        Optional keywords: gutter, top_margin, side_margin,
        bottom_margin, column_margin, columns, moz_printer and
        grey_scale.  Any of them (other than grey_scale) may be omitted
        or given as None to get the calculated default.  Greyscale
        output is switched on by the mere presence of a grey_scale
        keyword, whatever its value.

        Raises ValueError (from find_containing_paper) if the page does
        not fit on any known paper size."""
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.

        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)
        self.grey_scale = 'grey_scale' in kwargs

        def setting(key, default):
            # An explicit None means the same as leaving the keyword
            # out.  Resolving that once, here, keeps every later
            # calculation (number_bottom, the browser margins, ...)
            # working on real numbers.
            value = kwargs.get(key)
            if value is None:
                return default
            return value

        default_gutter = config.BASE_GUTTER + config.PROPORTIONAL_GUTTER * self.width
        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))

        self.gutter = setting('gutter', default_gutter)
        self.top_margin = setting('top_margin', default_margin)
        self.moz_printer = setting('moz_printer', 'objavi_' + self.papersize)
        self.side_margin = setting('side_margin', default_margin)
        self.bottom_margin = setting('bottom_margin', default_margin)
        self.columns = setting('columns', 1)

        self.column_margin = setting('column_margin',
                                     default_margin * 2 / (4.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers, in the order top, side,
        # bottom, side.  Each includes the part of the paper that will
        # later be clipped away; the sides also get half the gutter and
        # the bottom gets room for the page number.
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            self.margins.append((m + clip) * POINT_2_MM)

        # dump everything for debugging
        for x in locals().iteritems():
            log("%s: %s" % x, debug='PDFGEN')
        for x in dir(self):
            log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf, outline=False):
        """Build (and log) the wkhtmltopdf command line that turns
        <html> into <pdf>."""
        m = [str(x) for x in self.margins]
        outline_args = ['--outline'] * outline
        greyscale_args = ['-g'] * self.grey_scale
        cmd = ([config.WKHTMLTOPDF, '-q', '-s', self.papersize,
                '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
                '-d', '100'] + outline_args + greyscale_args +
               config.WKHTMLTOPDF_EXTRA_COMMANDS + [html, pdf])
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf, outline=False):
        """Build (and log) the firefox command line that prints <html>.

        <pdf> and <outline> are accepted so the signature matches
        _webkit_command, but they are not used in the command; the
        output is determined by the named moz_printer."""
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def make_raw_pdf(self, html, pdf, engine='webkit', outline=False):
        """Render <html> to <pdf> using the named engine.

        <engine> selects the _<engine>_command method ('webkit' or
        'gecko'); an unknown engine raises AttributeError.  With more
        than one column, a narrow single-column PDF is made and reshaped
        first, then its pages are laid side by side with pdfnup."""
        func = getattr(self, '_%s_command' % engine)
        if self.columns == 1:
            run(func(html, pdf, outline=outline))
            return

        printable_width = self.width - 2.0 * self.side_margin - self.gutter
        column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
        page_width = column_width + self.column_margin

        column_settings = {'moz_printer': self.moz_printer,
                           'gutter': 0,
                           'top_margin': self.top_margin,
                           'side_margin': self.column_margin * 0.5,
                           'bottom_margin': self.bottom_margin,
                           }
        if self.grey_scale:
            # greyscale is triggered by the keyword's presence, so only
            # pass it on when it is wanted.
            column_settings['grey_scale'] = True
        columnmaker = PageSettings((page_width, self.height), **column_settings)

        column_pdf = pdf[:-4] + '-single-column.pdf'
        columnmaker.make_raw_pdf(html, column_pdf, engine=engine, outline=outline)
        columnmaker.reshape_pdf(column_pdf)

        cmd = ['pdfnup',
               '--nup', '%sx1' % int(self.columns),
               '--paper', self.papersize.lower() + 'paper',
               '--outfile', pdf,
               '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
               '--noautoscale', 'true',
               '--orient', 'portrait',
               #'--tidy', 'false',
               column_pdf
               ]
        run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.

        The work is done in place by the wk_objavi.qs pdfedit script.
        The shift is only requested if there is a gutter, and its
        direction is reversed for RTL."""
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to <pdf> in place, in a single pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to <pdf>; keywords are as for _number_pdf.

        <pages> is the (possibly inexact) page count, or None if it is
        unknown.  If there are too many pages for pdfedit to handle in
        one go, the pdf is split into sections with pdftk, each section
        is numbered separately, and the sections are joined back up."""
        if pages is None or pages <= PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
            return

        # section_size must be even
        sections = pages // PDFEDIT_MAX_PAGES + 1
        section_size = (pages // sections + 2) & ~1

        # Page positions always count from 1, whatever number is to be
        # printed on the first page, so keep the two apart.
        number_offset = kwargs.pop('number_start', 1) - 1

        pdf_sections = []
        s = 1
        while s < pages:
            e = s + section_size - 1
            pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], s, e)
            if e < pages - 1:
                page_range = '%s-%s' % (s, e)
            else:
                # <pages> may be wrong, so let pdftk find the real end.
                page_range = '%s-end' % s
            run(['pdftk',
                 pdf,
                 'cat',
                 page_range,
                 'output',
                 pdf_section,
                 ])
            self._number_pdf(pdf_section, number_start=s + number_offset, **kwargs)
            pdf_sections.append(pdf_section)
            s = e + 1

        concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page."""

        position = '%s,%s,%s,%s,%s' % (corner, self.width, self.height,
                                       self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # p2 has the read end now; dropping our copy lets p1 see a
        # broken pipe if p2 gives up early.
        p1.stdout.close()
        out, err = p2.communicate()
        # reap p1 so that its return code is there to be logged.
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def count_pdf_pages(pdf):
    """How many pages in the PDF?

    Runs pdfinfo on <pdf> and returns the number on its "Pages:" line
    as an int.  Raises ValueError if pdfinfo's output has no such line
    (for instance because the file is missing or is not a PDF)."""
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    # the output has to be passed as the string to search; re.MULTILINE
    # is a flag, and lets ^ and $ match at each line of that output.
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        raise ValueError("pdfinfo reported no page count for %r (stderr: %r)"
                         % (pdf, err))
    return int(m.group(1))
def concat_pdfs(destination, *pdfs):
    """Join all the named pdfs together, in the order given, and save
    the result as <destination>.  Any names that are None are skipped."""
    sources = [x for x in pdfs if x is not None]
    run(['pdftk'] + sources + ['cat', 'output', destination])
def index_pdf(pdf, text=None):
    """Use pdftotext to extract utf-8 text from a pdf, using ^L to
    separate pages.

    <text> is the name of the file to write; it defaults to the pdf's
    name with '.index.txt' appended.  The text file's name is returned."""
    if text is None:
        text = pdf + '.index.txt'
    #'-layout' would keep more of the original formatting
    run(['pdftotext', pdf, text])
    return text
def rotate_pdf(pdfin, pdfout):
    """Turn the PDF on its head: pdftk writes every page of <pdfin>,
    rotated with its 'D' suffix, to <pdfout>."""
    whole_document_upside_down = '1-endD'
    run(['pdftk', pdfin, 'cat', whole_document_upside_down, 'output', pdfout])
def parse_outline(pdf, level_threshold):
    """Create a structure reflecting the outline of a PDF.
    A chapter heading looks like this:

    BookmarkTitle: 2. What is sound?
    BookmarkLevel: 1
    BookmarkPageNumber: 3

    The information comes from `pdftk <pdf> dump_data`.  Returns a
    tuple (contents, outline, page_count): <contents> is a list of
    (title, level, page number) tuples for the bookmarks whose level is
    no greater than <level_threshold>; <outline> is pdftk's raw output;
    <page_count> is the NumberOfPages value, or None if pdftk did not
    report one (for example if it could not read the file).
    """
    cmd = ('pdftk', pdf, 'dump_data')
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    outline, err = p.communicate()
    lines = (x.strip() for x in outline.split('\n') if x.strip())
    contents = []

    def extract(expected, conv=str.strip):
        # Consume one line.  If its key is <expected>, return the
        # converted value; otherwise return None.  StopIteration gets
        # out when the lines are all used up.
        line = lines.next()
        try:
            k, v = line.split(':', 1)
            if k == expected:
                return conv(v)
        except ValueError:
            log("trouble with line %r" % line)
        return None

    page_count = None
    try:
        #There are a few useless variables, then the pagecount, then the contents.
        #The pagecount is useful, so pick it up first.
        while page_count is None:
            page_count = extract('NumberOfPages', int)

        while True:
            title = extract('BookmarkTitle')
            if title is None:
                continue
            level = extract('BookmarkLevel', int)
            pagenum = extract('BookmarkPageNumber', int)
            # drop incomplete bookmarks before comparing levels.
            if None in (level, pagenum):
                continue
            if level <= level_threshold:
                contents.append((title, level, pagenum))
    except StopIteration:
        # ran out of pdftk output, which is how both loops end.
        pass

    if page_count is None:
        log("pdftk reported no NumberOfPages for %r (stderr: %r)" % (pdf, err))

    return contents, outline, page_count
383 class Book(object):
384 page_numbers = 'latin'
385 preamble_page_numbers = 'roman'
386 engine= 'webkit'
387 _try_cleanup_on_del = config.TRY_BOOK_CLEANUP_ON_DEL
389 def notify_watcher(self, message=None):
390 if self.watcher:
391 if message is None:
392 #message is the name of the caller
393 #XXX look at using inspect module
394 import traceback
395 message = traceback.extract_stack(None, 2)[0][2]
396 log("notify_watcher called with '%s'" % message)
397 self.watcher(message)
399 def __enter__(self):
400 return self
402 def __exit__(self, exc_type, exc_value, traceback):
403 self.cleanup()
404 #could deal with exceptions here and return true
406 def __init__(self, book, server, bookname,
407 page_settings=None, engine=None, watcher=None, isbn=None,
408 license=config.DEFAULT_LICENSE):
409 log("*** Starting new book %s ***" % bookname)
410 self.book = book
411 self.server = server
412 self.watcher = watcher
413 self.isbn = isbn
414 self.license = license
415 self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
416 os.chmod(self.workdir, 0755)
417 defaults = SERVER_DEFAULTS[server]
418 self.lang = defaults['lang']
419 self.dir = defaults['dir']
421 self.body_html_file = self.filepath('body.html')
422 self.body_pdf_file = self.filepath('body.pdf')
423 self.body_index_file = self.filepath('body.txt')
424 self.preamble_html_file = self.filepath('preamble.html')
425 self.preamble_pdf_file = self.filepath('preamble.pdf')
426 self.tail_html_file = self.filepath('tail.html')
427 self.tail_pdf_file = self.filepath('tail.pdf')
428 self.isbn_pdf_file = None
429 self.pdf_file = self.filepath('final.pdf')
431 self.publish_name = bookname
432 self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
433 self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)
435 self.book_url = config.BOOK_URL % (self.server, self.book)
436 self.toc_url = config.TOC_URL % (self.server, self.book)
438 self.maker = PageSettings(**page_settings)
440 if engine is not None:
441 self.engine = engine
442 self.notify_watcher()
444 def __del__(self):
445 if self._try_cleanup_on_del and os.path.exists(self.workdir):
446 self._try_cleanup_on_del = False #or else you can get in bad cycles
447 self.cleanup()
449 def __getattr__(self, attr):
450 """catch unloaded books and load them"""
451 #log('looking for missing attribute "%s"' % (attr))
452 if attr == 'tree':
453 self.load_book()
454 return self.tree
455 if attr == 'toc':
456 self.load_toc()
457 return self.toc
458 raise AttributeError("no such member: '%s'" % attr)
461 def filepath(self, fn):
462 return os.path.join(self.workdir, fn)
464 def save_data(self, fn, data):
465 """Save without tripping up on unicode"""
466 if isinstance(data, unicode):
467 data = data.encode('utf8', 'ignore')
468 f = open(fn, 'w')
469 f.write(data)
470 f.close()
472 def save_tempfile(self, fn, data):
473 """Save the data in a temporary directory that will be cleaned
474 up when all is done. Return the absolute file path."""
475 fn = self.filepath(fn)
476 self.save_data(fn, data)
477 return fn
479 def extract_pdf_outline(self):
480 self.outline_contents, self.outline_text, number_of_pages = parse_outline(self.body_pdf_file, 1)
481 for x in self.outline_contents:
482 log(x)
483 return number_of_pages
485 def make_body_pdf(self):
486 """Make a pdf of the HTML, using webkit"""
487 #1. Save the html
488 html_text = lxml.etree.tostring(self.tree, method="html")
489 self.save_data(self.body_html_file, html_text)
491 #2. Make a pdf of it
492 self.maker.make_raw_pdf(self.body_html_file, self.body_pdf_file,
493 engine=self.engine, outline=True)
494 self.notify_watcher('generate_pdf')
496 n_pages = self.extract_pdf_outline()
498 log ("found %s pages in pdf" % n_pages)
499 #4. resize pages, shift gutters, even pages
500 self.maker.reshape_pdf(self.body_pdf_file, self.dir, centre_end=True)
501 self.notify_watcher('reshape_pdf')
503 #5 add page numbers
504 self.maker.number_pdf(self.body_pdf_file, n_pages, dir=self.dir,
505 numbers=self.page_numbers)
506 self.notify_watcher("number_pdf")
507 self.notify_watcher()
509 def make_preamble_pdf(self):
510 contents = self.make_contents()
511 inside_cover_html = self.compose_inside_cover()
512 html = ('<html dir="%s"><head>\n'
513 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
514 '<link rel="stylesheet" href="%s" />\n'
515 '</head>\n<body>\n'
516 '<h1 class="frontpage">%s</h1>'
517 '%s\n'
518 '<div class="contents">%s</div>\n'
519 '<div style="page-break-after: always; color:#fff" class="unseen">.'
520 '<!--%s--></div></body></html>'
521 ) % (self.dir, self.css_url, self.title, inside_cover_html,
522 contents, self.title)
523 self.save_data(self.preamble_html_file, html)
525 self.maker.make_raw_pdf(self.preamble_html_file, self.preamble_pdf_file,
526 engine=self.engine)
528 self.maker.reshape_pdf(self.preamble_pdf_file, self.dir, centre_start=True)
530 self.maker.number_pdf(self.preamble_pdf_file, None, dir=self.dir,
531 numbers=self.preamble_page_numbers,
532 number_start=-2)
534 self.notify_watcher()
536 def make_end_matter_pdf(self):
537 """Make an inside back cover and a back cover. If there is an
538 isbn number its barcode will be put on the back cover."""
539 if self.isbn:
540 self.isbn_pdf_file = self.filepath('isbn.pdf')
541 self.maker.make_barcode_pdf(self.isbn, self.isbn_pdf_file)
542 self.notify_watcher('make_barcode_pdf')
544 self.save_data(self.tail_html_file, self.compose_end_matter())
545 self.maker.make_raw_pdf(self.tail_html_file, self.tail_pdf_file,
546 engine=self.engine)
548 self.maker.reshape_pdf(self.tail_pdf_file, self.dir, centre_start=True,
549 centre_end=True, even_pages=False)
550 self.notify_watcher()
552 def make_book_pdf(self):
553 """A convenient wrapper of a few necessary steps"""
554 # now the Xvfb server is needed. make sure it has had long enough to get going
555 self.wait_for_xvfb()
556 self.make_body_pdf()
557 self.make_preamble_pdf()
558 self.make_end_matter_pdf()
560 concat_pdfs(self.pdf_file, self.preamble_pdf_file,
561 self.body_pdf_file, self.tail_pdf_file,
562 self.isbn_pdf_file)
564 self.notify_watcher('concatenated_pdfs')
567 def make_simple_pdf(self, mode):
568 """Make a simple pdf document without contents or separate
569 title page. This is used for multicolumn newspapers and for
570 web-destined pdfs."""
571 self.wait_for_xvfb()
572 #0. Add heading to begining of html
573 body = list(self.tree.cssselect('body'))[0]
574 e = body.makeelement('h1', {'id': 'book-title'})
575 e.text = self.title
576 body.insert(0, e)
577 intro = lxml.html.fragment_fromstring(self.compose_inside_cover())
578 e.addnext(intro)
580 #0.5 adjust parameters to suit the particular kind of output
581 if mode == 'web':
582 self.maker.gutter = 0
584 #1. Save the html
585 html_text = lxml.etree.tostring(self.tree, method="html")
586 self.save_data(self.body_html_file, html_text)
588 #2. Make a pdf of it (direct to to final pdf)
589 self.maker.make_raw_pdf(self.body_html_file, self.pdf_file,
590 engine=self.engine, outline=True)
591 self.notify_watcher('generate_pdf')
592 n_pages = self.extract_pdf_outline()
594 if mode != 'web':
595 #3. resize pages and shift gutters.
596 self.maker.reshape_pdf(self.pdf_file, self.dir, centre_end=True)
597 self.notify_watcher('reshape_pdf')
599 #4. add page numbers
600 self.maker.number_pdf(self.pdf_file, n_pages,
601 dir=self.dir, numbers=self.page_numbers)
602 self.notify_watcher("number_pdf")
603 self.notify_watcher()
606 def rotate180(self):
607 """Rotate the pdf 180 degrees so an RTL book can print on LTR
608 presses."""
609 rotated = self.filepath('final-rotate.pdf')
610 unrotated = self.filepath('final-pre-rotate.pdf')
611 #leave the unrotated pdf intact at first, in case of error.
612 rotate_pdf(self.pdf_file, rotated)
613 os.rename(self.pdf_file, unrotated)
614 os.rename(rotated, self.pdf_file)
615 self.notify_watcher()
617 def publish_pdf(self):
618 """Move the finished PDF to its final resting place"""
619 log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
620 os.rename(self.pdf_file, self.publish_file)
621 self.notify_watcher()
623 def load_toc(self):
624 """From the TOC.txt file create a list of TocItems with
625 the attributes <status>, <chapter>, and <title>.
627 <status> is a number, with the following meaning:
629 0 - section heading with no chapter
630 1 - chapter heading
631 2 - book title
633 The TocItem object has convenience functions <is_chapter> and
634 <is_section>.
636 <chapter> is twiki name of the chapter.
638 <title> is a human readable title for the chapter. It is likely to
639 differ from the title given in the chapter's <h1> heading.
641 f = urlopen(self.toc_url)
642 self.toc = []
643 while True:
644 try:
645 self.toc.append(TocItem(f.next().strip(),
646 f.next().strip(),
647 f.next().strip()))
648 except StopIteration:
649 break
650 f.close()
651 self.notify_watcher()
653 def load_book(self, tidy=True):
654 """Fetch and parse the raw html of the book. If tidy is true
655 (default) links in the document will be made absolute."""
656 f = urlopen(self.book_url)
657 html = f.read()
658 f.close()
659 html = ('<html dir="%s"><head>\n<title>%s</title>\n'
660 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
661 '</head>\n<body>\n'
662 '%s\n'
663 '<div style="page-break-before: always; color:#fff;" class="unseen">'
664 'A FLOSSManuals book</div>\n</body></html>'
665 ) % (self.dir, self.book, html)
667 self.save_tempfile('raw.html', html)
669 tree = lxml.html.document_fromstring(html)
670 if tidy:
671 tree.make_links_absolute(self.book_url)
672 self.tree = tree
673 self.headings = [x for x in tree.cssselect('h1')]
674 if self.headings:
675 self.headings[0].set('class', "first-heading")
676 for h1 in self.headings:
677 h1.title = h1.text_content().strip()
678 self.notify_watcher()
680 def load(self):
681 """Wrapper around all necessary load methods."""
682 self.load_book()
683 self.load_toc()
685 def make_contents(self):
686 """Generate HTML containing the table of contents. This can
687 only be done after the main PDF has been made."""
688 header = '<h1>Table of Contents</h1><table class="toc">\n'
689 row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
690 '<td class="pagenumber">%s</td></tr>\n')
691 section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
692 footer = '\n</table>'
694 contents = []
696 chapter = 1
697 page_num = 1
698 subsections = [] # for the subsection heading pages.
700 outline_contents = iter(self.outline_contents)
701 headings = iter(self.headings)
703 for t in self.toc:
704 if t.is_chapter():
705 try:
706 h1 = headings.next()
707 except StopIteration:
708 log("heading not found for %s (previous h1 missing?). Stopping" % t)
709 break
710 h1_text, level, page_num = outline_contents.next()
711 log("%r %r" % (h1.title, h1_text))
712 contents.append(row_tmpl % (chapter, h1.title, page_num))
713 chapter += 1
714 elif t.is_section():
715 contents.append(section_tmpl % t.title)
716 else:
717 log("mystery TOC item: %s" % t)
719 doc = header + '\n'.join(contents) + footer
720 self.notify_watcher()
721 return doc
723 def add_section_titles(self):
724 """Add any section heading pages that the TOC.txt file
725 specifies. These are sub-book, super-chapter groupings.
727 Also add initial numbers to chapters.
729 headings = iter(self.headings)
730 chapter = 1
731 section = None
733 for t in self.toc:
734 if t.is_chapter() and section is not None:
735 try:
736 h1 = headings.next()
737 except StopIteration:
738 log("heading not found for %s (previous h1 missing?)" % t)
739 break
740 item = h1.makeelement('div', Class='chapter')
741 log(h1.title, debug='HTMLGEN')
742 item.text = h1.title
743 _add_initial_number(item, chapter)
745 section.append(item)
747 if not section_placed:
748 log("placing section", debug='HTMLGEN')
749 h1.addprevious(section)
750 section_placed = True
751 else:
752 log("NOT placing section", debug='HTMLGEN')
754 #put a bold number at the beginning of the h1.
755 _add_initial_number(h1, chapter)
756 chapter += 1
758 elif t.is_section():
759 section = self.tree.makeelement('div', Class="subsection")
760 # section Element complains when you try to ask it whether it
761 # has been placed (though it does know)
762 section_placed = False
763 heading = lxml.html.fragment_fromstring(t.title, create_parent='div')
764 heading.set("Class", "subsection-heading")
765 section.append(heading)
767 self.notify_watcher()
770 def add_css(self, css=None, mode='book'):
771 """If css looks like a url, use it as a stylesheet link.
772 Otherwise it is the CSS itself, which is saved to a temporary file
773 and linked to."""
774 log("css is %r" % css)
775 htmltree = self.tree
776 if css is None or not css.strip():
777 defaults = SERVER_DEFAULTS[self.server]
778 url = 'file://' + os.path.abspath(defaults['css-%s' % mode])
779 elif not re.match(r'^http://\S+$', css):
780 fn = self.save_tempfile('objavi.css', css)
781 url = 'file://' + fn
782 else:
783 url = css
784 #XXX for debugging and perhaps sensible anyway
785 #url = url.replace('file:///home/douglas/objavi2', '')
788 #find the head -- it's probably first child but lets not assume.
789 for child in htmltree:
790 if child.tag == 'head':
791 head = child
792 break
793 else:
794 head = htmltree.makeelement('head')
795 htmltree.insert(0, head)
797 link = lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
798 self.css_url = url
799 self.notify_watcher()
800 return url
802 def set_title(self, title=None):
803 """If a string is supplied, it becomes the book's title.
804 Otherwise a guess is made."""
805 if title:
806 self.title = title
807 else:
808 titles = [x.text_content() for x in self.tree.cssselect('title')]
809 if titles and titles[0]:
810 self.title = titles[0]
811 else:
812 #oh well
813 self.title = 'A Manual About ' + self.book
814 return self.title
816 def _read_localised_template(self, template, fallbacks=['en']):
817 """Try to get the template in the approriate language, otherwise in english."""
818 for lang in [self.lang] + fallbacks:
819 try:
820 fn = template % (lang)
821 f = open(fn)
822 break
823 except IOError, e:
824 log("couldn't open inside front cover for lang %s (filename %s)" % (lang, fn))
825 log(e)
826 template = f.read()
827 f.close()
828 return template
830 def compose_inside_cover(self):
831 """create the markup for the preamble inside cover."""
832 template = self._read_localised_template(config.INSIDE_FRONT_COVER_TEMPLATE)
834 if self.isbn:
835 isbn_text = '<b>ISBN :</b> %s <br>' % self.isbn
836 else:
837 isbn_text = ''
839 return template % {'date': time.strftime('%Y-%m-%d'),
840 'isbn': isbn_text,
841 'license': self.license,
845 def compose_end_matter(self):
846 """create the markup for the end_matter inside cover. If
847 self.isbn is not set, the html will result in a pdf that
848 spills onto two pages.
850 template = self._read_localised_template(config.END_MATTER_TEMPLATE)
852 d = {'css_url': self.css_url,
853 'title': self.title
856 if self.isbn:
857 d['inside_cover_style'] = ''
858 else:
859 d['inside_cover_style'] = 'page-break-after: always'
861 return template % d
866 def spawn_x(self):
867 """Start an Xvfb instance, using a new server number. A
868 reference to it is stored in self.xvfb, which is used to kill
869 it when the pdf is done.
871 Note that Xvfb doesn't interact well with dbus which is
872 present on modern desktops.
874 #Find an unused server number (in case two cgis are running at once)
875 while True:
876 servernum = random.randrange(50, 500)
877 if not os.path.exists('/tmp/.X%s-lock' % servernum):
878 break
880 self.xserver_no = ':%s' % servernum
882 authfile = self.filepath('Xauthority')
883 os.environ['XAUTHORITY'] = authfile
885 #mcookie(1) eats into /dev/random, so avoid that
886 from hashlib import md5
887 m = md5("%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32)))
888 mcookie = m.hexdigest()
890 check_call(['xauth', 'add', self.xserver_no, '.', mcookie])
892 self.xvfb = Popen(['Xvfb', self.xserver_no,
893 '-screen', '0', '1024x768x24',
894 '-pixdepths', '32',
895 #'-blackpixel', '0',
896 #'-whitepixel', str(2 ** 24 -1),
897 #'+extension', 'Composite',
898 '-dpi', '96',
899 '-kb',
900 '-nolisten', 'tcp',
903 # We need to wait a bit before the Xvfb is ready. but the
904 # downloads are so slow that that probably doesn't matter
906 self.xvfb_ready_time = time.time() + 2
908 os.environ['DISPLAY'] = self.xserver_no
909 log(self.xserver_no)
911 def wait_for_xvfb(self):
912 """wait until a previously set time before continuing. This
913 is so Xvfb has time to properly start."""
914 if hasattr(self, 'xvfb'):
915 d = self.xvfb_ready_time - time.time()
916 if d > 0:
917 time.sleep(d)
918 self.notify_watcher()
920 def cleanup_x(self):
921 """Try very hard to kill off Xvfb. In addition to killing
922 this instance's xvfb, occasionally (randomly) search for
923 escaped Xvfb instances and kill those too."""
924 if not hasattr(self, 'xvfb'):
925 return
926 check_call(['xauth', 'remove', self.xserver_no])
927 p = self.xvfb
928 log("trying to kill Xvfb %s" % p.pid)
929 os.kill(p.pid, 15)
930 for i in range(10):
931 if p.poll() is not None:
932 log("%s died with %s" % (p.pid, p.poll()))
933 break
934 log("%s not dead yet" % p.pid)
935 time.sleep(0.2)
936 else:
937 log("Xvfb would not die! kill -9! kill -9!")
938 os.kill(p.pid, 9)
940 if random.random() < 0.05:
941 #kill old xvfbs occasionally, if there are any.
942 self.kill_old_xvfbs()
944 def kill_old_xvfbs(self):
945 """Sometimes, despite everything, Xvfb instances hang around
946 well after they are wanted -- for example if the cgi process
947 dies particularly badly. So kill them if they have been
948 running for a long time."""
949 log("running kill_old_xvfbs")
950 p = Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
951 data = p.communicate()[0].strip()
952 if data:
953 lines = data.split('\n')
954 for line in lines:
955 log('dealing with ps output "%s"' % line)
956 try:
957 pid, days_, hours, minutes, seconds = re.match(r'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
958 except AttributeError:
959 log("Couldn't parse that line!")
960 # 50 minutes should be enough xvfb time for anyone
961 if days or hours or int(minutes) > 50:
962 log("going to kill pid %s" % pid)
963 os.kill(int(pid), 15)
964 time.sleep(0.5)
965 os.kill(int(pid), 9)
966 self.notify_watcher()
968 def cleanup(self):
969 self.cleanup_x()
970 if not config.KEEP_TEMP_FILES:
971 for fn in os.listdir(self.workdir):
972 os.remove(os.path.join(self.workdir, fn))
973 os.rmdir(self.workdir)
974 else:
975 log("NOT removing '%s', containing the following files:" % self.workdir)
976 log(*os.listdir(self.workdir))
978 self.notify_watcher()