Don't define Book.__del__ if it is not doing anything
[objavi2.git] / fmbook.py
blob7624252c782e4f2bcb7df5ba40184efb202a8e92
1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
3 # their interactions.
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
22 PDF"""
24 import os, sys
25 import tempfile
26 import re, time
27 import random
28 from urllib2 import urlopen
29 from subprocess import Popen, check_call, PIPE
31 import lxml.etree, lxml.html
32 import lxml, lxml.html, lxml.etree
34 import config
35 from config import SERVER_DEFAULTS, DEFAULT_SERVER, POINT_2_MM, PDFEDIT_MAX_PAGES
# Absolute path of the directory in which per-book working directories
# are created.
TMPDIR = os.path.abspath(config.TMPDIR)
# Web server document root; falls back to the current directory when
# DOCUMENT_ROOT is not in the environment.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')
# Finished books are published under <DOC_ROOT>/books/
PUBLISH_PATH = "%s/books/" % (DOC_ROOT,)
def log(*messages, **kwargs):
    """Write each message to stderr, one per line.

    If a <debug> keyword is given, the messages are only printed when
    config.DEBUG_ALL is set or the keyword's value is in
    config.DEBUG_MODES.  A message that cannot be printed directly is
    printed as its repr() instead.
    """
    if ('debug' in kwargs and not config.DEBUG_ALL
        and kwargs['debug'] not in config.DEBUG_MODES):
        return
    for message in messages:
        try:
            print >> sys.stderr, message
        except Exception:
            print >> sys.stderr, repr(message)
def _add_initial_number(e, n):
    """Insert a <strong class="initial"> element holding "n." as the
    first child of element e.

    Any text that e started with is moved after the new element
    (preceded by a single space), so it still reads in the same place.
    """
    marker = e.makeelement("strong", Class="initial")
    marker.text = "%s." % n
    e.insert(0, marker)
    leading_text = e.text
    if leading_text is None:
        marker.tail = ' '
    else:
        marker.tail = ' ' + leading_text
    e.text = ''
class TocItem(object):
    """One entry from a TOC.txt file.

    status is a string:
      '0' - section heading with no chapter
      '1' - chapter heading
      '2' - book title

    chapter is the twiki name of the chapter, and title is a human
    readable name for it.
    """

    def __init__(self, status, chapter, title):
        self.status, self.chapter, self.title = status, chapter, title

    def is_chapter(self):
        """True if this item is a chapter heading (status '1')."""
        return '1' == self.status

    def is_section(self):
        """True if this item is a section heading (status '0')."""
        return '0' == self.status

    def __str__(self):
        pairs = ['%s: %s' % pair for pair in vars(self).items()]
        return '<toc: %s>' % ', '.join(pairs)
def run(cmd):
    """Run cmd (a sequence of argument strings), wait for it to finish,
    and log its exit status, stdout and stderr.

    Any exception raised while starting or communicating with the
    process is logged and re-raised.  Nothing is returned, and a
    non-zero exit status is logged but not treated as an error.
    """
    try:
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = p.communicate()
    except Exception:
        # cmd is wrapped in a 1-tuple so that a tuple command cannot be
        # mistaken for multiple format arguments (which would raise a
        # TypeError here and hide the real exception).
        log("Failed on command: %r" % (cmd,))
        raise
    log("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" %
        (' '.join(cmd), cmd[0], p.poll(), out, err))
def find_containing_paper(w, h):
    """Return (name, margin_w, margin_h) for the first entry in
    config.PAPER_SIZES that is at least w wide and h high.

    The margins are half the leftover width and height, i.e. what is
    spare on each side when the w x h page is centred on the paper.
    Raises ValueError if no listed paper is big enough.
    """
    for name, paper_w, paper_h in config.PAPER_SIZES:
        if paper_w < w or paper_h < h:
            continue
        return (name, (paper_w - w) * 0.5, (paper_h - h) * 0.5)

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * POINT_2_MM, h * POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    All measurements are in points unless otherwise stated.  User
    interaction is in *mm*, but is converted in objavi2.py.
    """

    def __init__(self, pointsize, **kwargs):
        """Work out paper size, margins and column layout.

        pointsize is a (width, height) pair.  Recognised keyword
        arguments are top_margin, side_margin, bottom_margin, gutter,
        columns (a number, or 'auto'), column_margin, moz_printer and
        grey_scale.  Note that the mere *presence* of a grey_scale
        keyword switches greyscale on, whatever its value.

        Raises ValueError (from find_containing_paper) if the page
        will not fit on any known paper.
        """
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.

        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)
        self.grey_scale = 'grey_scale' in kwargs

        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))
        default_gutter = (config.BASE_GUTTER + config.PROPORTIONAL_GUTTER * self.width)

        self.top_margin = kwargs.get('top_margin', default_margin)
        self.side_margin = kwargs.get('side_margin', default_margin)
        self.bottom_margin = kwargs.get('bottom_margin', default_margin)
        self.gutter = kwargs.get('gutter', default_gutter)

        self.columns = kwargs.get('columns', 1)
        if self.columns == 'auto': #default for newspapers is to work out columns
            # never less than one column, or make_raw_pdf would divide by zero
            self.columns = max(1, int(self.width // config.MIN_COLUMN_WIDTH))

        self.column_margin = kwargs.get('column_margin',
                                        default_margin * 2 / (5.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers.  The order matches the
        # -T, -R, -B, -L arguments used in _webkit_command.
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            self.margins.append((m + clip) * POINT_2_MM)

        self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))
        for x in locals().iteritems():
            log("%s: %s" % x, debug='PDFGEN')
        for x in dir(self):
            log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf, outline=False):
        """Return the wkhtmltopdf argument list that renders <html>
        into <pdf> with this object's paper size and margins."""
        m = [str(x) for x in self.margins]
        # multiplying by a boolean gives either the list or an empty list
        outline_args = ['--outline'] * outline
        greyscale_args = ['-g'] * self.grey_scale
        cmd = ([config.WKHTMLTOPDF, '-q', '-s', self.papersize,
                '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
                '-d', '100'] + outline_args + greyscale_args +
               config.WKHTMLTOPDF_EXTRA_COMMANDS + [html, pdf])
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf, outline=False):
        """Return the firefox argument list that prints <html> via the
        printer named in self.moz_printer.  <pdf> and <outline> are
        accepted for symmetry with _webkit_command but are not used."""
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def make_raw_pdf(self, html, pdf, engine='webkit', outline=False):
        """Render <html> to <pdf> using the named engine ('webkit' or
        'gecko').

        With a single column the engine is run directly.  With several
        columns, a narrow single-column PDF is made first (using a
        temporary PageSettings one column wide) and pdfnup then lays
        those pages side by side on the real paper.
        """
        func = getattr(self, '_%s_command' % engine)
        if self.columns == 1:
            cmd = func(html, pdf, outline=outline)
            run(cmd)
        else:
            printable_width = self.width - 2.0 * self.side_margin - self.gutter
            column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
            page_width = column_width + self.column_margin
            side_margin = self.column_margin * 0.5

            column_settings = {'moz_printer': self.moz_printer,
                               'gutter': 0,
                               'top_margin': self.top_margin,
                               'side_margin': side_margin,
                               'bottom_margin': self.bottom_margin,
                               }
            # __init__ treats the presence of grey_scale as "on", so it
            # must only be passed along when it really is on.
            if self.grey_scale:
                column_settings['grey_scale'] = True
            columnmaker = PageSettings((page_width, self.height), **column_settings)

            column_pdf = pdf[:-4] + '-single-column.pdf'
            columnmaker.make_raw_pdf(html, column_pdf, engine=engine, outline=outline)
            columnmaker.reshape_pdf(column_pdf)

            cmd = ['pdfnup',
                   '--nup', '%sx1' % int(self.columns),
                   '--paper', self.papersize.lower() + 'paper',
                   '--outfile', pdf,
                   '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
                   '--noautoscale', 'true',
                   '--orient', 'portrait',
                   #'--tidy', 'false',
                   column_pdf
                   ]
            run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.

        The work is done in place by the pdfedit script wk_objavi.qs.
        The shift operation is only requested when there is a gutter,
        and the gutter offset is negated for RTL books.
        """
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to <pdf> in place, in a single pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to <pdf> in place.

        <pages> is the (possibly inexact) page count, or None if it is
        unknown.  Keyword arguments are passed on to _number_pdf.
        """
        # if there are too many pages for pdfedit to handle in one go,
        # split the job into bits.  <pages> may not be exact
        if pages is None or pages <= PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
        else:
            # section_size must be even
            sections = pages // PDFEDIT_MAX_PAGES + 1
            section_size = (pages // sections + 2) & ~1

            pdf_sections = []
            s = kwargs.pop('number_start', 1)
            while s < pages:
                e = s + section_size - 1
                pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], s, e)
                # the last section runs to 'end' because <pages> may be
                # an underestimate
                if e < pages - 1:
                    page_range = '%s-%s' % (s, e)
                else:
                    page_range = '%s-end' % s
                run(['pdftk',
                     pdf,
                     'cat',
                     page_range,
                     'output',
                     pdf_section,
                     ])
                self._number_pdf(pdf_section, number_start=s, **kwargs)
                pdf_sections.append(pdf_section)
                s = e + 1

            concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page."""

        position = '%s,%s,%s,%s,%s' %(corner, self.width, self.height, self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # Drop our copy of the pipe so p1 gets SIGPIPE if p2 exits early.
        p1.stdout.close()
        out, err = p2.communicate()
        # Reap p1 so that its exit status is available for the log.
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def count_pdf_pages(pdf):
    """How many pages in the PDF?

    Runs pdfinfo on <pdf> and reads the "Pages:" line of its output.
    Raises ValueError if the output contains no such line.
    """
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    # The text to search is the second argument; flags come third.
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        raise ValueError("pdfinfo gave no page count for %r (stderr: %r)"
                         % (pdf, err))
    return int(m.group(1))
def concat_pdfs(destination, *pdfs):
    """Join all the named pdfs together, in order, using pdftk and
    save the result as <destination>.  Any pdf given as None is
    skipped."""
    sources = [pdf for pdf in pdfs if pdf is not None]
    run(['pdftk'] + sources + ['cat', 'output', destination])
def index_pdf(pdf, text=None):
    """Run pdftotext to extract the text of <pdf> into the file
    <text>, and return that file's name.  If <text> is not given, it
    defaults to the pdf's name with '.index.txt' appended."""
    target = text
    if target is None:
        target = pdf + '.index.txt'
    #'-layout' would keep more of the original formatting
    run(['pdftotext', pdf, target])
    return target
def rotate_pdf(pdfin, pdfout):
    """Turn the PDF on its head: write a copy of <pdfin> to <pdfout>
    with pdftk's '1-endD' page range applied."""
    pdftk_args = ['pdftk', pdfin]
    pdftk_args += ['cat', '1-endD']
    pdftk_args += ['output', pdfout]
    run(pdftk_args)
def parse_outline(pdf, level_threshold):
    """Create a structure reflecting the outline of a PDF.
    A chapter heading looks like this:

    BookmarkTitle: 2. What is sound?
    BookmarkLevel: 1
    BookmarkPageNumber: 3

    Returns a tuple (contents, outline, page_count) where contents is
    a list of (title, level, page number) tuples for bookmarks whose
    level is no greater than <level_threshold>, outline is the raw
    text from 'pdftk dump_data', and page_count is the NumberOfPages
    value, or None if pdftk did not report one.
    """
    cmd = ('pdftk', pdf, 'dump_data')
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    outline, err = p.communicate()
    lines = (x.strip() for x in outline.split('\n') if x.strip())
    contents = []

    def extract(expected, conv=str.strip):
        """Consume the next line.  Return its converted value if its
        key is <expected>, otherwise None.  StopIteration propagates
        when the lines run out."""
        line = lines.next()
        try:
            k, v = line.split(':', 1)
            if k == expected:
                return conv(v)
        except ValueError:
            log("trouble with line %r" %line)
        return None

    #There are a few useless variables, then the pagecount, then the contents.
    #The pagecount is useful, so pick it up first.
    page_count = None
    try:
        while page_count is None:
            page_count = extract('NumberOfPages', int)
    except StopIteration:
        # pdftk failed, or its output has no page count.
        log("no NumberOfPages found in outline of %r (stderr: %r)" % (pdf, err))
        return contents, outline, page_count

    try:
        while True:
            title = extract('BookmarkTitle')
            if title is not None:
                level = extract('BookmarkLevel', int)
                pagenum = extract('BookmarkPageNumber', int)
                # check for missing values before comparing them
                if None not in (level, pagenum) and level <= level_threshold:
                    contents.append((title, level, pagenum))
    except StopIteration:
        pass

    return contents, outline, page_count
390 class Book(object):
391 page_numbers = 'latin'
392 preamble_page_numbers = 'roman'
393 engine= 'webkit'
394 _try_cleanup_on_del = config.TRY_BOOK_CLEANUP_ON_DEL
396 def notify_watcher(self, message=None):
397 if self.watcher:
398 if message is None:
399 #message is the name of the caller
400 #XXX look at using inspect module
401 import traceback
402 message = traceback.extract_stack(None, 2)[0][2]
403 log("notify_watcher called with '%s'" % message)
404 self.watcher(message)
406 def __enter__(self):
407 return self
409 def __exit__(self, exc_type, exc_value, traceback):
410 self.cleanup()
411 #could deal with exceptions here and return true
413 def __init__(self, book, server, bookname,
414 page_settings=None, engine=None, watcher=None, isbn=None,
415 license=config.DEFAULT_LICENSE):
416 log("*** Starting new book %s ***" % bookname)
417 self.book = book
418 self.server = server
419 self.watcher = watcher
420 self.isbn = isbn
421 self.license = license
422 self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
423 os.chmod(self.workdir, 0755)
424 defaults = SERVER_DEFAULTS[server]
425 self.lang = defaults['lang']
426 self.dir = defaults['dir']
428 self.body_html_file = self.filepath('body.html')
429 self.body_pdf_file = self.filepath('body.pdf')
430 self.body_index_file = self.filepath('body.txt')
431 self.preamble_html_file = self.filepath('preamble.html')
432 self.preamble_pdf_file = self.filepath('preamble.pdf')
433 self.tail_html_file = self.filepath('tail.html')
434 self.tail_pdf_file = self.filepath('tail.pdf')
435 self.isbn_pdf_file = None
436 self.pdf_file = self.filepath('final.pdf')
438 self.publish_name = bookname
439 self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
440 self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)
442 self.book_url = config.BOOK_URL % (self.server, self.book)
443 self.toc_url = config.TOC_URL % (self.server, self.book)
445 self.maker = PageSettings(**page_settings)
447 if engine is not None:
448 self.engine = engine
449 self.notify_watcher()
451 if config.TRY_BOOK_CLEANUP_ON_DEL:
452 #Dont even define __del__ if it is not used.
453 _try_cleanup_on_del = True
454 def __del__(self):
455 if self._try_cleanup_on_del and os.path.exists(self.workdir):
456 self._try_cleanup_on_del = False #or else you can get in bad cycles
457 self.cleanup()
459 def __getattr__(self, attr):
460 """catch unloaded books and load them"""
461 #log('looking for missing attribute "%s"' % (attr))
462 if attr == 'tree':
463 self.load_book()
464 return self.tree
465 if attr == 'toc':
466 self.load_toc()
467 return self.toc
468 raise AttributeError("no such member: '%s'" % attr)
471 def filepath(self, fn):
472 return os.path.join(self.workdir, fn)
474 def save_data(self, fn, data):
475 """Save without tripping up on unicode"""
476 if isinstance(data, unicode):
477 data = data.encode('utf8', 'ignore')
478 f = open(fn, 'w')
479 f.write(data)
480 f.close()
482 def save_tempfile(self, fn, data):
483 """Save the data in a temporary directory that will be cleaned
484 up when all is done. Return the absolute file path."""
485 fn = self.filepath(fn)
486 self.save_data(fn, data)
487 return fn
489 def extract_pdf_outline(self):
490 self.outline_contents, self.outline_text, number_of_pages = parse_outline(self.body_pdf_file, 1)
491 for x in self.outline_contents:
492 log(x)
493 return number_of_pages
495 def make_body_pdf(self):
496 """Make a pdf of the HTML, using webkit"""
497 #1. Save the html
498 html_text = lxml.etree.tostring(self.tree, method="html")
499 self.save_data(self.body_html_file, html_text)
501 #2. Make a pdf of it
502 self.maker.make_raw_pdf(self.body_html_file, self.body_pdf_file,
503 engine=self.engine, outline=True)
504 self.notify_watcher('generate_pdf')
506 n_pages = self.extract_pdf_outline()
508 log ("found %s pages in pdf" % n_pages)
509 #4. resize pages, shift gutters, even pages
510 self.maker.reshape_pdf(self.body_pdf_file, self.dir, centre_end=True)
511 self.notify_watcher('reshape_pdf')
513 #5 add page numbers
514 self.maker.number_pdf(self.body_pdf_file, n_pages, dir=self.dir,
515 numbers=self.page_numbers)
516 self.notify_watcher("number_pdf")
517 self.notify_watcher()
519 def make_preamble_pdf(self):
520 contents = self.make_contents()
521 inside_cover_html = self.compose_inside_cover()
522 html = ('<html dir="%s"><head>\n'
523 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
524 '<link rel="stylesheet" href="%s" />\n'
525 '</head>\n<body>\n'
526 '<h1 class="frontpage">%s</h1>'
527 '%s\n'
528 '<div class="contents">%s</div>\n'
529 '<div style="page-break-after: always; color:#fff" class="unseen">.'
530 '<!--%s--></div></body></html>'
531 ) % (self.dir, self.css_url, self.title, inside_cover_html,
532 contents, self.title)
533 self.save_data(self.preamble_html_file, html)
535 self.maker.make_raw_pdf(self.preamble_html_file, self.preamble_pdf_file,
536 engine=self.engine)
538 self.maker.reshape_pdf(self.preamble_pdf_file, self.dir, centre_start=True)
540 self.maker.number_pdf(self.preamble_pdf_file, None, dir=self.dir,
541 numbers=self.preamble_page_numbers,
542 number_start=-2)
544 self.notify_watcher()
546 def make_end_matter_pdf(self):
547 """Make an inside back cover and a back cover. If there is an
548 isbn number its barcode will be put on the back cover."""
549 if self.isbn:
550 self.isbn_pdf_file = self.filepath('isbn.pdf')
551 self.maker.make_barcode_pdf(self.isbn, self.isbn_pdf_file)
552 self.notify_watcher('make_barcode_pdf')
554 self.save_data(self.tail_html_file, self.compose_end_matter())
555 self.maker.make_raw_pdf(self.tail_html_file, self.tail_pdf_file,
556 engine=self.engine)
558 self.maker.reshape_pdf(self.tail_pdf_file, self.dir, centre_start=True,
559 centre_end=True, even_pages=False)
560 self.notify_watcher()
562 def make_book_pdf(self):
563 """A convenient wrapper of a few necessary steps"""
564 # now the Xvfb server is needed. make sure it has had long enough to get going
565 self.wait_for_xvfb()
566 self.make_body_pdf()
567 self.make_preamble_pdf()
568 self.make_end_matter_pdf()
570 concat_pdfs(self.pdf_file, self.preamble_pdf_file,
571 self.body_pdf_file, self.tail_pdf_file,
572 self.isbn_pdf_file)
574 self.notify_watcher('concatenated_pdfs')
577 def make_simple_pdf(self, mode):
578 """Make a simple pdf document without contents or separate
579 title page. This is used for multicolumn newspapers and for
580 web-destined pdfs."""
581 self.wait_for_xvfb()
582 #0. Add heading to begining of html
583 body = list(self.tree.cssselect('body'))[0]
584 e = body.makeelement('h1', {'id': 'book-title'})
585 e.text = self.title
586 body.insert(0, e)
587 intro = lxml.html.fragment_fromstring(self.compose_inside_cover())
588 e.addnext(intro)
590 #0.5 adjust parameters to suit the particular kind of output
591 if mode == 'web':
592 self.maker.gutter = 0
594 #1. Save the html
595 html_text = lxml.etree.tostring(self.tree, method="html")
596 self.save_data(self.body_html_file, html_text)
598 #2. Make a pdf of it (direct to to final pdf)
599 self.maker.make_raw_pdf(self.body_html_file, self.pdf_file,
600 engine=self.engine, outline=True)
601 self.notify_watcher('generate_pdf')
602 n_pages = self.extract_pdf_outline()
604 if mode != 'web':
605 #3. resize pages and shift gutters.
606 self.maker.reshape_pdf(self.pdf_file, self.dir, centre_end=True)
607 self.notify_watcher('reshape_pdf')
609 #4. add page numbers
610 self.maker.number_pdf(self.pdf_file, n_pages,
611 dir=self.dir, numbers=self.page_numbers)
612 self.notify_watcher("number_pdf")
613 self.notify_watcher()
616 def rotate180(self):
617 """Rotate the pdf 180 degrees so an RTL book can print on LTR
618 presses."""
619 rotated = self.filepath('final-rotate.pdf')
620 unrotated = self.filepath('final-pre-rotate.pdf')
621 #leave the unrotated pdf intact at first, in case of error.
622 rotate_pdf(self.pdf_file, rotated)
623 os.rename(self.pdf_file, unrotated)
624 os.rename(rotated, self.pdf_file)
625 self.notify_watcher()
627 def publish_pdf(self):
628 """Move the finished PDF to its final resting place"""
629 log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
630 os.rename(self.pdf_file, self.publish_file)
631 self.notify_watcher()
633 def load_toc(self):
634 """From the TOC.txt file create a list of TocItems with
635 the attributes <status>, <chapter>, and <title>.
637 <status> is a number, with the following meaning:
639 0 - section heading with no chapter
640 1 - chapter heading
641 2 - book title
643 The TocItem object has convenience functions <is_chapter> and
644 <is_section>.
646 <chapter> is twiki name of the chapter.
648 <title> is a human readable title for the chapter. It is likely to
649 differ from the title given in the chapter's <h1> heading.
651 f = urlopen(self.toc_url)
652 self.toc = []
653 while True:
654 try:
655 self.toc.append(TocItem(f.next().strip(),
656 f.next().strip(),
657 f.next().strip()))
658 except StopIteration:
659 break
660 f.close()
661 self.notify_watcher()
663 def load_book(self, tidy=True):
664 """Fetch and parse the raw html of the book. If tidy is true
665 (default) links in the document will be made absolute."""
666 f = urlopen(self.book_url)
667 html = f.read()
668 f.close()
669 html = ('<html dir="%s"><head>\n<title>%s</title>\n'
670 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
671 '</head>\n<body>\n'
672 '%s\n'
673 '<div style="page-break-before: always; color:#fff;" class="unseen">'
674 'A FLOSSManuals book</div>\n</body></html>'
675 ) % (self.dir, self.book, html)
677 self.save_tempfile('raw.html', html)
679 tree = lxml.html.document_fromstring(html)
680 if tidy:
681 tree.make_links_absolute(self.book_url)
682 self.tree = tree
683 self.headings = [x for x in tree.cssselect('h1')]
684 if self.headings:
685 self.headings[0].set('class', "first-heading")
686 for h1 in self.headings:
687 h1.title = h1.text_content().strip()
688 self.notify_watcher()
690 def load(self):
691 """Wrapper around all necessary load methods."""
692 self.load_book()
693 self.load_toc()
695 def make_contents(self):
696 """Generate HTML containing the table of contents. This can
697 only be done after the main PDF has been made."""
698 header = '<h1>Table of Contents</h1><table class="toc">\n'
699 row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
700 '<td class="pagenumber">%s</td></tr>\n')
701 section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
702 footer = '\n</table>'
704 contents = []
706 chapter = 1
707 page_num = 1
708 subsections = [] # for the subsection heading pages.
710 outline_contents = iter(self.outline_contents)
711 headings = iter(self.headings)
713 for t in self.toc:
714 if t.is_chapter():
715 try:
716 h1 = headings.next()
717 except StopIteration:
718 log("heading not found for %s (previous h1 missing?). Stopping" % t)
719 break
720 h1_text, level, page_num = outline_contents.next()
721 log("%r %r" % (h1.title, h1_text))
722 contents.append(row_tmpl % (chapter, h1.title, page_num))
723 chapter += 1
724 elif t.is_section():
725 contents.append(section_tmpl % t.title)
726 else:
727 log("mystery TOC item: %s" % t)
729 doc = header + '\n'.join(contents) + footer
730 self.notify_watcher()
731 return doc
733 def add_section_titles(self):
734 """Add any section heading pages that the TOC.txt file
735 specifies. These are sub-book, super-chapter groupings.
737 Also add initial numbers to chapters.
739 headings = iter(self.headings)
740 chapter = 1
741 section = None
743 for t in self.toc:
744 if t.is_chapter() and section is not None:
745 try:
746 h1 = headings.next()
747 except StopIteration:
748 log("heading not found for %s (previous h1 missing?)" % t)
749 break
750 item = h1.makeelement('div', Class='chapter')
751 log(h1.title, debug='HTMLGEN')
752 item.text = h1.title
753 _add_initial_number(item, chapter)
755 section.append(item)
757 if not section_placed:
758 log("placing section", debug='HTMLGEN')
759 h1.addprevious(section)
760 section_placed = True
761 else:
762 log("NOT placing section", debug='HTMLGEN')
764 #put a bold number at the beginning of the h1.
765 _add_initial_number(h1, chapter)
766 chapter += 1
768 elif t.is_section():
769 section = self.tree.makeelement('div', Class="subsection")
770 # section Element complains when you try to ask it whether it
771 # has been placed (though it does know)
772 section_placed = False
773 heading = lxml.html.fragment_fromstring(t.title, create_parent='div')
774 heading.set("Class", "subsection-heading")
775 section.append(heading)
777 self.notify_watcher()
780 def add_css(self, css=None, mode='book'):
781 """If css looks like a url, use it as a stylesheet link.
782 Otherwise it is the CSS itself, which is saved to a temporary file
783 and linked to."""
784 log("css is %r" % css)
785 htmltree = self.tree
786 if css is None or not css.strip():
787 defaults = SERVER_DEFAULTS[self.server]
788 url = 'file://' + os.path.abspath(defaults['css-%s' % mode])
789 elif not re.match(r'^http://\S+$', css):
790 fn = self.save_tempfile('objavi.css', css)
791 url = 'file://' + fn
792 else:
793 url = css
794 #XXX for debugging and perhaps sensible anyway
795 #url = url.replace('file:///home/douglas/objavi2', '')
798 #find the head -- it's probably first child but lets not assume.
799 for child in htmltree:
800 if child.tag == 'head':
801 head = child
802 break
803 else:
804 head = htmltree.makeelement('head')
805 htmltree.insert(0, head)
807 link = lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
808 self.css_url = url
809 self.notify_watcher()
810 return url
812 def set_title(self, title=None):
813 """If a string is supplied, it becomes the book's title.
814 Otherwise a guess is made."""
815 if title:
816 self.title = title
817 else:
818 titles = [x.text_content() for x in self.tree.cssselect('title')]
819 if titles and titles[0]:
820 self.title = titles[0]
821 else:
822 #oh well
823 self.title = 'A Manual About ' + self.book
824 return self.title
826 def _read_localised_template(self, template, fallbacks=['en']):
827 """Try to get the template in the approriate language, otherwise in english."""
828 for lang in [self.lang] + fallbacks:
829 try:
830 fn = template % (lang)
831 f = open(fn)
832 break
833 except IOError, e:
834 log("couldn't open inside front cover for lang %s (filename %s)" % (lang, fn))
835 log(e)
836 template = f.read()
837 f.close()
838 return template
840 def compose_inside_cover(self):
841 """create the markup for the preamble inside cover."""
842 template = self._read_localised_template(config.INSIDE_FRONT_COVER_TEMPLATE)
844 if self.isbn:
845 isbn_text = '<b>ISBN :</b> %s <br>' % self.isbn
846 else:
847 isbn_text = ''
849 return template % {'date': time.strftime('%Y-%m-%d'),
850 'isbn': isbn_text,
851 'license': self.license,
855 def compose_end_matter(self):
856 """create the markup for the end_matter inside cover. If
857 self.isbn is not set, the html will result in a pdf that
858 spills onto two pages.
860 template = self._read_localised_template(config.END_MATTER_TEMPLATE)
862 d = {'css_url': self.css_url,
863 'title': self.title
866 if self.isbn:
867 d['inside_cover_style'] = ''
868 else:
869 d['inside_cover_style'] = 'page-break-after: always'
871 return template % d
876 def spawn_x(self):
877 """Start an Xvfb instance, using a new server number. A
878 reference to it is stored in self.xvfb, which is used to kill
879 it when the pdf is done.
881 Note that Xvfb doesn't interact well with dbus which is
882 present on modern desktops.
884 #Find an unused server number (in case two cgis are running at once)
885 while True:
886 servernum = random.randrange(50, 500)
887 if not os.path.exists('/tmp/.X%s-lock' % servernum):
888 break
890 self.xserver_no = ':%s' % servernum
892 authfile = self.filepath('Xauthority')
893 os.environ['XAUTHORITY'] = authfile
895 #mcookie(1) eats into /dev/random, so avoid that
896 from hashlib import md5
897 m = md5("%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32)))
898 mcookie = m.hexdigest()
900 check_call(['xauth', 'add', self.xserver_no, '.', mcookie])
902 self.xvfb = Popen(['Xvfb', self.xserver_no,
903 '-screen', '0', '1024x768x24',
904 '-pixdepths', '32',
905 #'-blackpixel', '0',
906 #'-whitepixel', str(2 ** 24 -1),
907 #'+extension', 'Composite',
908 '-dpi', '96',
909 '-kb',
910 '-nolisten', 'tcp',
913 # We need to wait a bit before the Xvfb is ready. but the
914 # downloads are so slow that that probably doesn't matter
916 self.xvfb_ready_time = time.time() + 2
918 os.environ['DISPLAY'] = self.xserver_no
919 log(self.xserver_no)
921 def wait_for_xvfb(self):
922 """wait until a previously set time before continuing. This
923 is so Xvfb has time to properly start."""
924 if hasattr(self, 'xvfb'):
925 d = self.xvfb_ready_time - time.time()
926 if d > 0:
927 time.sleep(d)
928 self.notify_watcher()
930 def cleanup_x(self):
931 """Try very hard to kill off Xvfb. In addition to killing
932 this instance's xvfb, occasionally (randomly) search for
933 escaped Xvfb instances and kill those too."""
934 if not hasattr(self, 'xvfb'):
935 return
936 check_call(['xauth', 'remove', self.xserver_no])
937 p = self.xvfb
938 log("trying to kill Xvfb %s" % p.pid)
939 os.kill(p.pid, 15)
940 for i in range(10):
941 if p.poll() is not None:
942 log("%s died with %s" % (p.pid, p.poll()))
943 break
944 log("%s not dead yet" % p.pid)
945 time.sleep(0.2)
946 else:
947 log("Xvfb would not die! kill -9! kill -9!")
948 os.kill(p.pid, 9)
950 if random.random() < 0.05:
951 #kill old xvfbs occasionally, if there are any.
952 self.kill_old_xvfbs()
954 def kill_old_xvfbs(self):
955 """Sometimes, despite everything, Xvfb instances hang around
956 well after they are wanted -- for example if the cgi process
957 dies particularly badly. So kill them if they have been
958 running for a long time."""
959 log("running kill_old_xvfbs")
960 p = Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
961 data = p.communicate()[0].strip()
962 if data:
963 lines = data.split('\n')
964 for line in lines:
965 log('dealing with ps output "%s"' % line)
966 try:
967 pid, days_, hours, minutes, seconds = re.match(r'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
968 except AttributeError:
969 log("Couldn't parse that line!")
970 # 50 minutes should be enough xvfb time for anyone
971 if days or hours or int(minutes) > 50:
972 log("going to kill pid %s" % pid)
973 os.kill(int(pid), 15)
974 time.sleep(0.5)
975 os.kill(int(pid), 9)
976 self.notify_watcher()
978 def cleanup(self):
979 self.cleanup_x()
980 if not config.KEEP_TEMP_FILES:
981 for fn in os.listdir(self.workdir):
982 os.remove(os.path.join(self.workdir, fn))
983 os.rmdir(self.workdir)
984 else:
985 log("NOT removing '%s', containing the following files:" % self.workdir)
986 log(*os.listdir(self.workdir))
988 self.notify_watcher()