routine to make pages containing ISBN barcodes
[objavi2.git] / fmbook.py
blobf8e1c2e18cc721cee5d4fc765f0a01acde7492a3
1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
3 # their interactions.
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
22 PDF"""
24 import os, sys
25 import tempfile
26 import re, time
27 import random
28 from urllib2 import urlopen
29 from subprocess import Popen, check_call, PIPE
31 import lxml.etree, lxml.html
32 import lxml, lxml.html, lxml.etree
34 import config
35 from config import SERVER_DEFAULTS, DEFAULT_SERVER, POINT_2_MM, PDFEDIT_MAX_PAGES
# Absolute path of the scratch directory named by config.TMPDIR.
TMPDIR = os.path.abspath(config.TMPDIR)
# Web server document root; the current directory when DOCUMENT_ROOT is unset.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')
# Directory under the document root where published books are placed.
PUBLISH_PATH = "%s/books/" % (DOC_ROOT,)
def log(*messages, **kwargs):
    """Write each message to stderr, one per line.

    If a <debug> keyword is given, the messages are only printed when
    config.DEBUG_ALL is true or the keyword's value is listed in
    config.DEBUG_MODES.  Without the keyword they are always printed.
    """
    if 'debug' in kwargs:
        enabled = config.DEBUG_ALL or kwargs['debug'] in config.DEBUG_MODES
        if not enabled:
            return
    for message in messages:
        try:
            print >> sys.stderr, message
        except Exception:
            # the message itself could not be printed; fall back to its repr
            print >> sys.stderr, repr(message)
def _add_initial_number(e, n):
    """Insert '<strong class="initial">n.</strong> ' as the first child
    of element e, moving e's own leading text to follow the number."""
    number = e.makeelement("strong", Class="initial")
    e.insert(0, number)
    leading_text = e.text
    if leading_text is None:
        number.tail = ' '
    else:
        number.tail = ' ' + leading_text
    # the text now lives in the number's tail, so blank the original
    e.text = ''
    number.text = "%s." % n
def _add_chapter_cookie(e):
    """Add a tiny, white-on-white span of 8 random characters straight
    after element e, and remember the same string as e.cookie.

    The string is later searched for in the extracted PDF text to
    find the page the heading landed on (see Book.find_page).
    """
    marker = e.makeelement(
        "span", Class="heading-cookie", dir="ltr",
        style="font-size:6pt; line-height: 6pt; color: #fff; width:0;"
              " float:left; margin:-2em; z-index: -67; display: block;"
    )
    marker.text = ''.join(random.choice(config.CHAPTER_COOKIE_CHARS)
                          for _ in range(8))
    e.cookie = marker.text
    e.addnext(marker)
class TocItem(object):
    """One entry of a TOC.txt file.

    <status> is a string code:
      '0' - section heading with no chapter
      '1' - chapter heading
      '2' - book title
    <chapter> is the twiki name of the chapter.
    <title> is a human readable name of the chapter.
    """
    def __init__(self, status, chapter, title):
        self.status, self.chapter, self.title = status, chapter, title

    def is_chapter(self):
        """True if this entry is a chapter heading (status '1')."""
        status = self.status
        return status == '1'

    def is_section(self):
        """True if this entry is a section heading (status '0')."""
        status = self.status
        return status == '0'

    def __str__(self):
        fields = ['%s: %s' % (key, value)
                  for key, value in self.__dict__.iteritems()]
        return '<toc: %s>' % ', '.join(fields)
def run(cmd):
    """Run cmd (an argv list) to completion and log what it did.

    The command's stdout, stderr and return code are logged; nothing
    is returned and a non-zero exit is not treated as an error.  If
    the process cannot be started or read, the failure is logged and
    the exception re-raised.
    """
    try:
        process = Popen(cmd, stdout=PIPE, stderr=PIPE)
        out, err = process.communicate()
    except Exception:
        log("Failed on command: %r" % cmd)
        raise
    report = ("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" %
              (' '.join(cmd), cmd[0], process.poll(), out, err))
    log(report)
def find_containing_paper(w, h):
    """Return (name, x_margin, y_margin) for the first entry of
    config.PAPER_SIZES that a w x h page fits on.

    The margins are half the leftover width and height, i.e. what is
    needed to centre the page on the paper.  Raises ValueError if no
    listed paper is big enough.
    """
    for name, paper_w, paper_h in config.PAPER_SIZES:
        if not (paper_w >= w and paper_h >= h):
            continue
        return (name, (paper_w - w) * 0.5, (paper_h - h) * 0.5)

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * POINT_2_MM, h * POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    Page dimensions, gutter and margins are held in points; the
    self.margins list handed to the browser is in millimetres (via
    POINT_2_MM).
    """
    def __init__(self, pointsize, **kwargs):
        """pointsize is a (width, height) pair.

        Optional keywords: gutter, top_margin, side_margin,
        bottom_margin, moz_printer, columns, column_margin.  A margin
        that is missing or None gets the computed default.
        """
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.

        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)

        self.gutter = kwargs.get('gutter', (config.BASE_GUTTER +
                                            config.PROPORTIONAL_GUTTER * self.width))

        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))

        def margin(key):
            # None is treated like "not given", so the arithmetic
            # below never sees a None margin.
            value = kwargs.get(key)
            if value is None:
                return default_margin
            return value

        # each margin reads its own keyword (side and bottom used to
        # read 'top_margin' by mistake).
        self.top_margin = margin('top_margin')
        self.side_margin = margin('side_margin')
        self.bottom_margin = margin('bottom_margin')
        self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))
        self.columns = kwargs.get('columns', 1)

        self.column_margin = kwargs.get('column_margin', default_margin * 2 / (4.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers, in the order
        # top, right, bottom, left (as consumed by _webkit_command).
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            self.margins.append((m + clip) * POINT_2_MM)

        # debug dump of everything that was calculated
        for x in locals().iteritems():
            log("%s: %s" % x, debug='PDFGEN')
        for x in dir(self):
            log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf):
        """Return the wkhtmltopdf argv that renders html into pdf."""
        m = [str(x) for x in self.margins]
        cmd = [config.WKHTMLTOPDF, '-q', '-s', self.papersize,
               '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
               ] + config.WKHTMLTOPDF_EXTRA_COMMANDS + [
               html, pdf]
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf):
        """Return the firefox argv that prints html on self.moz_printer.

        The pdf argument is not part of the command line.
        """
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        # NOTE(review): this used a bare FIREFOX name that is not
        # defined or imported in this module (NameError).  It is
        # assumed to live in config like WKHTMLTOPDF -- confirm.
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def make_raw_pdf(self, html, pdf, engine='webkit'):
        """Render html to pdf with the named engine ('webkit' or 'gecko').

        With more than one column, a narrow single-column PDF is made
        first and its pages are then laid side by side with pdfnup.
        """
        func = getattr(self, '_%s_command' % engine)
        if self.columns == 1:
            cmd = func(html, pdf)
            run(cmd)
        else:
            printable_width = self.width - 2.0 * self.side_margin - self.gutter
            column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
            page_width = column_width + self.column_margin

            columnmaker = PageSettings((page_width, self.height), moz_printer=self.moz_printer,
                                       gutter=0, top_margin=self.top_margin,
                                       side_margin=self.column_margin * 0.5,
                                       bottom_margin=self.bottom_margin)

            column_pdf = pdf[:-4] + '-single-column.pdf'
            columnmaker.make_raw_pdf(html, column_pdf, engine=engine)
            columnmaker.reshape_pdf(column_pdf)

            cmd = ['pdfnup',
                   '--nup', '%sx1' % int(self.columns),
                   '--paper', self.papersize.lower() + 'paper',
                   '--outfile', pdf,
                   '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
                   '--noautoscale', 'true',
                   '--orient', 'portrait',
                   #'--tidy', 'false',
                   column_pdf
                   ]
            run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.  The pdf is rewritten in place
        by the pdfedit script wk_objavi.qs."""
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to pdf in place, in a single pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to pdf; kwargs are passed to _number_pdf.

        <pages> is the (possibly inexact) page count, or None if
        unknown.
        """
        # if there are too many pages for pdfedit to handle in one go,
        # split the job into bits.  <pages> may not be exact
        if pages is None or pages <= PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
        else:
            # section_size must be even
            sections = pages // PDFEDIT_MAX_PAGES + 1
            section_size = (pages // sections + 2) & ~1

            pdf_sections = []
            # NOTE(review): s doubles as the pdftk page index and the
            # first printed number, which only agree when number_start
            # is 1 -- confirm no caller splits with another start.
            s = kwargs.pop('number_start', 1)
            while s < pages:
                e = s + section_size - 1
                pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], s, e)
                if e < pages - 1:
                    page_range = '%s-%s' % (s, e)
                else:
                    page_range = '%s-end' % s
                run(['pdftk',
                     pdf,
                     'cat',
                     page_range,
                     'output',
                     pdf_section,
                     ])
                self._number_pdf(pdf_section, number_start=s, **kwargs)
                pdf_sections.append(pdf_section)
                s = e + 1

            concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page.

        The output of config.BOOKLAND is piped through ps2pdf, which
        writes the page-sized result to <pdf>.
        """
        position = '%s,%s,%s,%s,%s' % (corner, self.width, self.height, self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # drop our copy of the pipe so p1 gets SIGPIPE if p2 exits early
        p1.stdout.close()
        out, err = p2.communicate()
        # reap p1 so that its return code below is real, not None
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def concat_pdfs(name, *args):
    """Join the pdfs named in args, in order, with pdftk and save the
    result as <name>."""
    cmd = ['pdftk'] + list(args) + ['cat', 'output', name]
    run(cmd)
def index_pdf(pdf, text=None):
    """Run pdftotext on <pdf>, writing its text to the file <text>
    (default: <pdf> + '.index.txt'), with ^L separating pages.

    Returns the name of the text file.
    """
    target = text
    if target is None:
        target = pdf + '.index.txt'
    #'-layout' would keep more original formatting
    run(['pdftotext', pdf, target])
    return target
def rotate_pdf(pdfin, pdfout):
    """Write to pdfout a copy of pdfin with every page turned on its
    head (pdftk page range '1-endD')."""
    run(['pdftk', pdfin, 'cat', '1-endD', 'output', pdfout])
340 class Book(object):
341 page_numbers = 'latin'
342 preamble_page_numbers = 'roman'
343 engine= 'webkit'
344 _try_cleanup_on_del = True
def notify_watcher(self, message=None):
    """Pass <message> to the self.watcher callback, if one is set.

    When no message is given, the name of the calling function is
    used instead.
    """
    if not self.watcher:
        return
    if message is None:
        #message is the name of the caller
        #XXX look at using inspect module
        import traceback
        caller_frame = traceback.extract_stack(None, 2)[0]
        message = caller_frame[2]
    log("notify_watcher called by '%s'" % message)
    self.watcher(message)
def __enter__(self):
    """Context manager entry: the book itself is the managed object."""
    return self
def __exit__(self, exc_type, exc_value, traceback):
    """Context manager exit: clean up.  Nothing is returned, so any
    exception raised inside the with block carries on propagating."""
    #could deal with exceptions here and return true
    self.cleanup()
363 def __init__(self, book, server, bookname,
364 page_settings=None, engine=None, watcher=None):
365 log("*** Starting new book %s ***" % bookname)
366 self.book = book
367 self.server = server
368 self.watcher = watcher
369 self.workdir = tempfile.mkdtemp(prefix=bookname, dir=TMPDIR)
370 os.chmod(self.workdir, 0755)
371 defaults = SERVER_DEFAULTS.get(server, SERVER_DEFAULTS[DEFAULT_SERVER])
372 self.default_css = defaults['css']
373 self.lang = defaults['lang']
374 self.dir = defaults['dir']
376 self.body_html_file = self.filepath('body.html')
377 self.body_pdf_file = self.filepath('body.pdf')
378 self.body_index_file = self.filepath('body.txt')
379 self.preamble_html_file = self.filepath('preamble.html')
380 self.preamble_pdf_file = self.filepath('preamble.pdf')
381 self.pdf_file = self.filepath('final.pdf')
383 self.publish_name = bookname
384 self.publish_file = os.path.join(PUBLISH_PATH, self.publish_name)
385 self.publish_url = os.path.join(config.PUBLISH_URL, self.publish_name)
387 self.book_url = config.BOOK_URL % (self.server, self.book)
388 self.toc_url = config.TOC_URL % (self.server, self.book)
390 self.set_page_dimensions(page_settings)
392 if engine is not None:
393 self.engine = engine
394 self.notify_watcher()
def __del__(self):
    """Last-chance cleanup of a work directory that still exists."""
    if os.path.exists(self.workdir):
        if self._try_cleanup_on_del:
            #or else you can get in bad cycles
            self._try_cleanup_on_del = False
            self.cleanup()
def __getattr__(self, attr):
    """Load the book or TOC on first access to self.tree or self.toc.

    Any other missing attribute raises AttributeError.
    """
    if attr == 'tree':
        loader = self.load_book
    elif attr == 'toc':
        loader = self.load_toc
    else:
        raise AttributeError("no such member: '%s'" % attr)
    loader()
    return getattr(self, attr)
def filepath(self, fn):
    """Return the path of the file called <fn> inside the work directory."""
    directory = self.workdir
    return os.path.join(directory, fn)
def save_data(self, fn, data):
    """Write <data> to the file <fn> without tripping up on unicode.

    unicode objects are encoded as utf-8 (unencodable characters are
    dropped); byte strings are written as they are.  The file is
    closed even if the write fails.
    """
    if isinstance(data, unicode):
        data = data.encode('utf8', 'ignore')
    f = open(fn, 'w')
    try:
        f.write(data)
    finally:
        f.close()
def save_tempfile(self, fn, data):
    """Save <data> as <fn> in the work directory, which is removed by
    cleanup().  Return the full path of the saved file."""
    path = self.filepath(fn)
    self.save_data(path, data)
    return path
def set_page_dimensions(self, dimensions):
    """Create self.maker, the PageSettings used for every PDF step.

    <dimensions> is a mapping of PageSettings keyword arguments and
    must include 'pointsize'.
    """
    # NOTE(review): __init__ defaults page_settings to None, which
    # cannot be **-expanded here -- callers must always supply it.
    maker = PageSettings(**dimensions)
    self.maker = maker
def extract_pdf_text(self):
    """Extract the text from the body pdf, split into pages, so
    that the correct page can be found to generate the table of
    contents.

    The pages are stored in self.text_pages and their number is
    returned.  The index file is closed even if reading or decoding
    fails.
    """
    index_pdf(self.body_pdf_file, self.body_index_file)
    f = open(self.body_index_file)
    try:
        s = unicode(f.read(), 'utf8')
    finally:
        f.close()
    #pages are separated by formfeed character "^L", "\f" or chr(12)
    self.text_pages = s.split("\f")
    #there is sometimes (probably always) an unwanted ^L at the end,
    #so the count can include a trailing empty page
    return len(self.text_pages)
def make_body_pdf(self):
    """Turn the book's HTML tree into the finished body PDF: render,
    index the text, reshape, then number the pages."""
    maker = self.maker
    html_file, pdf_file = self.body_html_file, self.body_pdf_file

    #1. Save the html
    self.save_data(html_file, lxml.etree.tostring(self.tree, method="html"))

    #2. Make a pdf of it
    maker.make_raw_pdf(html_file, pdf_file, engine=self.engine)
    self.notify_watcher('generate_pdf')

    #3. extract the text for finding contents.
    n_pages = self.extract_pdf_text()
    log ("found %s pages in pdf" % n_pages)

    #4. resize pages, shift gutters, and rotate 180 degrees for RTL
    maker.reshape_pdf(pdf_file, self.dir, centre_end=True)
    self.notify_watcher('reshape_pdf')

    #5 add page numbers
    maker.number_pdf(pdf_file, n_pages, dir=self.dir,
                     numbers=self.page_numbers)
    self.notify_watcher("number_pdf")
    self.notify_watcher()
def make_preamble_pdf(self):
    """Make the preamble PDF: front page, inside cover and table of
    contents.

    Relies on self.css_url, self.title and self.inside_cover_html
    having been set (add_css, set_title, compose_inside_cover), and on
    the body PDF text being indexed so page numbers can be found.
    """
    contents = self.make_contents()
    template = ('<html dir="%s"><head>\n'
                '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
                '<link rel="stylesheet" href="%s" />\n'
                '</head>\n<body>\n'
                '<h1 class="frontpage">%s</h1>'
                '%s\n'
                '<div class="contents">%s</div>\n'
                '<div style="page-break-after: always; color:#fff" class="unseen">.'
                '<!--%s--></div></body></html>'
                )
    values = (self.dir, self.css_url, self.title, self.inside_cover_html,
              contents, self.title)
    self.save_data(self.preamble_html_file, template % values)

    maker = self.maker
    pdf_file = self.preamble_pdf_file
    maker.make_raw_pdf(self.preamble_html_file, pdf_file, engine=self.engine)
    maker.reshape_pdf(pdf_file, self.dir, centre_start=True)
    maker.number_pdf(pdf_file, None, dir=self.dir,
                     numbers=self.preamble_page_numbers,
                     number_start=-2)

    self.notify_watcher()
def make_pdf(self):
    """Make the body and preamble PDFs and join them (preamble first)
    into self.pdf_file."""
    # now the Xvfb server is needed. make sure it has had long enough to get going
    self.wait_for_xvfb()
    self.make_body_pdf()
    self.make_preamble_pdf()
    parts = (self.preamble_pdf_file, self.body_pdf_file)
    concat_pdfs(self.pdf_file, *parts)
    self.notify_watcher('concatenated_pdfs')
    #and move it into place (what place?)
def rotate180(self):
    """Rotate the final pdf 180 degrees so an RTL book can print on
    LTR presses.  The unrotated version is kept in the work directory
    as final-pre-rotate.pdf."""
    final = self.pdf_file
    rotated = self.filepath('final-rotate.pdf')
    backup = self.filepath('final-pre-rotate.pdf')
    #leave the unrotated pdf intact at first, in case of error.
    rotate_pdf(final, rotated)
    os.rename(final, backup)
    os.rename(rotated, final)
    self.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF from the work directory to
    self.publish_file."""
    source, destination = self.pdf_file, self.publish_file
    log("Publishing %r as %r" % (source, destination))
    os.rename(source, destination)
    self.notify_watcher()
def load_toc(self):
    """Fetch TOC.txt from self.toc_url and store it in self.toc as a
    list of TocItems.

    The file is read three lines at a time: <status>, <chapter>,
    <title>.  A trailing incomplete group is dropped.

    <status> is a number, with the following meaning:

      0 - section heading with no chapter
      1 - chapter heading
      2 - book title

    The TocItem object has convenience functions <is_chapter> and
    <is_section>.

    <chapter> is twiki name of the chapter.

    <title> is a human readable title for the chapter.  It is likely to
    differ from the title given in the chapter's <h1> heading.
    """
    f = urlopen(self.toc_url)
    items = self.toc = []
    while True:
        try:
            status = f.next().strip()
            chapter = f.next().strip()
            title = f.next().strip()
        except StopIteration:
            break
        items.append(TocItem(status, chapter, title))
    f.close()
    self.notify_watcher()
def load_book(self, tidy=True):
    """Fetch the book's html from self.book_url, wrap it in a full
    document and parse it into self.tree.  If tidy is true (default)
    links in the document will be made absolute.

    Also collects the <h1> elements in self.headings, marks the first
    one with class "first-heading" and gives each a .title attribute
    holding its stripped text.
    """
    f = urlopen(self.book_url)
    body = f.read()
    f.close()
    wrapper = ('<html dir="%s"><head>\n<title>%s</title>\n'
               '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
               '</head>\n<body>\n'
               '%s\n'
               '<div style="page-break-before: always; color:#fff;" class="unseen">'
               'A FLOSSManuals book</div>\n</body></html>'
               )
    html = wrapper % (self.dir, self.book, body)

    # keep a copy of what was parsed, for debugging
    self.save_tempfile('raw.html', html)

    tree = lxml.html.document_fromstring(html)
    if tidy:
        tree.make_links_absolute(self.book_url)
    self.tree = tree
    self.headings = list(tree.cssselect('h1'))
    if self.headings:
        first = self.headings[0]
        first.set('class', "first-heading")
    for heading in self.headings:
        heading.title = heading.text_content().strip()
    self.notify_watcher()
def load(self):
    """Load the book html and then the table of contents."""
    for loader in (self.load_book, self.load_toc):
        loader()
def find_page(self, element, start_page=1):
    """Look for element's cookie in self.text_pages, starting at
    page <start_page> (1-based).

    Returns (page_number, True) for the first page containing it, or
    (start_page, False) if it is not found.
    """
    cookie = element.cookie
    page_no = start_page
    for content in self.text_pages[start_page - 1:]:
        log("looking for '%s' in page %s below:\n%s[...]" %
            (cookie, page_no, content[:160]), debug='INDEX')
        #remove spaces: they can appear spuriously
        squeezed = ''.join(content.split())
        if cookie in squeezed:
            return page_no, True
        page_no += 1
    #If it isn't found, return the start page so the next chapter has a chance
    return start_page, False
def make_contents(self):
    """Return the table of contents as an HTML string.  This can
    only be done after the main PDF has been made, because the page
    numbers come from its extracted text.

    TOC chapters are paired with self.headings in order; section
    entries become full-width rows.
    """
    header = '<h1>Table of Contents</h1><table class="toc">\n'
    row_tmpl = ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
                '<td class="pagenumber">%s</td></tr>\n')
    section_tmpl = ('<tr><td class="section" colspan="3">%s</td></tr>\n')
    footer = '\n</table>'

    rows = []
    chapter_no = 1
    page_num = 1
    remaining_headings = iter(self.headings)

    for entry in self.toc:
        if entry.is_chapter():
            try:
                h1 = remaining_headings.next()
            except StopIteration:
                log("heading not found for %s (previous h1 missing?). Stopping" % entry)
                break
            page_num, found = self.find_page(h1, page_num)
            # sometimes the heading isn't found, which is shown as a frown
            shown_page = page_num
            if not found:
                shown_page = ':-('
            rows.append(row_tmpl % (chapter_no, h1.title, shown_page))
            chapter_no += 1
        elif entry.is_section():
            rows.append(section_tmpl % entry.title)
        else:
            log("mystery TOC item: %s" % entry)

    doc = header + '\n'.join(rows) + footer
    self.notify_watcher()
    return doc
def add_section_titles(self):
    """Add any section heading pages that the TOC.txt file
    specifies.  These are sub-book, super-chapter groupings.

    Also add initial numbers (and hidden page-finding cookies) to
    chapter headings.
    """
    log(self.headings)
    remaining_headings = iter(self.headings)
    chapter_no = 1
    section = None

    for entry in self.toc:
        # NOTE(review): chapters listed before the first section are
        # skipped without consuming a heading -- confirm TOCs always
        # start with a section.
        if entry.is_chapter() and section is not None:
            try:
                h1 = remaining_headings.next()
            except StopIteration:
                log("heading not found for %s (previous h1 missing?)" % entry)
                break
            # list the chapter on its section's heading page
            listing = h1.makeelement('div', Class='chapter')
            log(h1.title, debug='HTMLGEN')
            listing.text = h1.title
            _add_initial_number(listing, chapter_no)
            section.append(listing)

            # the section page goes in front of its first chapter only
            if section_placed:
                log("NOT placing section", debug='HTMLGEN')
            else:
                log("placing section", debug='HTMLGEN')
                h1.addprevious(section)
                section_placed = True

            #put a bold number at the beginning of the h1, and a hidden cookie at the end.
            _add_initial_number(h1, chapter_no)
            _add_chapter_cookie(h1)
            chapter_no += 1

        elif entry.is_section():
            section = self.tree.makeelement('div', Class="subsection")
            # section Element complains when you try to ask it whether it
            # has been placed (though it does know)
            section_placed = False
            title_div = lxml.html.fragment_fromstring(entry.title, create_parent='div')
            title_div.set("Class", "subsection-heading")
            section.append(title_div)

    self.notify_watcher()
def add_css(self, css=None):
    """Link a stylesheet into the book's <head> and return its url.

    If css is empty or None, the server's default stylesheet is used.
    If css looks like an http or https url, it is linked to directly.
    Otherwise it is the CSS itself, which is saved to a temporary
    file and linked to.  The url is also stored as self.css_url.
    """
    log("css is %r" % css)
    htmltree = self.tree
    if css is None or not css.strip():
        url = 'file://' + os.path.abspath(self.default_css)
    elif re.match(r'^https?://\S+$', css):
        # https is accepted too; it used to be saved as if it were CSS text
        url = css
    else:
        fn = self.save_tempfile('objavi.css', css)
        url = 'file://' + fn

    #find the head -- it's probably first child but lets not assume.
    for child in htmltree:
        if child.tag == 'head':
            head = child
            break
    else:
        head = htmltree.makeelement('head')
        htmltree.insert(0, head)

    lxml.etree.SubElement(head, 'link', rel='stylesheet', type='text/css', href=url)
    self.css_url = url
    self.notify_watcher()
    return url
def set_title(self, title=None):
    """Set and return self.title.

    A non-empty <title> is used as given.  Otherwise the text of the
    document's first <title> element is used, or, if that is missing
    or empty, 'A Manual About ' plus the book name.
    """
    if not title:
        found = [x.text_content() for x in self.tree.cssselect('title')]
        if found and found[0]:
            title = found[0]
        else:
            #oh well
            title = 'A Manual About ' + self.book
    self.title = title
    return self.title
737 def compose_inside_cover(self, license=config.DEFAULT_LICENSE, isbn=None):
738 """create the markup for the preamble inside cover, storing it
739 in self.inside_cover_html."""
740 #XXX this should go in make_preamble_pdf, but that needs to be extracted from make_pdf
742 if isbn:
743 isbn_text = '<b>ISBN :</b> %s <br>' % isbn
744 #XXX make a barcode
745 else:
746 isbn_text = ''
748 for lang in (self.lang, 'en'):
749 try:
750 fn = INSIDE_FRONT_COVER_TEMPLATE % (lang)
751 f = open(fn)
752 except IOError, e:
753 log("couldn't open inside front cover for lang %s (filename %s)" % (lang, fn))
754 log(e)
756 template = f.read()
757 f.close()
759 self.inside_cover_html = template % {'date': time.strftime('%Y-%m-%d'),
760 'isbn': isbn_text,
761 'license': license,
def spawn_x(self):
    """Start an Xvfb instance, using a new server number.  A
    reference to it is stored in self.xvfb, which is used to kill
    it when the pdf is done.

    XAUTHORITY and DISPLAY are set in os.environ so that child
    processes use the new server.

    Note that Xvfb doesn't interact well with dbus which is
    present on modern desktops.
    """
    #Find an unused server number (in case two cgis are running at once)
    servernum = random.randrange(50, 500)
    while os.path.exists('/tmp/.X%s-lock' % servernum):
        servernum = random.randrange(50, 500)

    self.xserver_no = ':%s' % servernum

    authfile = self.filepath('Xauthority')
    os.environ['XAUTHORITY'] = authfile

    #mcookie(1) eats into /dev/random, so avoid that
    from hashlib import md5
    seed = "%r %r %r %r %r" % (self, os.environ, os.getpid(), time.time(), os.urandom(32))
    mcookie = md5(seed).hexdigest()

    check_call(['xauth', 'add', self.xserver_no, '.', mcookie])

    xvfb_cmd = ['Xvfb', self.xserver_no,
                '-screen', '0', '1024x768x24',
                '-pixdepths', '32',
                #'-blackpixel', '0',
                #'-whitepixel', str(2 ** 24 -1),
                #'+extension', 'Composite',
                '-dpi', '96',
                '-kb',
                '-nolisten', 'tcp',
                ]
    self.xvfb = Popen(xvfb_cmd)

    # We need to wait a bit before the Xvfb is ready.  but the
    # downloads are so slow that that probably doesn't matter
    self.xvfb_ready_time = time.time() + 2

    os.environ['DISPLAY'] = self.xserver_no
    log(self.xserver_no)
def wait_for_xvfb(self):
    """If spawn_x started an Xvfb, sleep until its ready time has
    passed, so that it has had time to properly start."""
    if hasattr(self, 'xvfb'):
        remaining = self.xvfb_ready_time - time.time()
        if remaining > 0:
            time.sleep(remaining)
    self.notify_watcher()
def cleanup_x(self):
    """Try very hard to kill off Xvfb.  In addition to killing
    this instance's xvfb, occasionally (randomly) search for
    escaped Xvfb instances and kill those too.

    Does nothing if spawn_x never started an Xvfb.
    """
    if not hasattr(self, 'xvfb'):
        return
    check_call(['xauth', 'remove', self.xserver_no])
    xvfb = self.xvfb
    log("trying to kill Xvfb %s" % xvfb.pid)
    os.kill(xvfb.pid, 15)

    # give it up to ten 0.2 second chances to exit after SIGTERM
    died = False
    for attempt in range(10):
        if xvfb.poll() is not None:
            log("%s died with %s" % (xvfb.pid, xvfb.poll()))
            died = True
            break
        log("%s not dead yet" % xvfb.pid)
        time.sleep(0.2)
    if not died:
        log("Xvfb would not die! kill -9! kill -9!")
        os.kill(xvfb.pid, 9)

    if random.random() < 0.05:
        #kill old xvfbs occasionally, if there are any.
        self.kill_old_xvfbs()
843 def kill_old_xvfbs(self):
844 """Sometimes, despite everything, Xvfb instances hang around
845 well after they are wanted -- for example if the cgi process
846 dies particularly badly. So kill them if they have been
847 running for a long time."""
848 log("running kill_old_xvfbs")
849 p = Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout=PIPE)
850 data = p.communicate()[0].strip()
851 if data:
852 lines = data.split('\n')
853 for line in lines:
854 log('dealing with ps output "%s"' % line)
855 try:
856 pid, days_, hours, minutes, seconds = re.match(r'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
857 except AttributeError:
858 log("Couldn't parse that line!")
859 # 50 minutes should be enough xvfb time for anyone
860 if days or hours or int(minutes) > 50:
861 log("going to kill pid %s" % pid)
862 os.kill(int(pid), 15)
863 time.sleep(0.5)
864 os.kill(int(pid), 9)
865 self.notify_watcher()
def cleanup(self):
    """Shut down Xvfb and, unless config.KEEP_TEMP_FILES is set,
    delete the work directory and the files in it."""
    self.cleanup_x()
    workdir = self.workdir
    if config.KEEP_TEMP_FILES:
        log("NOT removing '%s', containing the following files:" % workdir)
        log(*os.listdir(workdir))
    else:
        for name in os.listdir(workdir):
            os.remove(os.path.join(workdir, name))
        os.rmdir(workdir)

    self.notify_watcher()