1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
28 from urllib2
import urlopen
29 from subprocess
import Popen
, check_call
, PIPE
31 import lxml
.etree
, lxml
.html
32 import lxml
, lxml
.html
, lxml
.etree
35 from config
import SERVER_DEFAULTS
, DEFAULT_SERVER
, POINT_2_MM
, PDFEDIT_MAX_PAGES
# Absolute path of the directory in which per-book working directories
# are created.
TMPDIR = os.path.abspath(config.TMPDIR)
# The web server's document root; falls back to the current directory
# when DOCUMENT_ROOT is not set in the environment.
DOC_ROOT = os.getenv('DOCUMENT_ROOT', '.')
# Directory that finished books are published into.
PUBLISH_PATH = "%s/books/" % DOC_ROOT
42 def log(*messages
, **kwargs
):
43 """Send the messages to the appropriate place (stderr, or syslog).
44 If a <debug> keyword is specified, the message is only printed if
45 its value is in the global DEBUG_MODES."""
46 if 'debug' not in kwargs
or config
.DEBUG_ALL
or kwargs
['debug'] in config
.DEBUG_MODES
:
49 print >> sys
.stderr
, m
51 print >> sys
.stderr
, repr(m
)
53 def _add_initial_number(e
, n
):
54 """Put a styled chapter number n at the beginning of element e."""
55 initial
= e
.makeelement("strong", Class
="initial")
58 if e
.text
is not None:
59 initial
.tail
+= e
.text
61 initial
.text
= "%s." % n
63 def _add_chapter_cookie(e
):
64 """add magic hidden text to help with contents generation"""
65 cookie
= e
.makeelement("span", Class
="heading-cookie", dir="ltr",
66 style
="font-size:6pt; line-height: 6pt; color: #fff; width:0;"
67 " float:left; margin:-2em; z-index: -67; display: block;"
69 cookie
.text
= ''.join(random
.choice(config
.CHAPTER_COOKIE_CHARS
) for x
in range(8))
70 e
.cookie
= cookie
.text
75 class TocItem(object):
76 """This makes sense of the tuples from TOC.txt files"""
77 def __init__(self
, status
, chapter
, title
):
79 # 0 - section heading with no chapter
83 # chapter is twiki name of the chapter
84 # title is a human readable name of the chapter.
86 self
.chapter
= chapter
90 return self
.status
== '1'
93 return self
.status
== '0'
96 return '<toc: %s>' % ', '.join('%s: %s' % x
for x
in self
.__dict
__.iteritems())
101 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
102 out
, err
= p
.communicate()
104 log("Failed on command: %r" % cmd
)
106 log("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" %
107 (' '.join(cmd
), cmd
[0], p
.poll(), out
, err
))
110 def find_containing_paper(w
, h
):
112 for name
, pw
, ph
in config
.PAPER_SIZES
:
113 if pw
>= w
and ph
>= h
:
116 return (name
, mw
, mh
)
118 raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
119 (w
* POINT_2_MM
, h
* POINT_2_MM
))
123 class PageSettings(object):
124 """Calculates and wraps commands for the generation and processing
def __init__(self, pointsize, **kwargs):
    """Work out the paper size, margins and gutter for a page.

    pointsize is a (width, height) pair; the values are presumably in
    points, since they are multiplied by POINT_2_MM to get the
    millimetre margins handed to the browsers.

    Optional keyword arguments override the computed defaults:
    gutter, top_margin, side_margin, bottom_margin, moz_printer,
    columns and column_margin.
    """
    # the formulas for default gutters, margins and column margins
    # are quite ad-hoc and certainly improvable.

    self.width, self.height = pointsize
    self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)

    self.gutter = kwargs.get('gutter', (config.BASE_GUTTER +
                                        config.PROPORTIONAL_GUTTER * self.width))

    default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))
    self.top_margin = kwargs.get('top_margin', default_margin)
    # Each margin is read from its own keyword.  The side and bottom
    # margins used to be looked up under 'top_margin', which silently
    # discarded the side_margin= and bottom_margin= values that
    # make_raw_pdf passes when it builds single-column pages.
    self.side_margin = kwargs.get('side_margin', default_margin)
    self.bottom_margin = kwargs.get('bottom_margin', default_margin)
    self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))
    self.columns = kwargs.get('columns', 1)

    self.column_margin = kwargs.get('column_margin', default_margin * 2 / (4.0 + self.columns))

    # the page number sits a little below the bottom margin and is
    # aligned with the side margin.
    self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
    self.number_margin = self.side_margin

    # calculate margins in mm for browsers, in the order
    # top, right, bottom, left (as consumed by _webkit_command).
    self.margins = []
    for m, clip in ((self.top_margin, clipy),
                    (self.side_margin, clipx + 0.5 * self.gutter),
                    (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                    (self.side_margin, clipx + 0.5 * self.gutter),
                    ):
        # NOTE(review): a margin explicitly passed as None falls back
        # to the default here -- confirm this guard is wanted.
        if m is None:
            m = default_margin
        self.margins.append((m + clip) * POINT_2_MM)

    # dump every local and every attribute when PDFGEN debugging is on.
    for x in locals().items():
        log("%s: %s" % x, debug='PDFGEN')
    for x in dir(self):
        log("%s: %s" % (x, getattr(self, x)), debug='PDFGEN')
166 def _webkit_command(self
, html
, pdf
):
167 m
= [str(x
) for x
in self
.margins
]
168 cmd
= [config
.WKHTMLTOPDF
, '-q', '-s', self
.papersize
,
169 '-T', m
[0], '-R', m
[1], '-B', m
[2], '-L', m
[3],
170 ] + config
.WKHTMLTOPDF_EXTRA_COMMANDS
+ [
175 def _gecko_command(self
, html
, pdf
):
176 m
= [str(x
) for x
in self
.margins
]
177 #firefox -P pdfprint -print URL -printprinter "printer_settings"
178 cmd
= [FIREFOX
, '-P', 'pdfprint', '-print',
179 html
, '-printprinter', self
.moz_printer
]
183 def make_raw_pdf(self
, html
, pdf
, engine
='webkit'):
184 func
= getattr(self
, '_%s_command' % engine
)
185 if self
.columns
== 1:
186 cmd
= func(html
, pdf
)
189 printable_width
= self
.width
- 2.0 * self
.side_margin
- self
.gutter
190 column_width
= (printable_width
- (self
.columns
- 1) * self
.column_margin
) / self
.columns
191 page_width
= column_width
+ self
.column_margin
193 columnmaker
= PageSettings((page_width
, self
.height
), moz_printer
=self
.moz_printer
,
194 gutter
=0, top_margin
=self
.top_margin
,
195 side_margin
=self
.column_margin
* 0.5,
196 bottom_margin
=self
.bottom_margin
)
198 column_pdf
= pdf
[:-4] + '-single-column.pdf'
199 columnmaker
.make_raw_pdf(html
, column_pdf
, engine
=engine
)
200 columnmaker
.reshape_pdf(column_pdf
)
203 '--nup', '%sx1' % int(self
.columns
),
204 '--paper', self
.papersize
.lower() + 'paper',
206 '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
207 '--noautoscale', 'true',
208 '--orient', 'portrait',
216 def reshape_pdf(self
, pdf
, dir='LTR', centre_start
=False, centre_end
=False,
218 """Spin the pdf for RTL text, resize it to the right size, and
219 shift the gutter left and right"""
228 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
231 'output_filename=%s' % pdf
,
232 'operation=%s' % ops
,
233 'width=%s' % self
.width
,
234 'height=%s' % self
.height
,
235 'offset=%s' % gutter
,
236 'centre_start=%s' % centre_start
,
237 'centre_end=%s' % centre_end
,
241 def _number_pdf(self
, pdf
, numbers
='latin', dir='LTR',
243 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
244 'operation=page_numbers',
247 'output_filename=%s' % pdf
,
248 'number_start=%s' % number_start
,
249 'number_style=%s' % numbers
,
250 'number_bottom=%s' % self
.number_bottom
,
251 'number_margin=%s' % self
.number_margin
,
255 def number_pdf(self
, pdf
, pages
, **kwargs
):
256 # if there are too many pages for pdfedit to handle in one go,
257 # split the job into bits. <pages> may not be exact
258 if pages
is None or pages
<= PDFEDIT_MAX_PAGES
:
259 self
._number
_pdf
(pdf
, **kwargs
)
261 # section_size must be even
262 sections
= pages
// PDFEDIT_MAX_PAGES
+ 1
263 section_size
= (pages
// sections
+ 2) & ~
1
266 s
= kwargs
.pop('number_start', 1)
268 e
= s
+ section_size
- 1
269 pdf_section
= '%s-%s-%s.pdf' % (pdf
[:-4], s
, e
)
271 page_range
= '%s-%s' % (s
, e
)
273 page_range
= '%s-end' % s
281 self
._number
_pdf
(pdf_section
, number_start
=s
, **kwargs
)
282 pdf_sections
.append(pdf_section
)
285 concat_pdfs(pdf
, *pdf_sections
)
287 def make_barcode_pdf(self
, isbn
, pdf
, corner
='br'):
288 """Put an ISBN barcode in a corner of a single blank page."""
290 position
= '%s,%s,%s,%s,%s' %(corner
, self
.width
, self
.height
, self
.side_margin
, self
.bottom_margin
)
291 cmd1
= [config
.BOOKLAND
,
292 '--position', position
,
296 '-dDEVICEWIDTHPOINTS=%s' % self
.width
,
297 '-dDEVICEHEIGHTPOINTS=%s' % self
.height
,
300 p1
= Popen(cmd1
, stdout
=PIPE
)
301 p2
= Popen(cmd2
, stdin
=p1
.stdout
, stdout
=PIPE
, stderr
=PIPE
)
302 out
, err
= p2
.communicate()
304 log('ran:\n%s | %s' % (' '.join(cmd1
), ' '.join(cmd2
)))
305 log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1
.poll(), p2
.poll(), out
, err
))
310 def concat_pdfs(name
, *args
):
311 """Join all the named pdfs together into one and save it as <name>"""
313 cmd
.extend(x
for x
in args
if x
is not None)
314 cmd
+= ['cat', 'output', name
]
317 def index_pdf(pdf
, text
=None):
318 """Use pdftotext to extract utf-8 text from a pdf, using ^L to
321 text
= pdf
+ '.index.txt'
323 #'-layout', #keeps more original formatting
329 def rotate_pdf(pdfin
, pdfout
):
330 """Turn the PDF on its head"""
331 cmd
= ['pdftk', pdfin
,
341 page_numbers
= 'latin'
342 preamble_page_numbers
= 'roman'
344 _try_cleanup_on_del
= True
346 def notify_watcher(self
, message
=None):
349 #message is the name of the caller
350 #XXX look at using inspect module
352 message
= traceback
.extract_stack(None, 2)[0][2]
353 log("notify_watcher called with '%s'" % message
)
354 self
.watcher(message
)
359 def __exit__(self
, exc_type
, exc_value
, traceback
):
361 #could deal with exceptions here and return true
363 def __init__(self
, book
, server
, bookname
,
364 page_settings
=None, engine
=None, watcher
=None, isbn
=None,
365 license
=config
.DEFAULT_LICENSE
):
366 log("*** Starting new book %s ***" % bookname
)
369 self
.watcher
= watcher
371 self
.license
= license
372 self
.workdir
= tempfile
.mkdtemp(prefix
=bookname
, dir=TMPDIR
)
373 os
.chmod(self
.workdir
, 0755)
374 defaults
= SERVER_DEFAULTS
.get(server
, SERVER_DEFAULTS
[DEFAULT_SERVER
])
375 self
.default_css
= defaults
['css']
376 self
.lang
= defaults
['lang']
377 self
.dir = defaults
['dir']
379 self
.body_html_file
= self
.filepath('body.html')
380 self
.body_pdf_file
= self
.filepath('body.pdf')
381 self
.body_index_file
= self
.filepath('body.txt')
382 self
.preamble_html_file
= self
.filepath('preamble.html')
383 self
.preamble_pdf_file
= self
.filepath('preamble.pdf')
384 self
.tail_html_file
= self
.filepath('tail.html')
385 self
.tail_pdf_file
= self
.filepath('tail.pdf')
386 self
.isbn_pdf_file
= None
387 self
.pdf_file
= self
.filepath('final.pdf')
389 self
.publish_name
= bookname
390 self
.publish_file
= os
.path
.join(PUBLISH_PATH
, self
.publish_name
)
391 self
.publish_url
= os
.path
.join(config
.PUBLISH_URL
, self
.publish_name
)
393 self
.book_url
= config
.BOOK_URL
% (self
.server
, self
.book
)
394 self
.toc_url
= config
.TOC_URL
% (self
.server
, self
.book
)
396 self
.set_page_dimensions(page_settings
)
398 if engine
is not None:
400 self
.notify_watcher()
403 if os
.path
.exists(self
.workdir
) and self
._try
_cleanup
_on
_del
:
404 self
._try
_cleanup
_on
_del
= False #or else you can get in bad cycles
407 def __getattr__(self
, attr
):
408 """catch unloaded books and load them"""
409 #log('looking for missing attribute "%s"' % (attr))
416 raise AttributeError("no such member: '%s'" % attr
)
def filepath(self, fn):
    """Return the path of the file named fn inside this book's
    working directory."""
    base = self.workdir
    return os.path.join(base, fn)
422 def save_data(self
, fn
, data
):
423 """Save without tripping up on unicode"""
424 if isinstance(data
, unicode):
425 data
= data
.encode('utf8', 'ignore')
430 def save_tempfile(self
, fn
, data
):
431 """Save the data in a temporary directory that will be cleaned
432 up when all is done. Return the absolute file path."""
433 fn
= self
.filepath(fn
)
434 self
.save_data(fn
, data
)
def set_page_dimensions(self, dimensions):
    """Build a PageSettings from the keyword mapping <dimensions>
    and keep it as self.maker."""
    page_settings = PageSettings(**dimensions)
    self.maker = page_settings
441 def extract_pdf_text(self
):
442 """Extract the text from the body pdf, split into pages, so
443 that the correct page can be found to generate the table of
445 index_pdf(self
.body_pdf_file
, self
.body_index_file
)
446 f
= open(self
.body_index_file
)
447 s
= unicode(f
.read(), 'utf8')
449 #pages are separated by formfeed character "^L", "\f" or chr(12)
450 self
.text_pages
= s
.split("\f")
451 #there is sometimes (probably always) an unwanted ^L at the end
452 return len(self
.text_pages
)
454 def make_body_pdf(self
):
455 """Make a pdf of the HTML, using webkit"""
457 html_text
= lxml
.etree
.tostring(self
.tree
, method
="html")
458 self
.save_data(self
.body_html_file
, html_text
)
461 self
.maker
.make_raw_pdf(self
.body_html_file
, self
.body_pdf_file
,
463 self
.notify_watcher('generate_pdf')
465 #3. extract the text for finding contents.
466 n_pages
= self
.extract_pdf_text()
467 log ("found %s pages in pdf" % n_pages
)
468 #4. resize pages, shift gutters, and rotate 180 degrees for RTL
469 self
.maker
.reshape_pdf(self
.body_pdf_file
, self
.dir, centre_end
=True)
470 self
.notify_watcher('reshape_pdf')
473 self
.maker
.number_pdf(self
.body_pdf_file
, n_pages
, dir=self
.dir,
474 numbers
=self
.page_numbers
)
475 self
.notify_watcher("number_pdf")
476 self
.notify_watcher()
478 def make_preamble_pdf(self
):
479 contents
= self
.make_contents()
480 inside_cover_html
= self
.compose_inside_cover()
481 html
= ('<html dir="%s"><head>\n'
482 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
483 '<link rel="stylesheet" href="%s" />\n'
485 '<h1 class="frontpage">%s</h1>'
487 '<div class="contents">%s</div>\n'
488 '<div style="page-break-after: always; color:#fff" class="unseen">.'
489 '<!--%s--></div></body></html>'
490 ) % (self
.dir, self
.css_url
, self
.title
, inside_cover_html
,
491 contents
, self
.title
)
492 self
.save_data(self
.preamble_html_file
, html
)
494 self
.maker
.make_raw_pdf(self
.preamble_html_file
, self
.preamble_pdf_file
,
497 self
.maker
.reshape_pdf(self
.preamble_pdf_file
, self
.dir, centre_start
=True)
499 self
.maker
.number_pdf(self
.preamble_pdf_file
, None, dir=self
.dir,
500 numbers
=self
.preamble_page_numbers
,
503 self
.notify_watcher()
506 """A convenient wrapper of a few necessary steps"""
507 # now the Xvfb server is needed. make sure it has had long enough to get going
510 self
.make_preamble_pdf()
511 self
.make_end_matter_pdf()
513 concat_pdfs(self
.pdf_file
, self
.preamble_pdf_file
,
514 self
.body_pdf_file
, self
.tail_pdf_file
,
517 self
.notify_watcher('concatenated_pdfs')
518 #and move it into place (what place?)
521 """Rotate the pdf 180 degrees so an RTL book can print on LTR
523 rotated
= self
.filepath('final-rotate.pdf')
524 unrotated
= self
.filepath('final-pre-rotate.pdf')
525 #leave the unrotated pdf intact at first, in case of error.
526 rotate_pdf(self
.pdf_file
, rotated
)
527 os
.rename(self
.pdf_file
, unrotated
)
528 os
.rename(rotated
, self
.pdf_file
)
529 self
.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF to its final resting place.

    The file at self.pdf_file is moved to self.publish_file, then the
    watcher is notified.
    """
    # Imported here because the module-level imports are not
    # guaranteed to include shutil.
    import shutil
    log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
    # shutil.move() rather than os.rename(): the working directory
    # lives under TMPDIR and the publish directory under
    # DOCUMENT_ROOT, which may be on different filesystems.  A bare
    # rename fails there, while shutil.move falls back to
    # copy-and-delete.
    shutil.move(self.pdf_file, self.publish_file)
    self.notify_watcher()
538 """From the TOC.txt file create a list of TocItems with
539 the attributes <status>, <chapter>, and <title>.
541 <status> is a number, with the following meaning:
543 0 - section heading with no chapter
547 The TocItem object has convenience functions <is_chapter> and
550 <chapter> is twiki name of the chapter.
552 <title> is a human readable title for the chapter. It is likely to
553 differ from the title given in the chapter's <h1> heading.
555 f
= urlopen(self
.toc_url
)
559 self
.toc
.append(TocItem(f
.next().strip(),
562 except StopIteration:
565 self
.notify_watcher()
567 def load_book(self
, tidy
=True):
568 """Fetch and parse the raw html of the book. If tidy is true
569 (default) links in the document will be made absolute."""
570 f
= urlopen(self
.book_url
)
573 html
= ('<html dir="%s"><head>\n<title>%s</title>\n'
574 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
577 '<div style="page-break-before: always; color:#fff;" class="unseen">'
578 'A FLOSSManuals book</div>\n</body></html>'
579 ) % (self
.dir, self
.book
, html
)
581 self
.save_tempfile('raw.html', html
)
583 tree
= lxml
.html
.document_fromstring(html
)
585 tree
.make_links_absolute(self
.book_url
)
587 self
.headings
= [x
for x
in tree
.cssselect('h1')]
589 self
.headings
[0].set('class', "first-heading")
590 #self.heading_texts = [x.textcontent() for x in self.headings]
591 for h1
in self
.headings
:
592 h1
.title
= h1
.text_content().strip()
593 self
.notify_watcher()
597 """Wrapper around all necessary load methods."""
601 def find_page(self
, element
, start_page
=1):
602 """Search through a page iterator and return the page
603 number which the element probably occurs."""
604 text
= element
.cookie
605 for i
, content
in enumerate(self
.text_pages
[start_page
- 1:]):
606 log("looking for '%s' in page %s below:\n%s[...]" %
607 (text
, i
+ start_page
, content
[:160]), debug
='INDEX')
608 #remove spaces: they can appear spuriously
609 content
= ''.join(content
.split())
611 return i
+ start_page
, True
612 #If it isn't found, return the start page so the next chapter has a chance
613 return start_page
, False
615 def make_contents(self
):
616 """Generate HTML containing the table of contents. This can
617 only be done after the main PDF has been made."""
618 header
= '<h1>Table of Contents</h1><table class="toc">\n'
619 row_tmpl
= ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
620 '<td class="pagenumber">%s</td></tr>\n')
621 section_tmpl
= ('<tr><td class="section" colspan="3">%s</td></tr>\n')
622 footer
= '\n</table>'
628 subsections
= [] # for the subsection heading pages.
630 headings
= iter(self
.headings
)
636 except StopIteration:
637 log("heading not found for %s (previous h1 missing?). Stopping" % t
)
639 page_num
, found
= self
.find_page(h1
, page_num
)
640 # sometimes the heading isn't found, which is shown as a frown
642 contents
.append(row_tmpl
% (chapter
, h1
.title
, page_num
))
644 contents
.append(row_tmpl
% (chapter
, h1
.title
, ':-('))
647 contents
.append(section_tmpl
% t
.title
)
649 log("mystery TOC item: %s" % t
)
651 doc
= header
+ '\n'.join(contents
) + footer
652 self
.notify_watcher()
655 def add_section_titles(self
):
656 """Add any section heading pages that the TOC.txt file
657 specifies. These are sub-book, super-chapter groupings.
659 Also add initial numbers to chapters.
662 headings
= iter(self
.headings
)
667 if t
.is_chapter() and section
is not None:
670 except StopIteration:
671 log("heading not found for %s (previous h1 missing?)" % t
)
673 item
= h1
.makeelement('div', Class
='chapter')
674 log(h1
.title
, debug
='HTMLGEN')
676 _add_initial_number(item
, chapter
)
680 if not section_placed
:
681 log("placing section", debug
='HTMLGEN')
682 h1
.addprevious(section
)
683 section_placed
= True
685 log("NOT placing section", debug
='HTMLGEN')
687 #put a bold number at the beginning of the h1, and a hidden cookie at the end.
688 _add_initial_number(h1
, chapter
)
689 _add_chapter_cookie(h1
)
693 section
= self
.tree
.makeelement('div', Class
="subsection")
694 # section Element complains when you try to ask it whether it
695 # has been placed (though it does know)
696 section_placed
= False
697 heading
= lxml
.html
.fragment_fromstring(t
.title
, create_parent
='div')
698 heading
.set("Class", "subsection-heading")
699 section
.append(heading
)
701 self
.notify_watcher()
704 def add_css(self
, css
=None):
705 """If css looks like a url, use it as a stylesheet link.
706 Otherwise it is the CSS itself, which is saved to a temporary file
708 log("css is %r" % css
)
710 if css
is None or not css
.strip():
711 url
= 'file://' + os
.path
.abspath(self
.default_css
)
712 elif not re
.match(r
'^http://\S+$', css
):
713 fn
= self
.save_tempfile('objavi.css', css
)
717 #XXX for debugging and perhaps sensible anyway
718 #url = url.replace('file:///home/douglas/objavi2', '')
721 #find the head -- it's probably first child but lets not assume.
722 for child
in htmltree
:
723 if child
.tag
== 'head':
727 head
= htmltree
.makeelement('head')
728 htmltree
.insert(0, head
)
730 link
= lxml
.etree
.SubElement(head
, 'link', rel
='stylesheet', type='text/css', href
=url
)
732 self
.notify_watcher()
735 def set_title(self
, title
=None):
736 """If a string is supplied, it becomes the book's title.
737 Otherwise a guess is made."""
741 titles
= [x
.text_content() for x
in self
.tree
.cssselect('title')]
742 if titles
and titles
[0]:
743 self
.title
= titles
[0]
746 self
.title
= 'A Manual About ' + self
.book
749 def _read_localised_template(self
, template
, fallbacks
=['en']):
750 """Try to get the template in the appropriate language, otherwise in English."""
751 for lang
in [self
.lang
] + fallbacks
:
753 fn
= template
% (lang
)
757 log("couldn't open inside front cover for lang %s (filename %s)" % (lang
, fn
))
763 def compose_inside_cover(self
):
764 """create the markup for the preamble inside cover."""
765 template
= self
._read
_localised
_template
(config
.INSIDE_FRONT_COVER_TEMPLATE
)
768 isbn_text
= '<b>ISBN :</b> %s <br>' % self
.isbn
772 return template
% {'date': time
.strftime('%Y-%m-%d'),
774 'license': self
.license
,
778 def compose_end_matter(self
):
779 """create the markup for the end_matter inside cover. If
780 self.isbn is not set, the html will result in a pdf that
781 spills onto two pages.
783 template
= self
._read
_localised
_template
(config
.END_MATTER_TEMPLATE
)
785 d
= {'css_url': self
.css_url
,
790 d
['inside_cover_style'] = ''
792 d
['inside_cover_style'] = 'page-break-after: always'
800 """Start an Xvfb instance, using a new server number. A
801 reference to it is stored in self.xvfb, which is used to kill
802 it when the pdf is done.
804 Note that Xvfb doesn't interact well with dbus which is
805 present on modern desktops.
807 #Find an unused server number (in case two cgis are running at once)
809 servernum
= random
.randrange(50, 500)
810 if not os
.path
.exists('/tmp/.X%s-lock' % servernum
):
813 self
.xserver_no
= ':%s' % servernum
815 authfile
= self
.filepath('Xauthority')
816 os
.environ
['XAUTHORITY'] = authfile
818 #mcookie(1) eats into /dev/random, so avoid that
819 from hashlib
import md5
820 m
= md5("%r %r %r %r %r" % (self
, os
.environ
, os
.getpid(), time
.time(), os
.urandom(32)))
821 mcookie
= m
.hexdigest()
823 check_call(['xauth', 'add', self
.xserver_no
, '.', mcookie
])
825 self
.xvfb
= Popen(['Xvfb', self
.xserver_no
,
826 '-screen', '0', '1024x768x24',
829 #'-whitepixel', str(2 ** 24 -1),
830 #'+extension', 'Composite',
836 # We need to wait a bit before the Xvfb is ready. but the
837 # downloads are so slow that that probably doesn't matter
839 self
.xvfb_ready_time
= time
.time() + 2
841 os
.environ
['DISPLAY'] = self
.xserver_no
844 def wait_for_xvfb(self
):
845 """wait until a previously set time before continuing. This
846 is so Xvfb has time to properly start."""
847 if hasattr(self
, 'xvfb'):
848 d
= self
.xvfb_ready_time
- time
.time()
851 self
.notify_watcher()
854 """Try very hard to kill off Xvfb. In addition to killing
855 this instance's xvfb, occasionally (randomly) search for
856 escaped Xvfb instances and kill those too."""
857 if not hasattr(self
, 'xvfb'):
859 check_call(['xauth', 'remove', self
.xserver_no
])
861 log("trying to kill Xvfb %s" % p
.pid
)
864 if p
.poll() is not None:
865 log("%s died with %s" % (p
.pid
, p
.poll()))
867 log("%s not dead yet" % p
.pid
)
870 log("Xvfb would not die! kill -9! kill -9!")
873 if random
.random() < 0.05:
874 #kill old xvfbs occasionally, if there are any.
875 self
.kill_old_xvfbs()
877 def kill_old_xvfbs(self
):
878 """Sometimes, despite everything, Xvfb instances hang around
879 well after they are wanted -- for example if the cgi process
880 dies particularly badly. So kill them if they have been
881 running for a long time."""
882 log("running kill_old_xvfbs")
883 p
= Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout
=PIPE
)
884 data
= p
.communicate()[0].strip()
886 lines
= data
.split('\n')
888 log('dealing with ps output "%s"' % line
)
890 pid
, days_
, hours
, minutes
, seconds
= re
.match(r
'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
891 except AttributeError:
892 log("Couldn't parse that line!")
893 # 50 minutes should be enough xvfb time for anyone
894 if days
or hours
or int(minutes
) > 50:
895 log("going to kill pid %s" % pid
)
896 os
.kill(int(pid
), 15)
899 self
.notify_watcher()
903 if not config
.KEEP_TEMP_FILES
:
904 for fn
in os
.listdir(self
.workdir
):
905 os
.remove(os
.path
.join(self
.workdir
, fn
))
906 os
.rmdir(self
.workdir
)
908 log("NOT removing '%s', containing the following files:" % self
.workdir
)
909 log(*os
.listdir(self
.workdir
))
911 self
.notify_watcher()