1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
28 from urllib2
import urlopen
29 from subprocess
import Popen
, check_call
, PIPE
31 import lxml
.etree
, lxml
.html
32 import lxml
, lxml
.html
, lxml
.etree
35 from config
import SERVER_DEFAULTS
, DEFAULT_SERVER
, POINT_2_MM
, PDFEDIT_MAX_PAGES
37 TMPDIR
= os
.path
.abspath(config
.TMPDIR
)
38 DOC_ROOT
= os
.environ
.get('DOCUMENT_ROOT', '.')
39 PUBLISH_PATH
= "%s/books/" % DOC_ROOT
42 def log(*messages
, **kwargs
):
43 """Send the messages to the appropriate place (stderr, or syslog).
44 If a <debug> keyword is specified, the message is only printed if
45 its value is in the global DEBUG_MODES."""
46 if 'debug' not in kwargs
or config
.DEBUG_ALL
or kwargs
['debug'] in config
.DEBUG_MODES
:
49 print >> sys
.stderr
, m
51 print >> sys
.stderr
, repr(m
)
53 def _add_initial_number(e
, n
):
54 """Put a styled chapter number n at the beginning of element e."""
55 initial
= e
.makeelement("strong", Class
="initial")
58 if e
.text
is not None:
59 initial
.tail
+= e
.text
61 initial
.text
= "%s." % n
64 class TocItem(object):
65 """This makes sense of the tuples from TOC.txt files"""
66 def __init__(self
, status
, chapter
, title
):
68 # 0 - section heading with no chapter
72 # chapter is twiki name of the chapter
73 # title is a human readable name of the chapter.
75 self
.chapter
= chapter
79 return self
.status
== '1'
82 return self
.status
== '0'
85 return '<toc: %s>' % ', '.join('%s: %s' % x
for x
in self
.__dict
__.iteritems())
90 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
91 out
, err
= p
.communicate()
93 log("Failed on command: %r" % cmd
)
95 log("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" %
96 (' '.join(cmd
), cmd
[0], p
.poll(), out
, err
))
99 def find_containing_paper(w
, h
):
101 for name
, pw
, ph
in config
.PAPER_SIZES
:
102 if pw
>= w
and ph
>= h
:
105 return (name
, mw
, mh
)
107 raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
108 (w
* POINT_2_MM
, h
* POINT_2_MM
))
112 class PageSettings(object):
113 """Calculates and wraps commands for the generation and processing
115 def __init__(self
, pointsize
, **kwargs
):
116 # the formulas for default gutters, margins and column margins
117 # are quite ad-hoc and certainly improvable.
119 self
.width
, self
.height
= pointsize
120 self
.papersize
, clipx
, clipy
= find_containing_paper(self
.width
, self
.height
)
121 self
.grey_scale
= 'grey_scale' in kwargs
123 # All measurements in points unless otherwise stated
124 # user interaction is in *mm*, but is converted in objavi2.py
125 default_margin
= (config
.BASE_MARGIN
+ config
.PROPORTIONAL_MARGIN
* min(pointsize
))
126 default_gutter
= (config
.BASE_GUTTER
+ config
.PROPORTIONAL_GUTTER
* self
.width
)
128 self
.top_margin
= kwargs
.get('top_margin', default_margin
)
129 self
.side_margin
= kwargs
.get('side_margin', default_margin
)
130 self
.bottom_margin
= kwargs
.get('bottom_margin', default_margin
)
131 self
.gutter
= kwargs
.get('gutter', default_gutter
)
133 self
.columns
= kwargs
.get('columns', 1)
134 if self
.columns
== 'auto': #default for newspapers is to work out columns
135 self
.columns
= int(self
.width
// config
.MIN_COLUMN_WIDTH
)
137 self
.column_margin
= kwargs
.get('column_margin',
138 default_margin
* 2 / (5.0 + self
.columns
))
140 self
.number_bottom
= self
.bottom_margin
- 0.6 * config
.PAGE_NUMBER_SIZE
141 self
.number_margin
= self
.side_margin
143 # calculate margins in mm for browsers
145 for m
, clip
in ((self
.top_margin
, clipy
),
146 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
147 (self
.bottom_margin
, clipy
+ 0.5 * config
.PAGE_NUMBER_SIZE
),
148 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
150 self
.margins
.append((m
+ clip
) * POINT_2_MM
)
152 self
.moz_printer
= kwargs
.get('moz_printer', ('objavi_' + self
.papersize
))
153 for x
in locals().iteritems():
154 log("%s: %s" % x
, debug
='PDFGEN')
156 log("%s: %s" % (x
, getattr(self
, x
)), debug
='PDFGEN')
160 def _webkit_command(self
, html
, pdf
, outline
=False):
161 m
= [str(x
) for x
in self
.margins
]
162 outline_args
= ['--outline'] * outline
163 greyscale_args
= ['-g'] * self
.grey_scale
164 cmd
= ([config
.WKHTMLTOPDF
, '-q', '-s', self
.papersize
,
165 '-T', m
[0], '-R', m
[1], '-B', m
[2], '-L', m
[3],
166 '-d', '100'] + outline_args
+ greyscale_args
+
167 config
.WKHTMLTOPDF_EXTRA_COMMANDS
+ [html
, pdf
])
171 def _gecko_command(self
, html
, pdf
, outline
=False):
172 m
= [str(x
) for x
in self
.margins
]
173 #firefox -P pdfprint -print URL -printprinter "printer_settings"
174 cmd
= [config
.FIREFOX
, '-P', 'pdfprint', '-print',
175 html
, '-printprinter', self
.moz_printer
]
179 def make_raw_pdf(self
, html
, pdf
, engine
='webkit', outline
=False):
180 func
= getattr(self
, '_%s_command' % engine
)
181 if self
.columns
== 1:
182 cmd
= func(html
, pdf
, outline
=outline
)
185 printable_width
= self
.width
- 2.0 * self
.side_margin
- self
.gutter
186 column_width
= (printable_width
- (self
.columns
- 1) * self
.column_margin
) / self
.columns
187 page_width
= column_width
+ self
.column_margin
188 side_margin
= self
.column_margin
* 0.5
190 columnmaker
= PageSettings((page_width
, self
.height
), moz_printer
=self
.moz_printer
,
191 gutter
=0, top_margin
=self
.top_margin
,
192 side_margin
=side_margin
,
193 bottom_margin
=self
.bottom_margin
,
194 grey_scale
=self
.grey_scale
,
197 column_pdf
= pdf
[:-4] + '-single-column.pdf'
198 columnmaker
.make_raw_pdf(html
, column_pdf
, engine
=engine
, outline
=outline
)
199 columnmaker
.reshape_pdf(column_pdf
)
202 '--nup', '%sx1' % int(self
.columns
),
203 '--paper', self
.papersize
.lower() + 'paper',
205 '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
206 '--noautoscale', 'true',
207 '--orient', 'portrait',
216 def reshape_pdf(self
, pdf
, dir='LTR', centre_start
=False, centre_end
=False,
218 """Spin the pdf for RTL text, resize it to the right size, and
219 shift the gutter left and right"""
228 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
231 'output_filename=%s' % pdf
,
232 'operation=%s' % ops
,
233 'width=%s' % self
.width
,
234 'height=%s' % self
.height
,
235 'offset=%s' % gutter
,
236 'centre_start=%s' % centre_start
,
237 'centre_end=%s' % centre_end
,
241 def _number_pdf(self
, pdf
, numbers
='latin', dir='LTR',
243 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
244 'operation=page_numbers',
247 'output_filename=%s' % pdf
,
248 'number_start=%s' % number_start
,
249 'number_style=%s' % numbers
,
250 'number_bottom=%s' % self
.number_bottom
,
251 'number_margin=%s' % self
.number_margin
,
255 def number_pdf(self
, pdf
, pages
, **kwargs
):
256 # if there are too many pages for pdfedit to handle in one go,
257 # split the job into bits. <pages> may not be exact
258 if pages
is None or pages
<= PDFEDIT_MAX_PAGES
:
259 self
._number
_pdf
(pdf
, **kwargs
)
261 # section_size must be even
262 sections
= pages
// PDFEDIT_MAX_PAGES
+ 1
263 section_size
= (pages
// sections
+ 2) & ~
1
266 s
= kwargs
.pop('number_start', 1)
268 e
= s
+ section_size
- 1
269 pdf_section
= '%s-%s-%s.pdf' % (pdf
[:-4], s
, e
)
271 page_range
= '%s-%s' % (s
, e
)
273 page_range
= '%s-end' % s
281 self
._number
_pdf
(pdf_section
, number_start
=s
, **kwargs
)
282 pdf_sections
.append(pdf_section
)
285 concat_pdfs(pdf
, *pdf_sections
)
287 def make_barcode_pdf(self
, isbn
, pdf
, corner
='br'):
288 """Put an ISBN barcode in a corner of a single blank page."""
290 position
= '%s,%s,%s,%s,%s' %(corner
, self
.width
, self
.height
, self
.side_margin
, self
.bottom_margin
)
291 cmd1
= [config
.BOOKLAND
,
292 '--position', position
,
296 '-dDEVICEWIDTHPOINTS=%s' % self
.width
,
297 '-dDEVICEHEIGHTPOINTS=%s' % self
.height
,
300 p1
= Popen(cmd1
, stdout
=PIPE
)
301 p2
= Popen(cmd2
, stdin
=p1
.stdout
, stdout
=PIPE
, stderr
=PIPE
)
302 out
, err
= p2
.communicate()
304 log('ran:\n%s | %s' % (' '.join(cmd1
), ' '.join(cmd2
)))
305 log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1
.poll(), p2
.poll(), out
, err
))
def count_pdf_pages(pdf):
    """How many pages in the PDF?

    Runs ``pdfinfo`` on <pdf> and returns, as an int, the number found
    on the "Pages:" line of its output.

    Raises ValueError if the output contains no "Pages:" line (for
    example when pdfinfo could not read the file).
    """
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    if not isinstance(out, str):
        # Pipes yield bytes on Python 3, while the pattern below is text.
        out = out.decode('utf-8', 'replace')
    # The text to search must be the second argument.  Passing
    # re.MULTILINE in that position makes re.search() treat the flag as
    # the string, so the pdfinfo output was never looked at.
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        raise ValueError("pdfinfo gave no page count for %r "
                         "(exit status %s): %r" % (pdf, p.returncode, err))
    return int(m.group(1))
318 def concat_pdfs(destination
, *pdfs
):
319 """Join all the named pdfs together into one and save it as <name>"""
321 cmd
.extend(x
for x
in pdfs
if x
is not None)
322 cmd
+= ['cat', 'output', destination
]
325 def index_pdf(pdf
, text
=None):
326 """Use pdftotext to extract utf-8 text from a pdf, using ^L to
329 text
= pdf
+ '.index.txt'
331 #'-layout', #keeps more original formatting
337 def rotate_pdf(pdfin
, pdfout
):
338 """Turn the PDF on its head"""
339 cmd
= ['pdftk', pdfin
,
347 def parse_outline(pdf
, level_threshold
):
348 """Create a structure reflecting the outline of a PDF.
349 A chapter heading looks like this:
351 BookmarkTitle: 2. What is sound?
353 BookmarkPageNumber: 3
355 cmd
= ('pdftk', pdf
, 'dump_data')
356 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
357 outline
, err
= p
.communicate()
358 lines
= (x
.strip() for x
in outline
.split('\n') if x
.strip())
361 def extract(expected
, conv
=str.strip
):
364 k
, v
= line
.split(':', 1)
368 log("trouble with line %r" %line
)
370 #There are a few useless variables, then the pagecount, then the contents.
371 #The pagecount is useful, so pick it up first.
373 while page_count
== None:
374 page_count
= extract('NumberOfPages', int)
378 title
= extract('BookmarkTitle')
379 if title
is not None:
380 level
= extract('BookmarkLevel', int)
381 pagenum
= extract('BookmarkPageNumber', int)
382 if level
<= level_threshold
and None not in (level
, pagenum
):
383 contents
.append((title
, level
, pagenum
))
384 except StopIteration:
387 return contents
, outline
, page_count
391 page_numbers
= 'latin'
392 preamble_page_numbers
= 'roman'
394 _try_cleanup_on_del
= config
.TRY_BOOK_CLEANUP_ON_DEL
396 def notify_watcher(self
, message
=None):
399 #message is the name of the caller
400 #XXX look at using inspect module
402 message
= traceback
.extract_stack(None, 2)[0][2]
403 log("notify_watcher called with '%s'" % message
)
404 self
.watcher(message
)
409 def __exit__(self
, exc_type
, exc_value
, traceback
):
411 #could deal with exceptions here and return true
413 def __init__(self
, book
, server
, bookname
,
414 page_settings
=None, engine
=None, watcher
=None, isbn
=None,
415 license
=config
.DEFAULT_LICENSE
):
416 log("*** Starting new book %s ***" % bookname
)
419 self
.watcher
= watcher
421 self
.license
= license
422 self
.workdir
= tempfile
.mkdtemp(prefix
=bookname
, dir=TMPDIR
)
423 os
.chmod(self
.workdir
, 0755)
424 defaults
= SERVER_DEFAULTS
[server
]
425 self
.lang
= defaults
['lang']
426 self
.dir = defaults
['dir']
428 self
.body_html_file
= self
.filepath('body.html')
429 self
.body_pdf_file
= self
.filepath('body.pdf')
430 self
.body_index_file
= self
.filepath('body.txt')
431 self
.preamble_html_file
= self
.filepath('preamble.html')
432 self
.preamble_pdf_file
= self
.filepath('preamble.pdf')
433 self
.tail_html_file
= self
.filepath('tail.html')
434 self
.tail_pdf_file
= self
.filepath('tail.pdf')
435 self
.isbn_pdf_file
= None
436 self
.pdf_file
= self
.filepath('final.pdf')
438 self
.publish_name
= bookname
439 self
.publish_file
= os
.path
.join(PUBLISH_PATH
, self
.publish_name
)
440 self
.publish_url
= os
.path
.join(config
.PUBLISH_URL
, self
.publish_name
)
442 self
.book_url
= config
.BOOK_URL
% (self
.server
, self
.book
)
443 self
.toc_url
= config
.TOC_URL
% (self
.server
, self
.book
)
445 self
.maker
= PageSettings(**page_settings
)
447 if engine
is not None:
449 self
.notify_watcher()
451 if config
.TRY_BOOK_CLEANUP_ON_DEL
:
452 #Don't even define __del__ if it is not used.
453 _try_cleanup_on_del
= True
455 if self
._try
_cleanup
_on
_del
and os
.path
.exists(self
.workdir
):
456 self
._try
_cleanup
_on
_del
= False #or else you can get in bad cycles
459 def __getattr__(self
, attr
):
460 """catch unloaded books and load them"""
461 #log('looking for missing attribute "%s"' % (attr))
468 raise AttributeError("no such member: '%s'" % attr
)
def filepath(self, fn):
    """Return <fn> as a path inside this book's working directory."""
    parts = (self.workdir, fn)
    return os.path.join(*parts)
474 def save_data(self
, fn
, data
):
475 """Save without tripping up on unicode"""
476 if isinstance(data
, unicode):
477 data
= data
.encode('utf8', 'ignore')
482 def save_tempfile(self
, fn
, data
):
483 """Save the data in a temporary directory that will be cleaned
484 up when all is done. Return the absolute file path."""
485 fn
= self
.filepath(fn
)
486 self
.save_data(fn
, data
)
489 def extract_pdf_outline(self
):
490 self
.outline_contents
, self
.outline_text
, number_of_pages
= parse_outline(self
.body_pdf_file
, 1)
491 for x
in self
.outline_contents
:
493 return number_of_pages
495 def make_body_pdf(self
):
496 """Make a pdf of the HTML, using webkit"""
498 html_text
= lxml
.etree
.tostring(self
.tree
, method
="html")
499 self
.save_data(self
.body_html_file
, html_text
)
502 self
.maker
.make_raw_pdf(self
.body_html_file
, self
.body_pdf_file
,
503 engine
=self
.engine
, outline
=True)
504 self
.notify_watcher('generate_pdf')
506 n_pages
= self
.extract_pdf_outline()
508 log ("found %s pages in pdf" % n_pages
)
509 #4. resize pages, shift gutters, even pages
510 self
.maker
.reshape_pdf(self
.body_pdf_file
, self
.dir, centre_end
=True)
511 self
.notify_watcher('reshape_pdf')
514 self
.maker
.number_pdf(self
.body_pdf_file
, n_pages
, dir=self
.dir,
515 numbers
=self
.page_numbers
)
516 self
.notify_watcher("number_pdf")
517 self
.notify_watcher()
519 def make_preamble_pdf(self
):
520 contents
= self
.make_contents()
521 inside_cover_html
= self
.compose_inside_cover()
522 html
= ('<html dir="%s"><head>\n'
523 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
524 '<link rel="stylesheet" href="%s" />\n'
526 '<h1 class="frontpage">%s</h1>'
528 '<div class="contents">%s</div>\n'
529 '<div style="page-break-after: always; color:#fff" class="unseen">.'
530 '<!--%s--></div></body></html>'
531 ) % (self
.dir, self
.css_url
, self
.title
, inside_cover_html
,
532 contents
, self
.title
)
533 self
.save_data(self
.preamble_html_file
, html
)
535 self
.maker
.make_raw_pdf(self
.preamble_html_file
, self
.preamble_pdf_file
,
538 self
.maker
.reshape_pdf(self
.preamble_pdf_file
, self
.dir, centre_start
=True)
540 self
.maker
.number_pdf(self
.preamble_pdf_file
, None, dir=self
.dir,
541 numbers
=self
.preamble_page_numbers
,
544 self
.notify_watcher()
546 def make_end_matter_pdf(self
):
547 """Make an inside back cover and a back cover. If there is an
548 isbn number its barcode will be put on the back cover."""
550 self
.isbn_pdf_file
= self
.filepath('isbn.pdf')
551 self
.maker
.make_barcode_pdf(self
.isbn
, self
.isbn_pdf_file
)
552 self
.notify_watcher('make_barcode_pdf')
554 self
.save_data(self
.tail_html_file
, self
.compose_end_matter())
555 self
.maker
.make_raw_pdf(self
.tail_html_file
, self
.tail_pdf_file
,
558 self
.maker
.reshape_pdf(self
.tail_pdf_file
, self
.dir, centre_start
=True,
559 centre_end
=True, even_pages
=False)
560 self
.notify_watcher()
562 def make_book_pdf(self
):
563 """A convenient wrapper of a few necessary steps"""
564 # now the Xvfb server is needed. make sure it has had long enough to get going
567 self
.make_preamble_pdf()
568 self
.make_end_matter_pdf()
570 concat_pdfs(self
.pdf_file
, self
.preamble_pdf_file
,
571 self
.body_pdf_file
, self
.tail_pdf_file
,
574 self
.notify_watcher('concatenated_pdfs')
577 def make_simple_pdf(self
, mode
):
578 """Make a simple pdf document without contents or separate
579 title page. This is used for multicolumn newspapers and for
580 web-destined pdfs."""
582 #0. Add heading to beginning of html
583 body
= list(self
.tree
.cssselect('body'))[0]
584 e
= body
.makeelement('h1', {'id': 'book-title'})
587 intro
= lxml
.html
.fragment_fromstring(self
.compose_inside_cover())
590 #0.5 adjust parameters to suit the particular kind of output
592 self
.maker
.gutter
= 0
595 html_text
= lxml
.etree
.tostring(self
.tree
, method
="html")
596 self
.save_data(self
.body_html_file
, html_text
)
598 #2. Make a pdf of it (direct to final pdf)
599 self
.maker
.make_raw_pdf(self
.body_html_file
, self
.pdf_file
,
600 engine
=self
.engine
, outline
=True)
601 self
.notify_watcher('generate_pdf')
602 n_pages
= self
.extract_pdf_outline()
605 #3. resize pages and shift gutters.
606 self
.maker
.reshape_pdf(self
.pdf_file
, self
.dir, centre_end
=True)
607 self
.notify_watcher('reshape_pdf')
610 self
.maker
.number_pdf(self
.pdf_file
, n_pages
,
611 dir=self
.dir, numbers
=self
.page_numbers
)
612 self
.notify_watcher("number_pdf")
613 self
.notify_watcher()
617 """Rotate the pdf 180 degrees so an RTL book can print on LTR
619 rotated
= self
.filepath('final-rotate.pdf')
620 unrotated
= self
.filepath('final-pre-rotate.pdf')
621 #leave the unrotated pdf intact at first, in case of error.
622 rotate_pdf(self
.pdf_file
, rotated
)
623 os
.rename(self
.pdf_file
, unrotated
)
624 os
.rename(rotated
, self
.pdf_file
)
625 self
.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF to its final resting place.

    The file at self.pdf_file is moved to self.publish_file, then the
    watcher is notified.
    """
    # Imported locally so this method does not depend on a module-level
    # import of shutil.
    import shutil
    log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
    # The PDF is built in a temporary work directory while the publish
    # path lives under the document root; os.rename() raises OSError if
    # those are on different filesystems.  shutil.move() renames when it
    # can and otherwise copies the file and removes the source.
    shutil.move(self.pdf_file, self.publish_file)
    # Called directly (not through a helper) so the watcher still sees
    # this method as the caller.
    self.notify_watcher()
634 """From the TOC.txt file create a list of TocItems with
635 the attributes <status>, <chapter>, and <title>.
637 <status> is a number, with the following meaning:
639 0 - section heading with no chapter
643 The TocItem object has convenience functions <is_chapter> and
646 <chapter> is twiki name of the chapter.
648 <title> is a human readable title for the chapter. It is likely to
649 differ from the title given in the chapter's <h1> heading.
651 f
= urlopen(self
.toc_url
)
655 self
.toc
.append(TocItem(f
.next().strip(),
658 except StopIteration:
661 self
.notify_watcher()
663 def load_book(self
, tidy
=True):
664 """Fetch and parse the raw html of the book. If tidy is true
665 (default) links in the document will be made absolute."""
666 f
= urlopen(self
.book_url
)
669 html
= ('<html dir="%s"><head>\n<title>%s</title>\n'
670 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
673 '<div style="page-break-before: always; color:#fff;" class="unseen">'
674 'A FLOSSManuals book</div>\n</body></html>'
675 ) % (self
.dir, self
.book
, html
)
677 self
.save_tempfile('raw.html', html
)
679 tree
= lxml
.html
.document_fromstring(html
)
681 tree
.make_links_absolute(self
.book_url
)
683 self
.headings
= [x
for x
in tree
.cssselect('h1')]
685 self
.headings
[0].set('class', "first-heading")
686 for h1
in self
.headings
:
687 h1
.title
= h1
.text_content().strip()
688 self
.notify_watcher()
691 """Wrapper around all necessary load methods."""
695 def make_contents(self
):
696 """Generate HTML containing the table of contents. This can
697 only be done after the main PDF has been made."""
698 header
= '<h1>Table of Contents</h1><table class="toc">\n'
699 row_tmpl
= ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
700 '<td class="pagenumber">%s</td></tr>\n')
701 section_tmpl
= ('<tr><td class="section" colspan="3">%s</td></tr>\n')
702 footer
= '\n</table>'
708 subsections
= [] # for the subsection heading pages.
710 outline_contents
= iter(self
.outline_contents
)
711 headings
= iter(self
.headings
)
717 except StopIteration:
718 log("heading not found for %s (previous h1 missing?). Stopping" % t
)
720 h1_text
, level
, page_num
= outline_contents
.next()
721 log("%r %r" % (h1
.title
, h1_text
))
722 contents
.append(row_tmpl
% (chapter
, h1
.title
, page_num
))
725 contents
.append(section_tmpl
% t
.title
)
727 log("mystery TOC item: %s" % t
)
729 doc
= header
+ '\n'.join(contents
) + footer
730 self
.notify_watcher()
733 def add_section_titles(self
):
734 """Add any section heading pages that the TOC.txt file
735 specifies. These are sub-book, super-chapter groupings.
737 Also add initial numbers to chapters.
739 headings
= iter(self
.headings
)
744 if t
.is_chapter() and section
is not None:
747 except StopIteration:
748 log("heading not found for %s (previous h1 missing?)" % t
)
750 item
= h1
.makeelement('div', Class
='chapter')
751 log(h1
.title
, debug
='HTMLGEN')
753 _add_initial_number(item
, chapter
)
757 if not section_placed
:
758 log("placing section", debug
='HTMLGEN')
759 h1
.addprevious(section
)
760 section_placed
= True
762 log("NOT placing section", debug
='HTMLGEN')
764 #put a bold number at the beginning of the h1.
765 _add_initial_number(h1
, chapter
)
769 section
= self
.tree
.makeelement('div', Class
="subsection")
770 # section Element complains when you try to ask it whether it
771 # has been placed (though it does know)
772 section_placed
= False
773 heading
= lxml
.html
.fragment_fromstring(t
.title
, create_parent
='div')
774 heading
.set("Class", "subsection-heading")
775 section
.append(heading
)
777 self
.notify_watcher()
780 def add_css(self
, css
=None, mode
='book'):
781 """If css looks like a url, use it as a stylesheet link.
782 Otherwise it is the CSS itself, which is saved to a temporary file
784 log("css is %r" % css
)
786 if css
is None or not css
.strip():
787 defaults
= SERVER_DEFAULTS
[self
.server
]
788 url
= 'file://' + os
.path
.abspath(defaults
['css-%s' % mode
])
789 elif not re
.match(r
'^http://\S+$', css
):
790 fn
= self
.save_tempfile('objavi.css', css
)
794 #XXX for debugging and perhaps sensible anyway
795 #url = url.replace('file:///home/douglas/objavi2', '')
798 #find the head -- it's probably first child but lets not assume.
799 for child
in htmltree
:
800 if child
.tag
== 'head':
804 head
= htmltree
.makeelement('head')
805 htmltree
.insert(0, head
)
807 link
= lxml
.etree
.SubElement(head
, 'link', rel
='stylesheet', type='text/css', href
=url
)
809 self
.notify_watcher()
812 def set_title(self
, title
=None):
813 """If a string is supplied, it becomes the book's title.
814 Otherwise a guess is made."""
818 titles
= [x
.text_content() for x
in self
.tree
.cssselect('title')]
819 if titles
and titles
[0]:
820 self
.title
= titles
[0]
823 self
.title
= 'A Manual About ' + self
.book
826 def _read_localised_template(self
, template
, fallbacks
=['en']):
827 """Try to get the template in the appropriate language, otherwise in English."""
828 for lang
in [self
.lang
] + fallbacks
:
830 fn
= template
% (lang
)
834 log("couldn't open inside front cover for lang %s (filename %s)" % (lang
, fn
))
840 def compose_inside_cover(self
):
841 """create the markup for the preamble inside cover."""
842 template
= self
._read
_localised
_template
(config
.INSIDE_FRONT_COVER_TEMPLATE
)
845 isbn_text
= '<b>ISBN :</b> %s <br>' % self
.isbn
849 return template
% {'date': time
.strftime('%Y-%m-%d'),
851 'license': self
.license
,
855 def compose_end_matter(self
):
856 """create the markup for the end_matter inside cover. If
857 self.isbn is not set, the html will result in a pdf that
858 spills onto two pages.
860 template
= self
._read
_localised
_template
(config
.END_MATTER_TEMPLATE
)
862 d
= {'css_url': self
.css_url
,
867 d
['inside_cover_style'] = ''
869 d
['inside_cover_style'] = 'page-break-after: always'
877 """Start an Xvfb instance, using a new server number. A
878 reference to it is stored in self.xvfb, which is used to kill
879 it when the pdf is done.
881 Note that Xvfb doesn't interact well with dbus which is
882 present on modern desktops.
884 #Find an unused server number (in case two cgis are running at once)
886 servernum
= random
.randrange(50, 500)
887 if not os
.path
.exists('/tmp/.X%s-lock' % servernum
):
890 self
.xserver_no
= ':%s' % servernum
892 authfile
= self
.filepath('Xauthority')
893 os
.environ
['XAUTHORITY'] = authfile
895 #mcookie(1) eats into /dev/random, so avoid that
896 from hashlib
import md5
897 m
= md5("%r %r %r %r %r" % (self
, os
.environ
, os
.getpid(), time
.time(), os
.urandom(32)))
898 mcookie
= m
.hexdigest()
900 check_call(['xauth', 'add', self
.xserver_no
, '.', mcookie
])
902 self
.xvfb
= Popen(['Xvfb', self
.xserver_no
,
903 '-screen', '0', '1024x768x24',
906 #'-whitepixel', str(2 ** 24 -1),
907 #'+extension', 'Composite',
913 # We need to wait a bit before the Xvfb is ready. but the
914 # downloads are so slow that that probably doesn't matter
916 self
.xvfb_ready_time
= time
.time() + 2
918 os
.environ
['DISPLAY'] = self
.xserver_no
921 def wait_for_xvfb(self
):
922 """wait until a previously set time before continuing. This
923 is so Xvfb has time to properly start."""
924 if hasattr(self
, 'xvfb'):
925 d
= self
.xvfb_ready_time
- time
.time()
928 self
.notify_watcher()
931 """Try very hard to kill off Xvfb. In addition to killing
932 this instance's xvfb, occasionally (randomly) search for
933 escaped Xvfb instances and kill those too."""
934 if not hasattr(self
, 'xvfb'):
936 check_call(['xauth', 'remove', self
.xserver_no
])
938 log("trying to kill Xvfb %s" % p
.pid
)
941 if p
.poll() is not None:
942 log("%s died with %s" % (p
.pid
, p
.poll()))
944 log("%s not dead yet" % p
.pid
)
947 log("Xvfb would not die! kill -9! kill -9!")
950 if random
.random() < 0.05:
951 #kill old xvfbs occasionally, if there are any.
952 self
.kill_old_xvfbs()
954 def kill_old_xvfbs(self
):
955 """Sometimes, despite everything, Xvfb instances hang around
956 well after they are wanted -- for example if the cgi process
957 dies particularly badly. So kill them if they have been
958 running for a long time."""
959 log("running kill_old_xvfbs")
960 p
= Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout
=PIPE
)
961 data
= p
.communicate()[0].strip()
963 lines
= data
.split('\n')
965 log('dealing with ps output "%s"' % line
)
967 pid
, days_
, hours
, minutes
, seconds
= re
.match(r
'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
968 except AttributeError:
969 log("Couldn't parse that line!")
970 # 50 minutes should be enough xvfb time for anyone
971 if days
or hours
or int(minutes
) > 50:
972 log("going to kill pid %s" % pid
)
973 os
.kill(int(pid
), 15)
976 self
.notify_watcher()
980 if not config
.KEEP_TEMP_FILES
:
981 for fn
in os
.listdir(self
.workdir
):
982 os
.remove(os
.path
.join(self
.workdir
, fn
))
983 os
.rmdir(self
.workdir
)
985 log("NOT removing '%s', containing the following files:" % self
.workdir
)
986 log(*os
.listdir(self
.workdir
))
988 self
.notify_watcher()