1 # Part of Objavi2, which turns html manuals into books.
2 # This provides abstractions of texts and virtual printers and manages
5 # Copyright (C) 2009 Douglas Bagnall
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 """Library module representing a complete FM book being turned into a
28 from urllib2
import urlopen
29 from subprocess
import Popen
, check_call
, PIPE
31 import lxml
.etree
, lxml
.html
32 import lxml
, lxml
.html
, lxml
.etree
35 from config
import SERVER_DEFAULTS
, DEFAULT_SERVER
, POINT_2_MM
, PDFEDIT_MAX_PAGES
# Absolute form of the configured temporary directory; each book's
# working directory is created inside it.
TMPDIR = os.path.abspath(config.TMPDIR)
# Taken from the DOCUMENT_ROOT environment variable, falling back to
# the current directory when that variable is not set.
DOC_ROOT = os.environ.get('DOCUMENT_ROOT', '.')
# The "books/" directory under DOC_ROOT; published file names are
# joined onto this path.
PUBLISH_PATH = "%s/books/" % DOC_ROOT
42 def log(*messages
, **kwargs
):
43 """Send the messages to the appropriate place (stderr, or syslog).
44 If a <debug> keyword is specified, the message is only printed if
45 its value is in the global DEBUG_MODES."""
46 if 'debug' not in kwargs
or config
.DEBUG_ALL
or kwargs
['debug'] in config
.DEBUG_MODES
:
49 print >> sys
.stderr
, m
51 print >> sys
.stderr
, repr(m
)
53 def _add_initial_number(e
, n
):
54 """Put a styled chapter number n at the beginning of element e."""
55 initial
= e
.makeelement("strong", Class
="initial")
58 if e
.text
is not None:
59 initial
.tail
+= e
.text
61 initial
.text
= "%s." % n
64 class TocItem(object):
65 """This makes sense of the tuples from TOC.txt files"""
66 def __init__(self
, status
, chapter
, title
):
68 # 0 - section heading with no chapter
72 # chapter is twiki name of the chapter
73 # title is a human readable name of the chapter.
75 self
.chapter
= chapter
79 return self
.status
== '1'
82 return self
.status
== '0'
85 return '<toc: %s>' % ', '.join('%s: %s' % x
for x
in self
.__dict
__.iteritems())
90 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
91 out
, err
= p
.communicate()
93 log("Failed on command: %r" % cmd
)
95 log("%s\n%s returned %s and produced\nstdout:%s\nstderr:%s" %
96 (' '.join(cmd
), cmd
[0], p
.poll(), out
, err
))
99 def find_containing_paper(w
, h
):
101 for name
, pw
, ph
in config
.PAPER_SIZES
:
102 if pw
>= w
and ph
>= h
:
105 return (name
, mw
, mh
)
107 raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
108 (w
* POINT_2_MM
, h
* POINT_2_MM
))
112 class PageSettings(object):
113 """Calculates and wraps commands for the generation and processing
115 def __init__(self
, pointsize
, **kwargs
):
116 # the formulas for default gutters, margins and column margins
117 # are quite ad-hoc and certainly improvable.
119 self
.width
, self
.height
= pointsize
120 self
.papersize
, clipx
, clipy
= find_containing_paper(self
.width
, self
.height
)
121 self
.grey_scale
= 'grey_scale' in kwargs
123 # All measurements in points unless otherwise stated
124 # user interaction is in *mm*, but is converted in objavi2.py
125 default_margin
= (config
.BASE_MARGIN
+ config
.PROPORTIONAL_MARGIN
* min(pointsize
))
126 default_gutter
= (config
.BASE_GUTTER
+ config
.PROPORTIONAL_GUTTER
* self
.width
)
128 self
.top_margin
= kwargs
.get('top_margin', default_margin
)
129 self
.side_margin
= kwargs
.get('side_margin', default_margin
)
130 self
.bottom_margin
= kwargs
.get('bottom_margin', default_margin
)
131 self
.gutter
= kwargs
.get('gutter', default_gutter
)
133 self
.columns
= kwargs
.get('columns', 1)
134 if self
.columns
== 'auto': #default for newspapers is to work out columns
135 self
.columns
= int(self
.width
// config
.MIN_COLUMN_WIDTH
)
137 self
.column_margin
= kwargs
.get('column_margin',
138 default_margin
* 2 / (5.0 + self
.columns
))
140 self
.number_bottom
= self
.bottom_margin
- 0.6 * config
.PAGE_NUMBER_SIZE
141 self
.number_margin
= self
.side_margin
143 # calculate margins in mm for browsers
145 for m
, clip
in ((self
.top_margin
, clipy
),
146 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
147 (self
.bottom_margin
, clipy
+ 0.5 * config
.PAGE_NUMBER_SIZE
),
148 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
150 self
.margins
.append((m
+ clip
) * POINT_2_MM
)
152 self
.moz_printer
= kwargs
.get('moz_printer', ('objavi_' + self
.papersize
))
154 if 'PDFGEN' in config
.DEBUG_MODES
:
155 log("making PageSettings with:")
156 for x
in locals().iteritems():
157 log("%s: %s" % x
, debug
='PDFGEN')
159 if not x
.startswith('__'):
160 log("self.%s: %s" % (x
, getattr(self
, x
)), debug
='PDFGEN')
164 def _webkit_command(self
, html
, pdf
, outline
=False):
165 m
= [str(x
) for x
in self
.margins
]
166 outline_args
= ['--outline'] * outline
167 greyscale_args
= ['-g'] * self
.grey_scale
168 cmd
= ([config
.WKHTMLTOPDF
, '-q', '-s', self
.papersize
,
169 '-T', m
[0], '-R', m
[1], '-B', m
[2], '-L', m
[3],
170 '-d', '100'] + outline_args
+ greyscale_args
+
171 config
.WKHTMLTOPDF_EXTRA_COMMANDS
+ [html
, pdf
])
175 def _gecko_command(self
, html
, pdf
, outline
=False):
176 m
= [str(x
) for x
in self
.margins
]
177 #firefox -P pdfprint -print URL -printprinter "printer_settings"
178 cmd
= [config
.FIREFOX
, '-P', 'pdfprint', '-print',
179 html
, '-printprinter', self
.moz_printer
]
183 def make_raw_pdf(self
, html
, pdf
, engine
='webkit', outline
=False):
184 func
= getattr(self
, '_%s_command' % engine
)
185 if self
.columns
== 1:
186 cmd
= func(html
, pdf
, outline
=outline
)
189 printable_width
= self
.width
- 2.0 * self
.side_margin
- self
.gutter
190 column_width
= (printable_width
- (self
.columns
- 1) * self
.column_margin
) / self
.columns
191 page_width
= column_width
+ self
.column_margin
192 side_margin
= self
.column_margin
* 0.5
193 if 'PDFGEN' in config
.DEBUG_MODES
:
194 log("making columns with:")
195 for k
, v
in locals().iteritems():
196 log("%s: %r" % (k
, v
))
197 for k
in ('width', 'side_margin', 'gutter', 'column_margin', 'columns', 'height'):
198 log("self.%s: %r" % (k
, getattr(self
, k
)))
200 columnmaker
= PageSettings((page_width
, self
.height
), moz_printer
=self
.moz_printer
,
201 gutter
=0, top_margin
=self
.top_margin
,
202 side_margin
=side_margin
,
203 bottom_margin
=self
.bottom_margin
,
204 grey_scale
=self
.grey_scale
,
207 column_pdf
= pdf
[:-4] + '-single-column.pdf'
208 columnmaker
.make_raw_pdf(html
, column_pdf
, engine
=engine
, outline
=outline
)
209 columnmaker
.reshape_pdf(column_pdf
)
212 '--nup', '%sx1' % int(self
.columns
),
213 '--paper', self
.papersize
.lower() + 'paper',
215 '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
216 '--noautoscale', 'true',
217 '--orient', 'portrait',
226 def reshape_pdf(self
, pdf
, dir='LTR', centre_start
=False, centre_end
=False,
228 """Spin the pdf for RTL text, resize it to the right size, and
229 shift the gutter left and right"""
238 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
241 'output_filename=%s' % pdf
,
242 'operation=%s' % ops
,
243 'width=%s' % self
.width
,
244 'height=%s' % self
.height
,
245 'offset=%s' % gutter
,
246 'centre_start=%s' % centre_start
,
247 'centre_end=%s' % centre_end
,
251 def _number_pdf(self
, pdf
, numbers
='latin', dir='LTR',
253 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
254 'operation=page_numbers',
257 'output_filename=%s' % pdf
,
258 'number_start=%s' % number_start
,
259 'number_style=%s' % numbers
,
260 'number_bottom=%s' % self
.number_bottom
,
261 'number_margin=%s' % self
.number_margin
,
265 def number_pdf(self
, pdf
, pages
, **kwargs
):
266 # if there are too many pages for pdfedit to handle in one go,
267 # split the job into bits. <pages> may not be exact
268 if pages
is None or pages
<= PDFEDIT_MAX_PAGES
:
269 self
._number
_pdf
(pdf
, **kwargs
)
271 # section_size must be even
272 sections
= pages
// PDFEDIT_MAX_PAGES
+ 1
273 section_size
= (pages
// sections
+ 2) & ~
1
276 s
= kwargs
.pop('number_start', 1)
278 e
= s
+ section_size
- 1
279 pdf_section
= '%s-%s-%s.pdf' % (pdf
[:-4], s
, e
)
281 page_range
= '%s-%s' % (s
, e
)
283 page_range
= '%s-end' % s
291 self
._number
_pdf
(pdf_section
, number_start
=s
, **kwargs
)
292 pdf_sections
.append(pdf_section
)
295 concat_pdfs(pdf
, *pdf_sections
)
297 def make_barcode_pdf(self
, isbn
, pdf
, corner
='br'):
298 """Put an ISBN barcode in a corner of a single blank page."""
300 position
= '%s,%s,%s,%s,%s' %(corner
, self
.width
, self
.height
, self
.side_margin
, self
.bottom_margin
)
301 cmd1
= [config
.BOOKLAND
,
302 '--position', position
,
306 '-dDEVICEWIDTHPOINTS=%s' % self
.width
,
307 '-dDEVICEHEIGHTPOINTS=%s' % self
.height
,
310 p1
= Popen(cmd1
, stdout
=PIPE
)
311 p2
= Popen(cmd2
, stdin
=p1
.stdout
, stdout
=PIPE
, stderr
=PIPE
)
312 out
, err
= p2
.communicate()
314 log('ran:\n%s | %s' % (' '.join(cmd1
), ' '.join(cmd2
)))
315 log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1
.poll(), p2
.poll(), out
, err
))
def count_pdf_pages(pdf):
    """How many pages in the PDF?

    Runs ``pdfinfo`` on <pdf> and reads the number from the "Pages:"
    line of its standard output.

    Returns the page count as an int.  Raises ValueError if the output
    contains no "Pages:" line (for instance when pdfinfo could not read
    the file); pdfinfo's stderr is included in the message.
    """
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    if not isinstance(out, str):
        # communicate() can hand back bytes rather than str; the
        # pattern below is a str, so normalise before searching.
        out = out.decode('utf-8', 'replace')
    # The text to search is the second argument and the flags the third.
    # Passing re.MULTILINE as the second argument would try to search
    # the flag value instead of pdfinfo's output.
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        raise ValueError("pdfinfo gave no page count for %r (stderr: %r)"
                         % (pdf, err))
    return int(m.group(1))
328 def concat_pdfs(destination
, *pdfs
):
329 """Join all the named pdfs together into one and save it as <name>"""
331 cmd
.extend(x
for x
in pdfs
if x
is not None)
332 cmd
+= ['cat', 'output', destination
]
335 def index_pdf(pdf
, text
=None):
336 """Use pdftotext to extract utf-8 text from a pdf, using ^L to
339 text
= pdf
+ '.index.txt'
341 #'-layout', #keeps more original formatting
347 def rotate_pdf(pdfin
, pdfout
):
348 """Turn the PDF on its head"""
349 cmd
= ['pdftk', pdfin
,
357 def parse_outline(pdf
, level_threshold
):
358 """Create a structure reflecting the outline of a PDF.
359 A chapter heading looks like this:
361 BookmarkTitle: 2. What is sound?
363 BookmarkPageNumber: 3
365 cmd
= ('pdftk', pdf
, 'dump_data')
366 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
367 outline
, err
= p
.communicate()
368 lines
= (x
.strip() for x
in outline
.split('\n') if x
.strip())
371 def extract(expected
, conv
=str.strip
):
374 k
, v
= line
.split(':', 1)
378 log("trouble with line %r" %line
)
380 #There are a few useless variables, then the pagecount, then the contents.
381 #The pagecount is useful, so pick it up first.
383 while page_count
== None:
384 page_count
= extract('NumberOfPages', int)
388 title
= extract('BookmarkTitle')
389 if title
is not None:
390 level
= extract('BookmarkLevel', int)
391 pagenum
= extract('BookmarkPageNumber', int)
392 if level
<= level_threshold
and None not in (level
, pagenum
):
393 contents
.append((title
, level
, pagenum
))
394 except StopIteration:
397 return contents
, outline
, page_count
401 page_numbers
= 'latin'
402 preamble_page_numbers
= 'roman'
404 _try_cleanup_on_del
= config
.TRY_BOOK_CLEANUP_ON_DEL
406 def notify_watcher(self
, message
=None):
409 #message is the name of the caller
410 #XXX look at using inspect module
412 message
= traceback
.extract_stack(None, 2)[0][2]
413 log("notify_watcher called with '%s'" % message
)
414 self
.watcher(message
)
419 def __exit__(self
, exc_type
, exc_value
, traceback
):
421 #could deal with exceptions here and return true
423 def __init__(self
, book
, server
, bookname
,
424 page_settings
=None, engine
=None, watcher
=None, isbn
=None,
425 license
=config
.DEFAULT_LICENSE
):
426 log("*** Starting new book %s ***" % bookname
)
429 self
.watcher
= watcher
431 self
.license
= license
432 self
.workdir
= tempfile
.mkdtemp(prefix
=bookname
, dir=TMPDIR
)
433 os
.chmod(self
.workdir
, 0755)
434 defaults
= SERVER_DEFAULTS
[server
]
435 self
.lang
= defaults
['lang']
436 self
.dir = defaults
['dir']
438 self
.body_html_file
= self
.filepath('body.html')
439 self
.body_pdf_file
= self
.filepath('body.pdf')
440 self
.body_index_file
= self
.filepath('body.txt')
441 self
.preamble_html_file
= self
.filepath('preamble.html')
442 self
.preamble_pdf_file
= self
.filepath('preamble.pdf')
443 self
.tail_html_file
= self
.filepath('tail.html')
444 self
.tail_pdf_file
= self
.filepath('tail.pdf')
445 self
.isbn_pdf_file
= None
446 self
.pdf_file
= self
.filepath('final.pdf')
448 self
.publish_name
= bookname
449 self
.publish_file
= os
.path
.join(PUBLISH_PATH
, self
.publish_name
)
450 self
.publish_url
= os
.path
.join(config
.PUBLISH_URL
, self
.publish_name
)
452 self
.book_url
= config
.BOOK_URL
% (self
.server
, self
.book
)
453 self
.toc_url
= config
.TOC_URL
% (self
.server
, self
.book
)
455 self
.maker
= PageSettings(**page_settings
)
457 if engine
is not None:
459 self
.notify_watcher()
461 if config
.TRY_BOOK_CLEANUP_ON_DEL
:
462 #Dont even define __del__ if it is not used.
463 _try_cleanup_on_del
= True
465 if self
._try
_cleanup
_on
_del
and os
.path
.exists(self
.workdir
):
466 self
._try
_cleanup
_on
_del
= False #or else you can get in bad cycles
469 def __getattr__(self
, attr
):
470 """catch unloaded books and load them"""
471 #log('looking for missing attribute "%s"' % (attr))
478 raise AttributeError("no such member: '%s'" % attr
)
def filepath(self, fn):
    """Return <fn> joined onto this book's working directory."""
    base = self.workdir
    full_path = os.path.join(base, fn)
    return full_path
484 def save_data(self
, fn
, data
):
485 """Save without tripping up on unicode"""
486 if isinstance(data
, unicode):
487 data
= data
.encode('utf8', 'ignore')
492 def save_tempfile(self
, fn
, data
):
493 """Save the data in a temporary directory that will be cleaned
494 up when all is done. Return the absolute file path."""
495 fn
= self
.filepath(fn
)
496 self
.save_data(fn
, data
)
499 def extract_pdf_outline(self
):
500 self
.outline_contents
, self
.outline_text
, number_of_pages
= parse_outline(self
.body_pdf_file
, 1)
501 for x
in self
.outline_contents
:
503 return number_of_pages
505 def make_body_pdf(self
):
506 """Make a pdf of the HTML, using webkit"""
508 html_text
= lxml
.etree
.tostring(self
.tree
, method
="html")
509 self
.save_data(self
.body_html_file
, html_text
)
512 self
.maker
.make_raw_pdf(self
.body_html_file
, self
.body_pdf_file
,
513 engine
=self
.engine
, outline
=True)
514 self
.notify_watcher('generate_pdf')
516 n_pages
= self
.extract_pdf_outline()
518 log ("found %s pages in pdf" % n_pages
)
519 #4. resize pages, shift gutters, even pages
520 self
.maker
.reshape_pdf(self
.body_pdf_file
, self
.dir, centre_end
=True)
521 self
.notify_watcher('reshape_pdf')
524 self
.maker
.number_pdf(self
.body_pdf_file
, n_pages
, dir=self
.dir,
525 numbers
=self
.page_numbers
)
526 self
.notify_watcher("number_pdf")
527 self
.notify_watcher()
529 def make_preamble_pdf(self
):
530 contents
= self
.make_contents()
531 inside_cover_html
= self
.compose_inside_cover()
532 html
= ('<html dir="%s"><head>\n'
533 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
534 '<link rel="stylesheet" href="%s" />\n'
536 '<h1 class="frontpage">%s</h1>'
538 '<div class="contents">%s</div>\n'
539 '<div style="page-break-after: always; color:#fff" class="unseen">.'
540 '<!--%s--></div></body></html>'
541 ) % (self
.dir, self
.css_url
, self
.title
, inside_cover_html
,
542 contents
, self
.title
)
543 self
.save_data(self
.preamble_html_file
, html
)
545 self
.maker
.make_raw_pdf(self
.preamble_html_file
, self
.preamble_pdf_file
,
548 self
.maker
.reshape_pdf(self
.preamble_pdf_file
, self
.dir, centre_start
=True)
550 self
.maker
.number_pdf(self
.preamble_pdf_file
, None, dir=self
.dir,
551 numbers
=self
.preamble_page_numbers
,
554 self
.notify_watcher()
556 def make_end_matter_pdf(self
):
557 """Make an inside back cover and a back cover. If there is an
558 isbn number its barcode will be put on the back cover."""
560 self
.isbn_pdf_file
= self
.filepath('isbn.pdf')
561 self
.maker
.make_barcode_pdf(self
.isbn
, self
.isbn_pdf_file
)
562 self
.notify_watcher('make_barcode_pdf')
564 self
.save_data(self
.tail_html_file
, self
.compose_end_matter())
565 self
.maker
.make_raw_pdf(self
.tail_html_file
, self
.tail_pdf_file
,
568 self
.maker
.reshape_pdf(self
.tail_pdf_file
, self
.dir, centre_start
=True,
569 centre_end
=True, even_pages
=False)
570 self
.notify_watcher()
572 def make_book_pdf(self
):
573 """A convenient wrapper of a few necessary steps"""
574 # now the Xvfb server is needed. make sure it has had long enough to get going
577 self
.make_preamble_pdf()
578 self
.make_end_matter_pdf()
580 concat_pdfs(self
.pdf_file
, self
.preamble_pdf_file
,
581 self
.body_pdf_file
, self
.tail_pdf_file
,
584 self
.notify_watcher('concatenated_pdfs')
587 def make_simple_pdf(self
, mode
):
588 """Make a simple pdf document without contents or separate
589 title page. This is used for multicolumn newspapers and for
590 web-destined pdfs."""
592 #0. Add heading to beginning of html
593 body
= list(self
.tree
.cssselect('body'))[0]
594 e
= body
.makeelement('h1', {'id': 'book-title'})
597 intro
= lxml
.html
.fragment_fromstring(self
.compose_inside_cover())
600 #0.5 adjust parameters to suit the particular kind of output
602 self
.maker
.gutter
= 0
605 html_text
= lxml
.etree
.tostring(self
.tree
, method
="html")
606 self
.save_data(self
.body_html_file
, html_text
)
608 #2. Make a pdf of it (direct to final pdf)
609 self
.maker
.make_raw_pdf(self
.body_html_file
, self
.pdf_file
,
610 engine
=self
.engine
, outline
=True)
611 self
.notify_watcher('generate_pdf')
612 #n_pages = self.extract_pdf_outline()
613 n_pages
= count_pdf_pages(self
.pdf_file
)
616 #3. resize pages and shift gutters.
617 self
.maker
.reshape_pdf(self
.pdf_file
, self
.dir, centre_end
=True)
618 self
.notify_watcher('reshape_pdf')
621 self
.maker
.number_pdf(self
.pdf_file
, n_pages
,
622 dir=self
.dir, numbers
=self
.page_numbers
)
623 self
.notify_watcher("number_pdf")
624 self
.notify_watcher()
628 """Rotate the pdf 180 degrees so an RTL book can print on LTR
630 rotated
= self
.filepath('final-rotate.pdf')
631 unrotated
= self
.filepath('final-pre-rotate.pdf')
632 #leave the unrotated pdf intact at first, in case of error.
633 rotate_pdf(self
.pdf_file
, rotated
)
634 os
.rename(self
.pdf_file
, unrotated
)
635 os
.rename(rotated
, self
.pdf_file
)
636 self
.notify_watcher()
def publish_pdf(self):
    """Move the finished PDF to its final resting place.

    The file at self.pdf_file is moved to self.publish_file, then the
    watcher is notified.
    """
    # Imported locally so the method does not depend on a module-level
    # import of shutil.
    import shutil
    log("Publishing %r as %r" % (self.pdf_file, self.publish_file))
    # The working directory lives under TMPDIR and the publish path
    # under DOC_ROOT.  Those are configured independently and may sit
    # on different filesystems, where os.rename() fails.  shutil.move()
    # renames when it can and otherwise copies the file and removes the
    # source.
    shutil.move(self.pdf_file, self.publish_file)
    self.notify_watcher()
645 """From the TOC.txt file create a list of TocItems with
646 the attributes <status>, <chapter>, and <title>.
648 <status> is a number, with the following meaning:
650 0 - section heading with no chapter
654 The TocItem object has convenience functions <is_chapter> and
657 <chapter> is twiki name of the chapter.
659 <title> is a human readable title for the chapter. It is likely to
660 differ from the title given in the chapter's <h1> heading.
662 f
= urlopen(self
.toc_url
)
666 self
.toc
.append(TocItem(f
.next().strip(),
669 except StopIteration:
672 self
.notify_watcher()
674 def load_book(self
, tidy
=True):
675 """Fetch and parse the raw html of the book. If tidy is true
676 (default) links in the document will be made absolute."""
677 f
= urlopen(self
.book_url
)
680 html
= ('<html dir="%s"><head>\n<title>%s</title>\n'
681 '<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />\n'
684 '<div style="page-break-before: always; color:#fff;" class="unseen">'
685 'A FLOSSManuals book</div>\n</body></html>'
686 ) % (self
.dir, self
.book
, html
)
688 self
.save_tempfile('raw.html', html
)
690 tree
= lxml
.html
.document_fromstring(html
)
692 tree
.make_links_absolute(self
.book_url
)
694 self
.headings
= [x
for x
in tree
.cssselect('h1')]
696 self
.headings
[0].set('class', "first-heading")
697 for h1
in self
.headings
:
698 h1
.title
= h1
.text_content().strip()
699 self
.notify_watcher()
702 """Wrapper around all necessary load methods."""
706 def make_contents(self
):
707 """Generate HTML containing the table of contents. This can
708 only be done after the main PDF has been made."""
709 header
= '<h1>Table of Contents</h1><table class="toc">\n'
710 row_tmpl
= ('<tr><td class="chapter">%s</td><td class="title">%s</td>'
711 '<td class="pagenumber">%s</td></tr>\n')
712 section_tmpl
= ('<tr><td class="section" colspan="3">%s</td></tr>\n')
713 footer
= '\n</table>'
719 subsections
= [] # for the subsection heading pages.
721 outline_contents
= iter(self
.outline_contents
)
722 headings
= iter(self
.headings
)
728 except StopIteration:
729 log("heading not found for %s (previous h1 missing?). Stopping" % t
)
731 h1_text
, level
, page_num
= outline_contents
.next()
732 log("%r %r" % (h1
.title
, h1_text
))
733 contents
.append(row_tmpl
% (chapter
, h1
.title
, page_num
))
736 contents
.append(section_tmpl
% t
.title
)
738 log("mystery TOC item: %s" % t
)
740 doc
= header
+ '\n'.join(contents
) + footer
741 self
.notify_watcher()
744 def add_section_titles(self
):
745 """Add any section heading pages that the TOC.txt file
746 specifies. These are sub-book, super-chapter groupings.
748 Also add initial numbers to chapters.
750 headings
= iter(self
.headings
)
755 if t
.is_chapter() and section
is not None:
758 except StopIteration:
759 log("heading not found for %s (previous h1 missing?)" % t
)
761 item
= h1
.makeelement('div', Class
='chapter')
762 log(h1
.title
, debug
='HTMLGEN')
764 _add_initial_number(item
, chapter
)
768 if not section_placed
:
769 log("placing section", debug
='HTMLGEN')
770 h1
.addprevious(section
)
771 section_placed
= True
773 log("NOT placing section", debug
='HTMLGEN')
775 #put a bold number at the beginning of the h1.
776 _add_initial_number(h1
, chapter
)
780 section
= self
.tree
.makeelement('div', Class
="subsection")
781 # section Element complains when you try to ask it whether it
782 # has been placed (though it does know)
783 section_placed
= False
784 heading
= lxml
.html
.fragment_fromstring(t
.title
, create_parent
='div')
785 heading
.set("Class", "subsection-heading")
786 section
.append(heading
)
788 self
.notify_watcher()
791 def add_css(self
, css
=None, mode
='book'):
792 """If css looks like a url, use it as a stylesheet link.
793 Otherwise it is the CSS itself, which is saved to a temporary file
795 log("css is %r" % css
)
797 if css
is None or not css
.strip():
798 defaults
= SERVER_DEFAULTS
[self
.server
]
799 url
= 'file://' + os
.path
.abspath(defaults
['css-%s' % mode
])
800 elif not re
.match(r
'^http://\S+$', css
):
801 fn
= self
.save_tempfile('objavi.css', css
)
805 #XXX for debugging and perhaps sensible anyway
806 #url = url.replace('file:///home/douglas/objavi2', '')
809 #find the head -- it's probably first child but lets not assume.
810 for child
in htmltree
:
811 if child
.tag
== 'head':
815 head
= htmltree
.makeelement('head')
816 htmltree
.insert(0, head
)
818 link
= lxml
.etree
.SubElement(head
, 'link', rel
='stylesheet', type='text/css', href
=url
)
820 self
.notify_watcher()
823 def set_title(self
, title
=None):
824 """If a string is supplied, it becomes the book's title.
825 Otherwise a guess is made."""
829 titles
= [x
.text_content() for x
in self
.tree
.cssselect('title')]
830 if titles
and titles
[0]:
831 self
.title
= titles
[0]
834 self
.title
= 'A Manual About ' + self
.book
837 def _read_localised_template(self
, template
, fallbacks
=['en']):
838 """Try to get the template in the appropriate language, otherwise in English."""
839 for lang
in [self
.lang
] + fallbacks
:
841 fn
= template
% (lang
)
845 log("couldn't open inside front cover for lang %s (filename %s)" % (lang
, fn
))
851 def compose_inside_cover(self
):
852 """create the markup for the preamble inside cover."""
853 template
= self
._read
_localised
_template
(config
.INSIDE_FRONT_COVER_TEMPLATE
)
856 isbn_text
= '<b>ISBN :</b> %s <br>' % self
.isbn
860 return template
% {'date': time
.strftime('%Y-%m-%d'),
862 'license': self
.license
,
866 def compose_end_matter(self
):
867 """create the markup for the end_matter inside cover. If
868 self.isbn is not set, the html will result in a pdf that
869 spills onto two pages.
871 template
= self
._read
_localised
_template
(config
.END_MATTER_TEMPLATE
)
873 d
= {'css_url': self
.css_url
,
878 d
['inside_cover_style'] = ''
880 d
['inside_cover_style'] = 'page-break-after: always'
888 """Start an Xvfb instance, using a new server number. A
889 reference to it is stored in self.xvfb, which is used to kill
890 it when the pdf is done.
892 Note that Xvfb doesn't interact well with dbus which is
893 present on modern desktops.
895 #Find an unused server number (in case two cgis are running at once)
897 servernum
= random
.randrange(50, 500)
898 if not os
.path
.exists('/tmp/.X%s-lock' % servernum
):
901 self
.xserver_no
= ':%s' % servernum
903 authfile
= self
.filepath('Xauthority')
904 os
.environ
['XAUTHORITY'] = authfile
906 #mcookie(1) eats into /dev/random, so avoid that
907 from hashlib
import md5
908 m
= md5("%r %r %r %r %r" % (self
, os
.environ
, os
.getpid(), time
.time(), os
.urandom(32)))
909 mcookie
= m
.hexdigest()
911 check_call(['xauth', 'add', self
.xserver_no
, '.', mcookie
])
913 self
.xvfb
= Popen(['Xvfb', self
.xserver_no
,
914 '-screen', '0', '1024x768x24',
917 #'-whitepixel', str(2 ** 24 -1),
918 #'+extension', 'Composite',
924 # We need to wait a bit before the Xvfb is ready. but the
925 # downloads are so slow that that probably doesn't matter
927 self
.xvfb_ready_time
= time
.time() + 2
929 os
.environ
['DISPLAY'] = self
.xserver_no
932 def wait_for_xvfb(self
):
933 """wait until a previously set time before continuing. This
934 is so Xvfb has time to properly start."""
935 if hasattr(self
, 'xvfb'):
936 d
= self
.xvfb_ready_time
- time
.time()
939 self
.notify_watcher()
942 """Try very hard to kill off Xvfb. In addition to killing
943 this instance's xvfb, occasionally (randomly) search for
944 escaped Xvfb instances and kill those too."""
945 if not hasattr(self
, 'xvfb'):
947 check_call(['xauth', 'remove', self
.xserver_no
])
949 log("trying to kill Xvfb %s" % p
.pid
)
952 if p
.poll() is not None:
953 log("%s died with %s" % (p
.pid
, p
.poll()))
955 log("%s not dead yet" % p
.pid
)
958 log("Xvfb would not die! kill -9! kill -9!")
961 if random
.random() < 0.05:
962 #kill old xvfbs occasionally, if there are any.
963 self
.kill_old_xvfbs()
965 def kill_old_xvfbs(self
):
966 """Sometimes, despite everything, Xvfb instances hang around
967 well after they are wanted -- for example if the cgi process
968 dies particularly badly. So kill them if they have been
969 running for a long time."""
970 log("running kill_old_xvfbs")
971 p
= Popen(['ps', '-C' 'Xvfb', '-o', 'pid,etime', '--no-headers'], stdout
=PIPE
)
972 data
= p
.communicate()[0].strip()
974 lines
= data
.split('\n')
976 log('dealing with ps output "%s"' % line
)
978 pid
, days_
, hours
, minutes
, seconds
= re
.match(r
'^(\d+)\s+(\d+-)?(\d{2})?:?(\d{2}):(\d+)\s*$').groups()
979 except AttributeError:
980 log("Couldn't parse that line!")
981 # 50 minutes should be enough xvfb time for anyone
982 if days
or hours
or int(minutes
) > 50:
983 log("going to kill pid %s" % pid
)
984 os
.kill(int(pid
), 15)
987 self
.notify_watcher()
991 if not config
.KEEP_TEMP_FILES
:
992 for fn
in os
.listdir(self
.workdir
):
993 os
.remove(os
.path
.join(self
.workdir
, fn
))
994 os
.rmdir(self
.workdir
)
996 log("NOT removing '%s', containing the following files:" % self
.workdir
)
997 log(*os
.listdir(self
.workdir
))
999 self
.notify_watcher()