change shift_file in case of cross-file-system move
[objavi2.git] / objavi / pdf.py
blob7a86acd3db595ce1c6c914ffe657f3fbec9c6d0f
1 # Part of Objavi2, which turns html manuals into books.
2 # This deals with PDF and page specific concepts.
4 # Copyright (C) 2009 Douglas Bagnall
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 """Fiddly stuff to do with pages and PDFs."""
22 import os, sys
23 import re
24 from subprocess import Popen, PIPE
26 from objavi import config
27 from objavi.book_utils import log, run
def find_containing_paper(w, h):
    """Find the first configured paper size able to hold a w x h page.

    config.PAPER_SIZES is scanned in order and the first entry that is
    at least as wide as <w> and at least as tall as <h> wins.

    Returns a tuple (name, horizontal_margin, vertical_margin), where
    each margin is half of the space left over on that axis.

    Raises ValueError if no configured paper is big enough.  (The
    message converts with config.POINT_2_MM, so <w> and <h> are
    presumably in points.)
    """
    candidates = ((name, (pw - w) * 0.5, (ph - h) * 0.5)
                  for name, pw, ph in config.PAPER_SIZES
                  if pw >= w and ph >= h)
    for paper in candidates:
        # the generator is lazy, so only the first fit is ever computed
        return paper

    raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
                     (w * config.POINT_2_MM, h * config.POINT_2_MM))
class PageSettings(object):
    """Calculates and wraps commands for the generation and processing
    of PDFs.

    An instance holds the geometry of one page layout (size, margins,
    gutter, columns) and knows how to build the command lines for the
    external tools that render, reshape, number and decorate PDFs.
    """

    def __init__(self, pointsize, **kwargs):
        """Work out the page geometry.

        <pointsize> is a (width, height) pair.  Recognised keyword
        arguments are engine, top_margin, side_margin, bottom_margin,
        gutter, columns (a number, or 'auto'), column_margin,
        moz_printer and grey_scale.  Note that grey_scale is switched
        on by the mere *presence* of the key, whatever its value.

        Raises ValueError (from find_containing_paper) if the page
        does not fit on any configured paper.
        """
        # the formulas for default gutters, margins and column margins
        # are quite ad-hoc and certainly improvable.
        self.width, self.height = pointsize
        self.papersize, clipx, clipy = find_containing_paper(self.width, self.height)
        self.grey_scale = 'grey_scale' in kwargs

        self.engine = kwargs.get('engine', config.DEFAULT_ENGINE)
        # All measurements in points unless otherwise stated
        # user interaction is in *mm*, but is converted in objavi2.py
        default_margin = (config.BASE_MARGIN + config.PROPORTIONAL_MARGIN * min(pointsize))
        default_gutter = (config.BASE_GUTTER + config.PROPORTIONAL_GUTTER * self.width)

        self.top_margin = kwargs.get('top_margin', default_margin)
        self.side_margin = kwargs.get('side_margin', default_margin)
        self.bottom_margin = kwargs.get('bottom_margin', default_margin)
        self.gutter = kwargs.get('gutter', default_gutter)

        self.columns = kwargs.get('columns', 1)
        if self.columns == 'auto': #default for newspapers is to work out columns
            self.columns = int(self.width // config.MIN_COLUMN_WIDTH)

        self.column_margin = kwargs.get('column_margin',
                                        default_margin * 2 / (5.0 + self.columns))

        self.number_bottom = self.bottom_margin - 0.6 * config.PAGE_NUMBER_SIZE
        self.number_margin = self.side_margin

        # calculate margins in mm for browsers.  The order is top,
        # right, bottom, left (see _webkit_command).  Each margin
        # also absorbs the space by which the paper exceeds the page.
        self.margins = []
        for m, clip in ((self.top_margin, clipy),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        (self.bottom_margin, clipy + 0.5 * config.PAGE_NUMBER_SIZE),
                        (self.side_margin, clipx + 0.5 * self.gutter),
                        ):
            self.margins.append((m + clip) * config.POINT_2_MM)

        self.moz_printer = kwargs.get('moz_printer', ('objavi_' + self.papersize))

        if 'PDFGEN' in config.DEBUG_MODES:
            log("making PageSettings with:")
            # list() takes a snapshot, so binding the loop variable
            # cannot disturb the iteration
            for x in list(locals().items()):
                log("%s: %s" % x, debug='PDFGEN')
            for x in dir(self):
                if not x.startswith('__'):
                    log("self.%s: %s" % (x, getattr(self, x)), debug='PDFGEN')

    def _webkit_command(self, html, pdf, outline=False):
        """Build (and log) the wkhtmltopdf command line that renders
        <html> to <pdf>.  A true <outline> asks for a PDF outline two
        levels deep."""
        m = [str(x) for x in self.margins]
        if outline:
            outline_args = ['--outline', '--outline-depth', '2']
        else:
            outline_args = []
        if self.grey_scale:
            greyscale_args = ['-g']
        else:
            greyscale_args = []
        cmd = ([config.WKHTMLTOPDF, '-q', '-s', self.papersize,
                '-T', m[0], '-R', m[1], '-B', m[2], '-L', m[3],
                '-d', '100'] + outline_args + greyscale_args +
               config.WKHTMLTOPDF_EXTRA_COMMANDS + [html, pdf])
        log(' '.join(cmd))
        return cmd

    def _gecko_command(self, html, pdf, outline=False):
        """Build (and log) the firefox command line that prints <html>.

        <pdf> and <outline> are accepted only so that the signature
        matches _webkit_command; neither appears on the command line.
        NOTE(review): the output location is presumably a property of
        the moz_printer setup -- confirm.
        """
        #firefox -P pdfprint -print URL -printprinter "printer_settings"
        cmd = [config.FIREFOX, '-P', 'pdfprint', '-print',
               html, '-printprinter', self.moz_printer]
        log(' '.join(cmd))
        return cmd

    def _column_maker_kwargs(self, side_margin):
        """Keyword arguments for the one-column PageSettings that
        make_raw_pdf uses to render a single column."""
        kwargs = {'moz_printer': self.moz_printer,
                  'gutter': 0,
                  'top_margin': self.top_margin,
                  'side_margin': side_margin,
                  'bottom_margin': self.bottom_margin,
                  'engine': self.engine,
                  }
        # __init__ treats the mere presence of 'grey_scale' as "on",
        # so the key must be left out entirely for colour output.
        if self.grey_scale:
            kwargs['grey_scale'] = True
        return kwargs

    def make_raw_pdf(self, html, pdf, outline=False):
        """Render <html> into <pdf> using the configured engine.

        With one column the engine command is simply run.  Otherwise
        the text is rendered as a PDF of narrow single-column pages
        (saved beside <pdf> with a '-single-column.pdf' suffix), which
        pdfnup then tiles side by side onto the real paper.
        """
        func = getattr(self, '_%s_command' % self.engine)
        if self.columns == 1:
            cmd = func(html, pdf, outline=outline)
            run(cmd)
        else:
            printable_width = self.width - 2.0 * self.side_margin - self.gutter
            column_width = (printable_width - (self.columns - 1) * self.column_margin) / self.columns
            # each narrow page carries half a column margin on either side
            page_width = column_width + self.column_margin
            side_margin = self.column_margin * 0.5
            if 'PDFGEN' in config.DEBUG_MODES:
                log("making columns with:")
                for k, v in list(locals().items()):
                    log("%s: %r" % (k, v))
                for k in ('width', 'side_margin', 'gutter', 'column_margin', 'columns', 'height'):
                    log("self.%s: %r" % (k, getattr(self, k)))

            columnmaker = PageSettings((page_width, self.height),
                                       **self._column_maker_kwargs(side_margin))

            # assumes <pdf> ends with a four character suffix like '.pdf'
            column_pdf = pdf[:-4] + '-single-column.pdf'
            columnmaker.make_raw_pdf(html, column_pdf, outline=outline)
            columnmaker.reshape_pdf(column_pdf)
            cmd = ['pdfnup',
                   '--nup', '%sx1' % int(self.columns),
                   '--paper', self.papersize.lower() + 'paper',
                   '--outfile', pdf,
                   '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
                   '--noautoscale', 'true',
                   '--orient', 'portrait',
                   #'--tidy', 'false',
                   column_pdf
                   ]
            run(cmd)

    def reshape_pdf(self, pdf, dir='LTR', centre_start=False, centre_end=False,
                    even_pages=True):
        """Spin the pdf for RTL text, resize it to the right size, and
        shift the gutter left and right.

        The work is done in place by pdfedit's wk_objavi.qs script.
        The 'shift' operation is only requested if there is a gutter,
        and 'even_pages' only if <even_pages> is true.  For RTL text
        the gutter offset is negated.
        """
        ops = 'resize'
        if self.gutter:
            ops += ',shift'
        if even_pages:
            ops += ',even_pages'
        gutter = self.gutter
        if dir == 'RTL':
            gutter = -gutter
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'operation=%s' % ops,
               'width=%s' % self.width,
               'height=%s' % self.height,
               'offset=%s' % gutter,
               'centre_start=%s' % centre_start,
               'centre_end=%s' % centre_end,
               ]
        run(cmd)

    def _number_pdf(self, pdf, numbers='latin', dir='LTR',
                    number_start=1):
        """Add page numbers to <pdf> in place, in one pdfedit run."""
        cmd = ['pdfedit', '-s', 'wk_objavi.qs',
               'operation=page_numbers',
               'dir=%s' % dir,
               'filename=%s' % pdf,
               'output_filename=%s' % pdf,
               'number_start=%s' % number_start,
               'number_style=%s' % numbers,
               'number_bottom=%s' % self.number_bottom,
               'number_margin=%s' % self.number_margin,
               ]
        run(cmd)

    def number_pdf(self, pdf, pages, **kwargs):
        """Add page numbers to <pdf> in place.

        <pages> is the (possibly inexact) page count, or None if it is
        unknown.  Other keyword arguments (numbers, dir, number_start)
        are handed on to _number_pdf.
        """
        # if there are too many pages for pdfedit to handle in one go,
        # split the job into bits.  <pages> may not be exact
        if pages is None or pages <= config.PDFEDIT_MAX_PAGES:
            self._number_pdf(pdf, **kwargs)
            return

        # section_size must be even
        sections = pages // config.PDFEDIT_MAX_PAGES + 1
        section_size = (pages // sections + 2) & ~1

        # The physical page index and the printed number are different
        # things: the split always begins at page 1 of the file, while
        # the printed numbers begin at number_start.
        number_start = kwargs.pop('number_start', 1)
        pdf_sections = []
        first = 1
        while first < pages:
            last = first + section_size - 1
            pdf_section = '%s-%s-%s.pdf' % (pdf[:-4], first, last)
            if last < pages - 1:
                page_range = '%s-%s' % (first, last)
            else:
                # the final section takes whatever is left, which
                # copes with <pages> being an underestimate
                page_range = '%s-end' % first
            run(['pdftk',
                 pdf,
                 'cat',
                 page_range,
                 'output',
                 pdf_section,
                 ])
            self._number_pdf(pdf_section,
                             number_start=number_start + first - 1,
                             **kwargs)
            pdf_sections.append(pdf_section)
            first = last + 1

        concat_pdfs(pdf, *pdf_sections)

    def make_barcode_pdf(self, isbn, pdf, corner='br'):
        """Put an ISBN barcode in a corner of a single blank page.

        The output of the config.BOOKLAND program is piped through
        ps2pdf, which writes <pdf> at this page size.  The commands,
        their exit statuses and ps2pdf's output are logged; nothing is
        returned.
        """
        position = '%s,%s,%s,%s,%s' % (corner, self.width, self.height, self.side_margin, self.bottom_margin)
        cmd1 = [config.BOOKLAND,
                '--position', position,
                str(isbn)]
        cmd2 = ['ps2pdf',
                '-dFIXEDMEDIA',
                '-dDEVICEWIDTHPOINTS=%s' % self.width,
                '-dDEVICEHEIGHTPOINTS=%s' % self.height,
                '-', pdf]

        p1 = Popen(cmd1, stdout=PIPE)
        p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, stderr=PIPE)
        # drop our copy of the pipe so p1 gets SIGPIPE if p2 exits early
        p1.stdout.close()
        out, err = p2.communicate()
        # reap p1 so that its exit status is available for the log
        p1.wait()

        log('ran:\n%s | %s' % (' '.join(cmd1), ' '.join(cmd2)))
        log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1.poll(), p2.poll(), out, err))
def count_pdf_pages(pdf):
    """How many pages in the PDF?

    Runs pdfinfo on <pdf> and returns, as an int, the number found on
    the "Pages:" line of its output.

    Raises ValueError if there is no such line (for instance because
    pdfinfo could not read the file); the message includes pdfinfo's
    exit status and whatever it wrote to stderr.
    """
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        # fail with the diagnostics rather than an opaque error on None
        raise ValueError("pdfinfo reported no page count for %r "
                         "(exit status %s): %s" % (pdf, p.returncode, err))
    return int(m.group(1))
def concat_pdfs(destination, *pdfs):
    """Join the named pdfs, in order, into one file saved as
    <destination>.  Any None among <pdfs> is skipped."""
    sources = [name for name in pdfs if name is not None]
    run(['pdftk'] + sources + ['cat', 'output', destination])
def rotate_pdf(pdfin, pdfout):
    """Turn every page of <pdfin> upside down, saving the result as
    <pdfout>."""
    # pages 1 to end; the 'D' suffix is presumably pdftk's shorthand
    # for a 180 degree rotation
    page_spec = '1-endD'
    run(['pdftk', pdfin, 'cat', page_spec, 'output', pdfout])
def parse_outline(pdf, level_threshold, debug_filename=None):
    """Create a structure reflecting the outline of a PDF.

    The outline is read from the output of "pdftk <pdf> dump_data".
    A chapter heading looks like this:

    BookmarkTitle: 2. What is sound?
    BookmarkLevel: 1
    BookmarkPageNumber: 3

    Only bookmarks whose level is no greater than <level_threshold>
    are kept.  If <debug_filename> is given, the raw pdftk output is
    also saved there (failure to write is logged, not raised).

    Returns a tuple (contents, outline, page_count): <contents> is a
    list of (title, level, page number) tuples, <outline> is the raw
    pdftk output, and <page_count> is the NumberOfPages value, or
    None if pdftk reported none.
    """
    cmd = ('pdftk', pdf, 'dump_data')
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    outline, err = p.communicate()
    log("OUTLINE:", outline)
    if debug_filename is not None:
        try:
            # 'with' closes the file even if the write fails
            with open(debug_filename, 'w') as f:
                f.write(outline)
        except IOError:
            log("could not write to %s!" % debug_filename)

    # a single shared iterator: every extract() call consumes a line
    lines = (x.strip() for x in outline.split('\n') if x.strip())
    contents = []

    def _strip(s):
        return s.strip(config.WHITESPACE_AND_NULL)

    def extract(expected, conv=_strip):
        """Consume one line; return its converted value if its key is
        <expected>, otherwise None.  StopIteration escapes when the
        lines run out."""
        line = next(lines)
        try:
            k, v = line.split(':', 1)
            if k == expected:
                return conv(v)
        except ValueError:
            # no colon in the line, or the value would not convert
            log("trouble with line %r" %line)
        return None

    #There are a few useless variables, then the pagecount, then the contents.
    #The pagecount is useful, so pick it up first.
    page_count = None
    try:
        while page_count is None:
            page_count = extract('NumberOfPages', int)

        while True:
            title = extract('BookmarkTitle')
            if title is not None:
                level = extract('BookmarkLevel', int)
                pagenum = extract('BookmarkPageNumber', int)
                # check for None first, so that a missing level is
                # never compared with the threshold
                if None not in (level, pagenum) and level <= level_threshold:
                    contents.append((title, level, pagenum))
    except StopIteration:
        # out of lines -- which also covers a dump with no page count
        pass

    return contents, outline, page_count