1 # Part of Objavi2, which turns html manuals into books.
2 # This deals with PDF and page specific concepts.
4 # Copyright (C) 2009 Douglas Bagnall
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 """Fiddly stuff to do with pages and PDFs."""
24 from subprocess
import Popen
, PIPE
26 from objavi
import config
27 from objavi
.book_utils
import log
, run
30 def find_containing_paper(w
, h
):
31 for name
, pw
, ph
in config
.PAPER_SIZES
:
32 if pw
>= w
and ph
>= h
:
37 raise ValueError("page sized %.2fmm x %.2fmm won't fit on any paper!" %
38 (w
* config
.POINT_2_MM
, h
* config
.POINT_2_MM
))
40 class PageSettings(object):
41 """Calculates and wraps commands for the generation and processing
43 def __init__(self
, pointsize
, **kwargs
):
44 # the formulas for default gutters, margins and column margins
45 # are quite ad-hoc and certainly improvable.
46 self
.width
, self
.height
= pointsize
47 self
.papersize
, clipx
, clipy
= find_containing_paper(self
.width
, self
.height
)
48 self
.grey_scale
= 'grey_scale' in kwargs
50 self
.engine
= kwargs
.get('engine', config
.DEFAULT_ENGINE
)
51 # All measurements in points unless otherwise stated
52 # user interaction is in *mm*, but is converted in objavi2.py
53 default_margin
= (config
.BASE_MARGIN
+ config
.PROPORTIONAL_MARGIN
* min(pointsize
))
54 default_gutter
= (config
.BASE_GUTTER
+ config
.PROPORTIONAL_GUTTER
* self
.width
)
56 self
.top_margin
= kwargs
.get('top_margin', default_margin
)
57 self
.side_margin
= kwargs
.get('side_margin', default_margin
)
58 self
.bottom_margin
= kwargs
.get('bottom_margin', default_margin
)
59 self
.gutter
= kwargs
.get('gutter', default_gutter
)
61 self
.columns
= kwargs
.get('columns', 1)
62 if self
.columns
== 'auto': #default for newspapers is to work out columns
63 self
.columns
= int(self
.width
// config
.MIN_COLUMN_WIDTH
)
65 self
.column_margin
= kwargs
.get('column_margin',
66 default_margin
* 2 / (5.0 + self
.columns
))
68 self
.number_bottom
= self
.bottom_margin
- 0.6 * config
.PAGE_NUMBER_SIZE
69 self
.number_margin
= self
.side_margin
71 # calculate margins in mm for browsers
73 for m
, clip
in ((self
.top_margin
, clipy
),
74 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
75 (self
.bottom_margin
, clipy
+ 0.5 * config
.PAGE_NUMBER_SIZE
),
76 (self
.side_margin
, clipx
+ 0.5 * self
.gutter
),
78 self
.margins
.append((m
+ clip
) * config
.POINT_2_MM
)
80 self
.moz_printer
= kwargs
.get('moz_printer', ('objavi_' + self
.papersize
))
82 if 'PDFGEN' in config
.DEBUG_MODES
:
83 log("making PageSettings with:")
84 for x
in locals().iteritems():
85 log("%s: %s" % x
, debug
='PDFGEN')
87 if not x
.startswith('__'):
88 log("self.%s: %s" % (x
, getattr(self
, x
)), debug
='PDFGEN')
92 def _webkit_command(self
, html
, pdf
, outline
=False):
93 m
= [str(x
) for x
in self
.margins
]
94 outline_args
= ['--outline', '--outline-depth', '2'] * outline
95 greyscale_args
= ['-g'] * self
.grey_scale
96 cmd
= ([config
.WKHTMLTOPDF
, '-q', '-s', self
.papersize
,
97 '-T', m
[0], '-R', m
[1], '-B', m
[2], '-L', m
[3],
98 '-d', '100'] + outline_args
+ greyscale_args
+
99 config
.WKHTMLTOPDF_EXTRA_COMMANDS
+ [html
, pdf
])
103 def _gecko_command(self
, html
, pdf
, outline
=False):
104 m
= [str(x
) for x
in self
.margins
]
105 #firefox -P pdfprint -print URL -printprinter "printer_settings"
106 cmd
= [config
.FIREFOX
, '-P', 'pdfprint', '-print',
107 html
, '-printprinter', self
.moz_printer
]
111 def make_raw_pdf(self
, html
, pdf
, outline
=False):
112 func
= getattr(self
, '_%s_command' % self
.engine
)
113 if self
.columns
== 1:
114 cmd
= func(html
, pdf
, outline
=outline
)
117 printable_width
= self
.width
- 2.0 * self
.side_margin
- self
.gutter
118 column_width
= (printable_width
- (self
.columns
- 1) * self
.column_margin
) / self
.columns
119 page_width
= column_width
+ self
.column_margin
120 side_margin
= self
.column_margin
* 0.5
121 if 'PDFGEN' in config
.DEBUG_MODES
:
122 log("making columns with:")
123 for k
, v
in locals().iteritems():
124 log("%s: %r" % (k
, v
))
125 for k
in ('width', 'side_margin', 'gutter', 'column_margin', 'columns', 'height'):
126 log("self.%s: %r" % (k
, getattr(self
, k
)))
128 columnmaker
= PageSettings((page_width
, self
.height
), moz_printer
=self
.moz_printer
,
129 gutter
=0, top_margin
=self
.top_margin
,
130 side_margin
=side_margin
,
131 bottom_margin
=self
.bottom_margin
,
132 grey_scale
=self
.grey_scale
,
136 column_pdf
= pdf
[:-4] + '-single-column.pdf'
137 columnmaker
.make_raw_pdf(html
, column_pdf
, outline
=outline
)
138 columnmaker
.reshape_pdf(column_pdf
)
140 '--nup', '%sx1' % int(self
.columns
),
141 '--paper', self
.papersize
.lower() + 'paper',
143 '--offset', '0 0', #'%scm 0' % (self.margins[1] * 0.1),
144 '--noautoscale', 'true',
145 '--orient', 'portrait',
154 def reshape_pdf(self
, pdf
, dir='LTR', centre_start
=False, centre_end
=False,
156 """Spin the pdf for RTL text, resize it to the right size, and
157 shift the gutter left and right"""
166 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
169 'output_filename=%s' % pdf
,
170 'operation=%s' % ops
,
171 'width=%s' % self
.width
,
172 'height=%s' % self
.height
,
173 'offset=%s' % gutter
,
174 'centre_start=%s' % centre_start
,
175 'centre_end=%s' % centre_end
,
179 def _number_pdf(self
, pdf
, numbers
='latin', dir='LTR',
181 cmd
= ['pdfedit', '-s', 'wk_objavi.qs',
182 'operation=page_numbers',
185 'output_filename=%s' % pdf
,
186 'number_start=%s' % number_start
,
187 'number_style=%s' % numbers
,
188 'number_bottom=%s' % self
.number_bottom
,
189 'number_margin=%s' % self
.number_margin
,
193 def number_pdf(self
, pdf
, pages
, **kwargs
):
194 # if there are too many pages for pdfedit to handle in one go,
195 # split the job into bits. <pages> may not be exact
196 if pages
is None or pages
<= config
.PDFEDIT_MAX_PAGES
:
197 self
._number
_pdf
(pdf
, **kwargs
)
199 # section_size must be even
200 sections
= pages
// config
.PDFEDIT_MAX_PAGES
+ 1
201 section_size
= (pages
// sections
+ 2) & ~
1
204 s
= kwargs
.pop('number_start', 1)
206 e
= s
+ section_size
- 1
207 pdf_section
= '%s-%s-%s.pdf' % (pdf
[:-4], s
, e
)
209 page_range
= '%s-%s' % (s
, e
)
211 page_range
= '%s-end' % s
219 self
._number
_pdf
(pdf_section
, number_start
=s
, **kwargs
)
220 pdf_sections
.append(pdf_section
)
223 concat_pdfs(pdf
, *pdf_sections
)
225 def make_barcode_pdf(self
, isbn
, pdf
, corner
='br'):
226 """Put an ISBN barcode in a corner of a single blank page."""
228 position
= '%s,%s,%s,%s,%s' % (corner
, self
.width
, self
.height
, self
.side_margin
, self
.bottom_margin
)
229 cmd1
= [config
.BOOKLAND
,
230 '--position', position
,
234 '-dDEVICEWIDTHPOINTS=%s' % self
.width
,
235 '-dDEVICEHEIGHTPOINTS=%s' % self
.height
,
238 p1
= Popen(cmd1
, stdout
=PIPE
)
239 p2
= Popen(cmd2
, stdin
=p1
.stdout
, stdout
=PIPE
, stderr
=PIPE
)
240 out
, err
= p2
.communicate()
242 log('ran:\n%s | %s' % (' '.join(cmd1
), ' '.join(cmd2
)))
243 log("return: %s and %s \nstdout:%s \nstderr:%s" % (p1
.poll(), p2
.poll(), out
, err
))
def count_pdf_pages(pdf):
    """Return the number of pages in the PDF file <pdf>.

    Runs `pdfinfo <pdf>` and reads the "Pages:" line of its output.

    Raises ValueError if pdfinfo's output contains no "Pages:" line
    (for example when the file is missing or is not a readable PDF).
    An OSError raised by Popen itself (e.g. pdfinfo is not installed)
    is not caught here.
    """
    #XXX could also use python-pypdf or python-poppler
    cmd = ('pdfinfo', pdf)
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    if not isinstance(out, str):
        # Pipes yield bytes on Python 3, but the pattern below is text.
        # On Python 2 the output is already str and is left untouched.
        out = out.decode('utf-8', 'replace')
    m = re.search(r'^\s*Pages:\s*(\d+)\s*$', out, re.MULTILINE)
    if m is None:
        # Without this check a failed pdfinfo run surfaced as an opaque
        # AttributeError on None.
        raise ValueError("could not find a page count for %r "
                         "(pdfinfo returned %s, stderr: %r)"
                         % (pdf, p.returncode, err))
    return int(m.group(1))
256 def concat_pdfs(destination
, *pdfs
):
257 """Join all the named pdfs together into one and save it as <name>"""
259 cmd
.extend(x
for x
in pdfs
if x
is not None)
260 cmd
+= ['cat', 'output', destination
]
263 def rotate_pdf(pdfin
, pdfout
):
264 """Turn the PDF on its head"""
265 cmd
= ['pdftk', pdfin
,
273 def parse_outline(pdf
, level_threshold
, debug_filename
=None):
274 """Create a structure reflecting the outline of a PDF.
275 A chapter heading looks like this:
277 BookmarkTitle: 2. What is sound?
279 BookmarkPageNumber: 3
281 cmd
= ('pdftk', pdf
, 'dump_data')
282 p
= Popen(cmd
, stdout
=PIPE
, stderr
=PIPE
)
283 outline
, err
= p
.communicate()
284 log("OUTLINE:", outline
)
285 if debug_filename
is not None:
287 f
= open(debug_filename
, 'w')
291 log("could not write to %s!" % debug_filename
)
293 lines
= (x
.strip() for x
in outline
.split('\n') if x
.strip())
297 return s
.strip(config
.WHITESPACE_AND_NULL
)
299 def extract(expected
, conv
=_strip
):
302 k
, v
= line
.split(':', 1)
306 log("trouble with line %r" %line
)
308 #There are a few useless variables, then the pagecount, then the contents.
309 #The pagecount is useful, so pick it up first.
311 while page_count
== None:
312 page_count
= extract('NumberOfPages', int)
316 title
= extract('BookmarkTitle')
317 if title
is not None:
318 level
= extract('BookmarkLevel', int)
319 pagenum
= extract('BookmarkPageNumber', int)
320 if level
<= level_threshold
and None not in (level
, pagenum
):
321 contents
.append((title
, level
, pagenum
))
322 except StopIteration:
325 return contents
, outline
, page_count