one more change to the multipage question
[PyX/mjg.git] / pyx / pdfwriter.py
blobb4b7cab1842829038662c0055881d4903cd59ee1
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2005 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2005 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import copy, warnings, time
25 try:
26 import zlib
27 haszlib = 1
28 except:
29 haszlib = 0
31 import unit, style, type1font, version
try:
    enumerate([])
except NameError:
    # fallback implementation for Python 2.2 and below, where the
    # enumerate builtin is not available
    def enumerate(list):
        """Return a list of (index, item) pairs for the sequence ``list``."""
        positions = xrange(len(list))
        return zip(positions, list)
class PDFregistry:
    """Registry of PDF objects ("resources") making up an output file.

    Resources are keyed by their ``type`` and ``id`` attributes; adding a
    resource whose (type, id) pair is already known merges it into the
    registered one instead of storing it twice. The registry also assigns
    the object reference numbers and writes the objects, the xref table
    and the trailer.
    """

    def __init__(self):
        # maps resource.type -> {resource.id -> resource}
        self.types = {}
        # we need to keep the original order of the resources (for PDFcontentlength)
        self.resources = []

    def add(self, resource):
        """ register resource, merging it with an already registered resource of the same type and id"""
        resources = self.types.setdefault(resource.type, {})
        if resource.id in resources:
            resources[resource.id].merge(resource)
        else:
            self.resources.append(resource)
            resources[resource.id] = resource

    def getrefno(self, resource):
        """Return the reference number of the registered resource that has
        the same type and id as ``resource``.

        Raises KeyError when no such resource has been registered.
        """
        return self.types[resource.type][resource.id].refno

    def mergeregistry(self, registry):
        """Add all resources of another registry, in its insertion order."""
        for resource in registry.resources:
            self.add(resource)

    def write(self, file, writer, catalog):
        """Write all objects, the xref table and the trailer to ``file``.

        ``file`` needs ``write`` and ``tell``; ``writer`` is handed through
        to the ``outputPDF`` method of every resource; ``catalog`` is the
        registered catalog object, whose ``PDFinfo`` attribute must be
        registered as well.
        """
        # we recursively inserted the resources such that the topmost resources in
        # the dependency tree of the resources come last. Hence, we need to
        # write them in reverse order. A reversed copy is used so that
        # self.resources keeps its insertion order and write() can be called
        # again with the same result.
        ordered = self.resources[:]
        ordered.reverse()

        # first we set all refnos
        refno = 1
        for resource in ordered:
            resource.refno = refno
            refno += 1

        # second, all objects are written, keeping the positions in the output file
        fileposes = []
        for resource in ordered:
            fileposes.append(file.tell())
            file.write("%i 0 obj\n" % resource.refno)
            resource.outputPDF(file, writer, self)
            file.write("endobj\n")

        # xref (refno is now the number of objects plus the free entry 0)
        xrefpos = file.tell()
        file.write("xref\n"
                   "0 %d\n"
                   "0000000000 65535 f \n" % refno)

        for filepos in fileposes:
            file.write("%010i 00000 n \n" % filepos)

        # trailer
        file.write("trailer\n"
                   "<<\n"
                   "/Size %i\n" % refno)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\n"
                   "startxref\n"
                   "%i\n" % xrefpos)
        # no string formatting here: both percent signs are written literally
        file.write("%%EOF\n")
class PDFobject:
    """Base class of everything that can be stored in a PDFregistry.

    Instances carry a ``type``, an ``id`` (unique within the type) and a
    ``refno`` which stays None until a registry assigns it.
    """

    def __init__(self, type, _id=None):
        self.type = type
        self.refno = None
        # objects created without an explicit id are keyed by their identity
        if _id is not None:
            self.id = _id
        else:
            self.id = id(self)

    def merge(self, other):
        """Merge ``other`` into this object; the base class does nothing."""

    def outputPDF(self, file, writer, registry):
        """Write the object body; subclasses have to override this."""
        raise NotImplementedError("outputPDF method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """The document catalog; creating it registers the page tree and the
    info dictionary in the given registry."""

    def __init__(self, document, registry):
        PDFobject.__init__(self, "catalog")
        # the page tree is created and registered before the info object
        pagetree = PDFpages(document, registry)
        self.PDFpages = pagetree
        registry.add(pagetree)
        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def outputPDF(self, file, writer, registry):
        """Write the catalog dictionary, requesting full screen mode when
        writer.fullscreen is set."""
        parts = ["<<\n"
                 "/Type /Catalog\n"
                 "/Pages %i 0 R\n" % registry.getrefno(self.PDFpages)]
        if writer.fullscreen:
            parts.append("/PageMode /FullScreen\n")
        parts.append(">>\n")
        file.write("".join(parts))
class PDFinfo(PDFobject):
    """The document information dictionary (title, author, creation date, ...)."""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def outputPDF(self, file, writer, registry):
        """Write the info dictionary.

        Title, author, subject and keywords are taken from ``writer`` and
        skipped when they are empty or None; creator and creation date are
        always written.
        """
        # take a single snapshot of the local time so that the time stamp
        # and the UTC offset belong together
        now = time.localtime()

        # time.timezone and time.altzone count seconds WEST of UTC, whereas
        # the PDF date format wants "+" when local time is later than UTC.
        # Hence the sign needs to be flipped. altzone applies while daylight
        # saving time is in effect.
        if time.daylight and now.tm_isdst > 0:
            offset = -time.altzone
        else:
            offset = -time.timezone

        if offset < 0:
            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
            timezone = "-%02i'%02i'" % divmod(-offset // 60, 60)
        elif offset > 0:
            timezone = "+%02i'%02i'" % divmod(offset // 60, 60)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # escape everything except plain printable characters as an
            # octal sequence; the pieces are joined once at the end
            pieces = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    pieces.append(c)
                else:
                    pieces.append("\\%03o" % ord(c))
            return "".join(pieces)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        # the seconds are included since the PDF date layout places them
        # in front of the UTC offset (D:YYYYMMDDHHmmSSOHH'mm')
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M%S", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """The page tree node listing all pages of the document."""

    def __init__(self, document, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, page in enumerate(document.pages):
            self.PDFpagelist.append(PDFpage(page, pageno, self, registry))
        # the pages are registered last to first
        backwards = self.PDFpagelist[:]
        backwards.reverse()
        for pdfpage in backwards:
            registry.add(pdfpage)

    def outputPDF(self, file, writer, registry):
        """Write the /Pages dictionary with one /Kids reference per page."""
        kids = ["%i 0 R" % registry.getrefno(page)
                for page in self.PDFpagelist]
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), len(self.PDFpagelist)))
class PDFpage(PDFobject):
    """A single page object together with the resources used on it."""

    def __init__(self, page, pageno, PDFpages, registry):
        PDFobject.__init__(self, "page", pageno)
        self.PDFpages = PDFpages
        self.page = page

        # every page uses its own registry in order to find out which
        # resources are used within the page. However, the
        # pageregistry is also merged in the global registry
        pageregistry = PDFregistry()
        self.pageregistry = pageregistry

        bbox = page.bbox()
        self.bbox = bbox
        trafo = page.pagetrafo(bbox)
        self.pagetrafo = trafo
        if not trafo:
            self.transformedbbox = bbox
        else:
            self.transformedbbox = bbox.transformed(trafo)

        self.PDFcontent = PDFcontent(page.canvas, trafo, pageregistry)
        pageregistry.add(self.PDFcontent)
        self.page.canvas.registerPDF(pageregistry)
        registry.mergeregistry(pageregistry)

    def outputPDF(self, file, writer, registry):
        """Write the /Page dictionary: parent, boxes, resources, contents."""
        file.write("<<\n"
                   "/Type /Page\n"
                   "/Parent %i 0 R\n" % registry.getrefno(self.PDFpages))

        # the media box is the paper size when one is given, otherwise the
        # (transformed) bounding box of the page
        paperformat = self.page.paperformat
        if not paperformat:
            file.write("/MediaBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())
        else:
            file.write("/MediaBox [0 0 %f %f]\n" % (unit.topt(paperformat.width), unit.topt(paperformat.height)))
        if self.transformedbbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())

        # collect the procedure sets needed by the resources of this page
        types = self.pageregistry.types
        procset = []
        if "font" in types:
            procset.append("/Text")
        if "image" in types:
            hasgray = hascolor = haspalette = 0
            for image in types["image"].values():
                if image.colorspace == "/DeviceGray":
                    hasgray = 1
                elif image.colorspace is not None:
                    hascolor = 1
                if image.palettedata is not None:
                    haspalette = 1
            if hasgray:
                procset.append("/ImageB")
            if hascolor:
                procset.append("/ImageC")
            if haspalette:
                procset.append("/ImageI")

        file.write("/Resources <<\n"
                   "/ProcSet [ /PDF %s ]\n" % " ".join(procset))
        # one sub-dictionary per resource type present on the page
        for restype, heading in [("font", "/Font"),
                                 ("image", "/XObject"),
                                 ("pattern", "/Pattern")]:
            if restype in types:
                entries = ["/%s %i 0 R" % (resource.name, registry.getrefno(resource))
                           for resource in types[restype].values()]
                file.write("%s <<\n%s\n>>\n" % (heading, "\n".join(entries)))
        file.write(">>\n")

        file.write("/Contents %i 0 R\n"
                   ">>\n" % registry.getrefno(self.PDFcontent))
class PDFcontent(PDFobject):
    """The content stream of a page, optionally written compressed."""

    def __init__(self, canvas, pagetrafo, registry):
        PDFobject.__init__(self, "content")
        self.canvas = canvas
        self.pagetrafo = pagetrafo
        # the stream length is stored in an object of its own, identified
        # by the (type, id) pair of this content object
        self.contentlength = PDFcontentlength((self.type, self.id))
        registry.add(self.contentlength)

    def outputPDF(self, file, writer, registry):
        """Write the stream dictionary and the stream, recording the number
        of bytes written in the associated PDFcontentlength object."""
        compress = writer.compress

        header = ("<<\n"
                  "/Length %i 0 R\n" % registry.getrefno(self.contentlength))
        if compress:
            header = header + "/Filter /FlateDecode\n"
        file.write(header +
                   ">>\n"
                   "stream\n")
        beginstreampos = file.tell()

        if compress:
            stream = compressedstream(file, writer.compresslevel)
        else:
            stream = file

        # XXX this should maybe be handled by the page since removing
        # this code would allow us to (nearly, since we also need to
        # set more info in the content dict) reuse PDFcontent for
        # patterns
        acontext = context()
        # apply a possible global transformation
        if self.pagetrafo:
            self.pagetrafo.outputPDF(stream, writer, acontext)
        style.linewidth.normal.outputPDF(stream, writer, acontext)

        self.canvas.outputPDF(stream, writer, acontext)
        if compress:
            stream.flush()

        # the length has to be taken before the end-of-stream marker is written
        self.contentlength.contentlength = file.tell() - beginstreampos
        if compress:
            trailer = "\nendstream\n"
        else:
            trailer = "endstream\n"
        file.write(trailer)
class PDFcontentlength(PDFobject):
    """Object holding the length of a content stream as a plain number."""

    def __init__(self, contentid):
        PDFobject.__init__(self, "_contentlength", contentid)
        # unknown until somebody stores the length in the instance
        self.contentlength = None

    def outputPDF(self, file, writer, registry):
        # initially we do not know about the content length
        # -> it has to be written into the instance later on
        length = self.contentlength
        file.write("%d\n" % length)
class PDFfont(PDFobject):
    """A Type 1 font dictionary including its table of character widths."""

    def __init__(self, font, chars, registry):
        PDFobject.__init__(self, "font", font.name)

        self.name = font.name
        self.basefontname = font.basefontname
        self.metric = font.metric

        self.fontdescriptor = PDFfontdescriptor(font, chars, registry)
        registry.add(self.fontdescriptor)

        # the encoding object only exists for fonts that have an encoding
        if not font.encoding:
            self.encoding = None
        else:
            self.encoding = PDFencoding(font.encoding)
            registry.add(self.encoding)

    def outputPDF(self, file, writer, registry):
        """Write the font dictionary.

        When the font file keeps track of the characters in use, the widths
        table spans from the lowest to the highest used character code;
        otherwise it covers the codes 0 to 255.
        """
        file.write("<<\n"
                   "/Type /Font\n"
                   "/Subtype /Type1\n"
                   "/Name /%s\n"
                   "/BaseFont /%s\n" % (self.name, self.basefontname))

        fontfile = self.fontdescriptor.fontfile
        if fontfile is not None and fontfile.usedchars is not None:
            usedchars = fontfile.usedchars
            codes = usedchars.keys()
            firstchar = min(codes)
            lastchar = max(codes)
            file.write("/FirstChar %d\n"
                       "/LastChar %d\n"
                       "/Widths\n"
                       "[" % (firstchar, lastchar))
            for code in range(firstchar, lastchar+1):
                # a line break in front of every eighth code, except code 0
                if code == 0 or code % 8:
                    separator = " "
                else:
                    separator = "\n"
                if code in usedchars:
                    entry = "%f" % self.metric.getwidth_ds(code)
                else:
                    entry = "0"
                file.write(separator + entry)
            file.write(" ]\n")
        else:
            file.write("/FirstChar 0\n"
                       "/LastChar 255\n"
                       "/Widths\n"
                       "[")
            for code in range(256):
                if code == 0 or code % 8:
                    separator = " "
                else:
                    separator = "\n"
                # codes the metric cannot provide a width for are written as zero
                try:
                    width = self.metric.getwidth_ds(code)
                except (IndexError, AttributeError):
                    width = 0
                file.write(separator + "%f" % width)
            file.write(" ]\n")

        file.write("/FontDescriptor %d 0 R\n" % registry.getrefno(self.fontdescriptor))
        if self.encoding:
            file.write("/Encoding %d 0 R\n" % registry.getrefno(self.encoding))
        file.write(">>\n")
class PDFfontdescriptor(PDFobject):
    """The font descriptor dictionary belonging to a font."""

    def __init__(self, font, chars, registry):
        PDFobject.__init__(self, "fontdescriptor", font.basefontname)

        # a font file object is only created for fonts that have a file name
        if font.filename is not None:
            self.fontfile = PDFfontfile(font.basefontname, font.filename, font.encoding, chars)
            registry.add(self.fontfile)
        else:
            self.fontfile = None

        self.name = font.basefontname
        self.fontinfo = font.metric.fontinfo()

    def outputPDF(self, file, writer, registry):
        """Write the descriptor; flags and the /FontFile reference come from
        the font file object when there is one."""
        fontfile = self.fontfile
        fontinfo = self.fontinfo

        file.write("<<\n"
                   "/Type /FontDescriptor\n"
                   "/FontName /%s\n" % self.name)
        if fontfile is not None:
            file.write("/Flags %d\n" % fontfile.getflags())
        else:
            file.write("/Flags 32\n")
        file.write("/FontBBox [%d %d %d %d]\n" % fontinfo.fontbbox)
        for key, value in [("ItalicAngle", fontinfo.italicangle),
                           ("Ascent", fontinfo.ascent),
                           ("Descent", fontinfo.descent),
                           ("CapHeight", fontinfo.capheight),
                           ("StemV", fontinfo.vstem)]:
            file.write("/%s %d\n" % (key, value))
        if fontfile is not None:
            file.write("/FontFile %d 0 R\n" % registry.getrefno(fontfile))
        file.write(">>\n")
class PDFfontfile(PDFobject):
    """An embedded font file, identified by its file name.

    ``usedchars`` is a dictionary whose keys are the characters in use, or
    None once stripping the font down to the used characters has been
    found to be impossible.
    """

    def __init__(self, name, filename, encoding, chars):
        PDFobject.__init__(self, "fontfile", filename)
        self.name = name
        self.filename = filename
        if encoding is None:
            self.encodingfilename = None
        else:
            self.encodingfilename = encoding.filename
        self.usedchars = {}
        for char in chars:
            self.usedchars[char] = 1

        # for flags-caching
        self.fontfile = None
        self.flags = None

    def merge(self, other):
        """Merge the characters used by ``other`` into this font file.

        Stripping stays disabled (usedchars is None) once either side has it
        disabled or the two sides use different encodings.
        """
        self.fontfile = None # remove fontfile cache when adding further stuff after writing
        if (self.usedchars is None or other.usedchars is None or
            self.encodingfilename != other.encodingfilename):
            # stripping of font not possible; checking for None first avoids
            # calling update() on, or with, a disabled character set
            self.usedchars = None
        else:
            self.usedchars.update(other.usedchars)

    def mkfontfile(self):
        """Create the type1font.fontfile instance unless it is cached."""
        if self.fontfile is None:
            self.fontfile = type1font.fontfile(self.name,
                                               self.filename,
                                               self.usedchars,
                                               self.encodingfilename)

    def getflags(self):
        """Return the font flags, computing them on first use only."""
        # compare against None so that a flags value of 0 is cached as well
        if self.flags is None:
            self.mkfontfile()
            self.flags = self.fontfile.getflags()
        return self.flags

    def outputPDF(self, file, writer, registry):
        """Write the font file object via the type1font.fontfile instance."""
        self.mkfontfile()
        self.fontfile.outputPDF(file, writer, registry)
class PDFencoding(PDFobject):
    """A font encoding object, identified by the name of the encoding."""

    def __init__(self, encoding):
        PDFobject.__init__(self, "encoding", encoding.name)
        self.encoding = encoding

    def outputPDF(self, file, writer, registry):
        """Let a type1font.encodingfile built from the encoding's name and
        file name write the object."""
        encoding = self.encoding
        type1font.encodingfile(encoding.name, encoding.filename).outputPDF(file, writer, registry)
class PDFwriter:
    """Write a document into a PDF file; all the work happens on creation.

    Parameters:
    document -- the document to write; handed to PDFcatalog
    filename -- output file name; ".pdf" is appended when missing
    title, author, subject, keywords -- optional document information
    fullscreen -- request full screen page mode in the catalog
    writebbox -- write a /CropBox for the pages
    compress -- compress content streams (switched off with a warning
                when the zlib module is missing)
    compresslevel -- compression level handed to zlib

    Raises IOError when the output file cannot be opened.
    """

    def __init__(self, document, filename,
                       title=None, author=None, subject=None, keywords=None,
                       fullscreen=0, writebbox=0, compress=1, compresslevel=6):
        if not filename.endswith(".pdf"):
            filename = filename + ".pdf"
        try:
            file = open(filename, "wb")
        except IOError:
            raise IOError("cannot open output file")

        # make sure the file gets closed even when building or writing
        # the document fails
        try:
            self.title = title
            self.author = author
            self.subject = subject
            self.keywords = keywords
            self.fullscreen = fullscreen
            self.writebbox = writebbox
            if compress and not haszlib:
                compress = 0
                warnings.warn("compression disabled due to missing zlib module")
            self.compress = compress
            self.compresslevel = compresslevel

            # header line followed by a comment line of four non-ASCII characters
            file.write("%%PDF-1.4\n%%%s%s%s%s\n" % (chr(195), chr(182), chr(195), chr(169)))

            # the PDFcatalog class automatically builds up the pdfobjects from a document
            registry = PDFregistry()
            catalog = PDFcatalog(document, registry)
            registry.add(catalog)
            registry.write(file, self, catalog)
        finally:
            file.close()
class compressedstream:
    """File-like wrapper that compresses with zlib whatever is written to
    it and passes the result on to the underlying file."""

    def __init__(self, file, compresslevel):
        self.file = file
        self.compressobj = zlib.compressobj(compresslevel)

    def write(self, string):
        """Compress string and write whatever output is available so far."""
        packed = self.compressobj.compress(string)
        self.file.write(packed)

    def flush(self):
        """Write the compressed data still pending in the compressor."""
        remainder = self.compressobj.flush()
        self.file.write(remainder)
class context:
    """Bundle of state attributes handed to the outputPDF methods."""

    def __init__(self):
        self.font = None
        self.textregion = 0
        self.strokeattr = 1
        self.fillattr = 1
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.linewidth_pt = None

    def __call__(self, **kwargs):
        """Return a shallow copy of the context with the attributes given
        as keyword arguments replaced; the original stays untouched."""
        clone = copy.copy(self)
        for name in kwargs.keys():
            setattr(clone, name, kwargs[name])
        return clone