add a set method to the bbox
[PyX/mjg.git] / pyx / pdfwriter.py
blob1c72a6858252c76482e777951bd6ce8d48d4b69b
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2005-2006 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2005-2006 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import cStringIO, copy, warnings, time
25 try:
26 import zlib
27 haszlib = 1
28 except:
29 haszlib = 0
31 import bbox, unit, style, type1font, version
try:
    enumerate
except NameError:
    # fallback implementation for Python 2.2 and below, where the
    # enumerate builtin is not available
    def enumerate(list):
        """return the (index, item) pairs of the sequence list"""
        return zip(range(len(list)), list)
41 class PDFregistry:
43 def __init__(self):
44 self.types = {}
45 # we want to keep the order of the resources
46 self.resources = []
47 self.pageresources = {}
48 self.pageprocsets = {}
50 def add(self, resource):
51 """ register resource, merging it with an already registered resource of the same type and id """
52 resources = self.types.setdefault(resource.type, {})
53 if resources.has_key(resource.id):
54 resources[resource.id].merge(resource)
55 else:
56 self.resources.append(resource)
57 resources[resource.id] = resource
59 def getrefno(self, resource):
60 return self.types[resource.type][resource.id].refno
62 def mergeregistry(self, registry):
63 for resource in registry.resources:
64 self.add(resource)
66 def write(self, file, writer, catalog):
67 # first we set all refnos
68 refno = 1
69 for resource in self.resources:
70 resource.refno = refno
71 refno += 1
73 # second, all objects are written, keeping the positions in the output file
74 fileposes = []
75 for resource in self.resources:
76 fileposes.append(file.tell())
77 file.write("%i 0 obj\n" % resource.refno)
78 resource.write(file, writer, self)
79 file.write("endobj\n")
81 # xref
82 xrefpos = file.tell()
83 file.write("xref\n"
84 "0 %d\n"
85 "0000000000 65535 f \n" % refno)
87 for filepos in fileposes:
88 file.write("%010i 00000 n \n" % filepos)
90 # trailer
91 file.write("trailer\n"
92 "<<\n"
93 "/Size %i\n" % refno)
94 file.write("/Root %i 0 R\n" % self.getrefno(catalog))
95 file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
96 file.write(">>\n"
97 "startxref\n"
98 "%i\n" % xrefpos)
99 file.write("%%EOF\n")
class PDFobject:
    """base class of the resources stored in a PDFregistry"""

    def __init__(self, type, _id=None):
        """create a PDFobject
        - type has to be a string describing the type of the object
        - _id is a unique identification used for the object if it is not None.
          Otherwise id(self) is used
        """
        self.type = type
        if _id is not None:
            self.id = _id
        else:
            self.id = id(self)

    def merge(self, other):
        """called by PDFregistry.add on the already registered object when
        another object of the same type and id is added; nothing is taken
        over from other by default"""
        pass

    def write(self, file, writer, registry):
        """write the body of the object to file; subclasses have to override this"""
        raise NotImplementedError("write method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """the /Catalog dictionary; PDFregistry.write references it as /Root"""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "catalog")
        # the page tree and the info dictionary are created along with the
        # catalog and registered right away
        pages = PDFpages(document, writer, registry)
        self.PDFpages = pages
        registry.add(pages)
        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def write(self, file, writer, registry):
        """write the catalog dictionary referring to the page tree"""
        pagesrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n"
                   "/Type /Catalog\n"
                   "/Pages %i 0 R\n" % pagesrefno)
        if writer.fullscreen:
            file.write("/PageMode /FullScreen\n")
        file.write(">>\n")
class PDFinfo(PDFobject):
    """the document information dictionary (title, author, subject,
    keywords, creator and creation date)"""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def write(self, file, writer, registry):
        """write the info dictionary

        The title, author, subject and keywords entries are taken from the
        corresponding writer attributes and skipped when those are false.
        The creation date is the current local time followed by its offset
        to UTC.
        """
        now = time.localtime()

        # time.timezone and time.altzone count the seconds WEST of UTC,
        # while a "+" in a PDF date says that local time is later than UT
        # (i.e. east of it); hence the sign has to be flipped. As strftime
        # prints the local time including a daylight saving shift, the
        # matching offset has to be used while DST is in effect (item 8 of
        # the time tuple is the DST flag).
        if time.daylight and now[8] > 0:
            westofutc = time.altzone
        else:
            westofutc = time.timezone
        # divmod on positive numbers, otherwise the minutes have a different sign from the hours
        if westofutc > 0:
            timezone = "-%02i'%02i'" % divmod(westofutc // 60, 60)
        elif westofutc < 0:
            timezone = "+%02i'%02i'" % divmod((-westofutc) // 60, 60)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # characters outside of 32..127 and the delimiters ()[]<>\ are
            # written as backslash plus octal code
            parts = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    parts.append(c)
                else:
                    parts.append("\\%03o" % ord(c))
            return "".join(parts)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """the /Pages dictionary listing the pages of the document"""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        # one PDFpage is created and registered for each page of the document
        for pageno, page in enumerate(document.pages):
            pdfpage = PDFpage(page, pageno, self, writer, registry)
            registry.add(pdfpage)
            self.PDFpagelist.append(pdfpage)

    def write(self, file, writer, registry):
        """write the pages dictionary with references to all pages"""
        kids = ["%i 0 R" % registry.getrefno(page) for page in self.PDFpagelist]
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), len(self.PDFpagelist)))
class PDFpage(PDFobject):
    """a /Page dictionary together with the content stream it refers to"""

    def __init__(self, page, pageno, PDFpages, writer, registry):
        PDFobject.__init__(self, "page")
        self.PDFpages = PDFpages
        self.page = page

        # every page uses its own registry in order to find out which
        # resources are used within the page. However, the
        # pageregistry is also merged in the global registry
        pageregistry = PDFregistry()
        self.pageregistry = pageregistry

        self.PDFcontent = PDFcontent(page, writer, pageregistry)
        pageregistry.add(self.PDFcontent)
        registry.mergeregistry(pageregistry)

    def write(self, file, writer, registry):
        """write the page dictionary: parent, boxes, resources and contents"""
        content = self.PDFcontent
        file.write("<<\n"
                   "/Type /Page\n"
                   "/Parent %i 0 R\n" % registry.getrefno(self.PDFpages))

        # the media box is the paper if there is one, the bbox of the
        # content otherwise
        paperformat = self.page.paperformat
        if not paperformat:
            file.write("/MediaBox [%f %f %f %f]\n" % content.transformedbbox.highrestuple_pt())
        else:
            file.write("/MediaBox [0 0 %f %f]\n" % (unit.topt(paperformat.width), unit.topt(paperformat.height)))
        if content.transformedbbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % content.transformedbbox.highrestuple_pt())

        # only what got entered into the registry of this page is listed
        procsets = ["/%s" % p for p in self.pageregistry.pageprocsets.keys()]
        file.write("/Resources <<\n"
                   "/ProcSet [ /PDF %s ]\n" % " ".join(procsets))
        for pageresource, resources in self.pageregistry.pageresources.items():
            entries = ["/%s %i 0 R" % (name, registry.getrefno(resource))
                       for name, resource in resources.items()]
            file.write("/%s <<\n%s\n>>\n" % (pageresource, "\n".join(entries)))
        file.write(">>\n"
                   "/Contents %i 0 R\n"
                   ">>\n" % registry.getrefno(content))
class PDFcontent(PDFobject):
    """the content stream of a page"""

    def __init__(self, page, writer, registry):
        """process the canvas of page into an in-memory content stream

        registry is handed to the processPDF calls. The bounding box found
        while processing is kept in self.bbox; self.transformedbbox is that
        box with the page transformation applied, if there is one.
        """
        # PDFobject.__init__ takes (type, _id=None): the registry must not
        # be passed here, otherwise it ends up as the type and "content" as
        # an id shared by all content objects
        PDFobject.__init__(self, "content")

        self.contentfile = cStringIO.StringIO()
        # XXX this should maybe be handled by the page since removing
        # this code would allow us to (nearly, since we also need to
        # set more info in the content dict) reuse PDFcontent for
        # patterns
        self.bbox = bbox.empty()
        acontext = context()
        style.linewidth.normal.processPDF(self.contentfile, writer, acontext, registry, self.bbox)

        page.canvas.processPDF(self.contentfile, writer, acontext, registry, self.bbox)

        self.pagetrafo = page.pagetrafo(self.bbox)
        if self.pagetrafo:
            self.transformedbbox = self.bbox.transformed(self.pagetrafo)
        else:
            self.transformedbbox = self.bbox

    def write(self, file, writer, registry):
        """write the stream dictionary and the (optionally compressed) stream

        The in-memory content is closed afterwards, so this can be called
        only once.
        """
        # apply a possible global transformation
        if self.pagetrafo:
            pagetrafofile = cStringIO.StringIO()
            self.pagetrafo.processPDF(pagetrafofile, writer, context(), registry, bbox.empty())
            content = pagetrafofile.getvalue() + self.contentfile.getvalue()
            pagetrafofile.close()
        else:
            content = self.contentfile.getvalue()
        self.contentfile.close()

        if writer.compress:
            # honour the compression level selected at the writer
            content = zlib.compress(content, writer.compresslevel)

        file.write("<<\n"
                   "/Length %i\n" % len(content))
        if writer.compress:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        file.write(content)
        file.write("endstream\n")
class PDFfont(PDFobject):
    """a /Font dictionary for a Type1 font"""

    def __init__(self, font, chars, writer, registry):
        PDFobject.__init__(self, "font", font.name)
        # let the page know about the font
        registry.pageprocsets["Text"] = 1
        registry.pageresources.setdefault("Font", {})[font.name] = self

        self.fontdescriptor = PDFfontdescriptor(font, chars, writer, registry)
        registry.add(self.fontdescriptor)

        if not font.encoding:
            self.encoding = None
        else:
            self.encoding = PDFencoding(font.encoding, writer, registry)
            registry.add(self.encoding)

        self.name = font.name
        self.basefontname = font.basefontname
        self.metric = font.metric

    def write(self, file, writer, registry):
        """write the font dictionary including the /Widths array"""
        file.write("<<\n"
                   "/Type /Font\n"
                   "/Subtype /Type1\n")
        file.write("/Name /%s\n" % self.name)
        file.write("/BaseFont /%s\n" % self.basefontname)

        # when the used characters are known, the widths are restricted to
        # the range they span; otherwise all 256 codes are listed
        fontfile = self.fontdescriptor.fontfile
        subset = fontfile is not None and fontfile.usedchars is not None
        if subset:
            usedchars = fontfile.usedchars
            firstchar = min(usedchars.keys())
            lastchar = max(usedchars.keys())
        else:
            firstchar, lastchar = 0, 255

        file.write("/FirstChar %d\n" % firstchar)
        file.write("/LastChar %d\n" % lastchar)
        file.write("/Widths\n"
                   "[")
        for code in range(firstchar, lastchar+1):
            # a new line is started in front of every code divisible by 8
            if code and not (code % 8):
                file.write("\n")
            else:
                file.write(" ")
            if not subset:
                # codes the metric can not provide a width for count as 0
                try:
                    width = self.metric.getwidth_ds(code)
                except (IndexError, AttributeError):
                    width = 0
                file.write("%f" % width)
            elif code in usedchars:
                file.write("%f" % self.metric.getwidth_ds(code))
            else:
                file.write("0")
        file.write(" ]\n")

        file.write("/FontDescriptor %d 0 R\n" % registry.getrefno(self.fontdescriptor))
        if self.encoding:
            file.write("/Encoding %d 0 R\n" % registry.getrefno(self.encoding))
        file.write(">>\n")
class PDFfontdescriptor(PDFobject):
    """a /FontDescriptor dictionary, identified by the base font name"""

    def __init__(self, font, chars, writer, registry):
        PDFobject.__init__(self, "fontdescriptor", font.basefontname)

        # a font file object is only created for fonts having a file name
        if font.filename is not None:
            self.fontfile = PDFfontfile(font.basefontname, font.filename, font.encoding, chars, writer, registry)
            registry.add(self.fontfile)
        else:
            self.fontfile = None

        self.name = font.basefontname
        self.fontinfo = font.metric.fontinfo()

    def write(self, file, writer, registry):
        """write the font descriptor using the values of the font info"""
        fontinfo = self.fontinfo
        file.write("<<\n"
                   "/Type /FontDescriptor\n"
                   "/FontName /%s\n" % self.name)
        # without a font file the fixed value 32 is used for the flags
        if self.fontfile is not None:
            file.write("/Flags %d\n" % self.fontfile.getflags())
        else:
            file.write("/Flags 32\n")
        file.write("/FontBBox [%d %d %d %d]\n" % fontinfo.fontbbox)
        file.write("/ItalicAngle %d\n" % fontinfo.italicangle)
        file.write("/Ascent %d\n" % fontinfo.ascent)
        file.write("/Descent %d\n" % fontinfo.descent)
        file.write("/CapHeight %d\n" % fontinfo.capheight)
        file.write("/StemV %d\n" % fontinfo.vstem)
        if self.fontfile is not None:
            file.write("/FontFile %d 0 R\n" % registry.getrefno(self.fontfile))
        file.write(">>\n")
class PDFfontfile(PDFobject):
    """the font file of a font, identified by its file name"""

    def __init__(self, name, filename, encoding, chars, writer, registry):
        PDFobject.__init__(self, "fontfile", filename)
        self.name = name
        self.filename = filename
        if encoding is not None:
            self.encodingfilename = encoding.filename
        else:
            self.encodingfilename = None
        # the used characters are the keys of a dictionary
        self.usedchars = dict([(char, 1) for char in chars])

        # the font is stripped down to the used glyphs unless merge turns
        # this off; the font itself is loaded on demand by mkfontfile
        self.strip = 1
        self.font = None

    def merge(self, other):
        """take over the used characters of other if it has the same
        encoding file; otherwise stripping of the font is switched off"""
        if self.encodingfilename != other.encodingfilename:
            # TODO: need to resolve the encoding when several encodings are in the play
            self.strip = 0
        else:
            self.usedchars.update(other.usedchars)

    def mkfontfile(self):
        """load the font from self.filename into self.font"""
        import font.t1font
        self.font = font.t1font.T1pfbfont(self.filename)

    def getflags(self):
        """return the flags of the font, loading the font if necessary"""
        if self.font is None:
            self.mkfontfile()
        return self.font.getflags()

    def write(self, file, writer, registry):
        """write the font, stripped to the used glyphs if self.strip is set"""
        if self.font is None:
            self.mkfontfile()
        if not self.strip:
            outfont = self.font
        else:
            # XXX: access to the encoding file
            if self.encodingfilename:
                encodingfile = type1font.encodingfile(self.encodingfilename, self.encodingfilename)
                # the first character of each decoded name is dropped
                usedglyphs = [encodingfile.decode(char)[1:] for char in self.usedchars.keys()]
            else:
                self.font._encoding()
                usedglyphs = [self.font.encoding.decode(char) for char in self.usedchars.keys()]
            outfont = self.font.getstrippedfont(usedglyphs)
        outfont.outputPDF(file, writer)
class PDFencoding(PDFobject):
    """the encoding of a font, identified by the name of the encoding"""

    def __init__(self, encoding, writer, registry):
        PDFobject.__init__(self, "encoding", encoding.name)
        self.encoding = encoding

    def write(self, file, writer, registry):
        """let the encoding file of the encoding write itself"""
        encoding = self.encoding
        type1font.encodingfile(encoding.name, encoding.filename).outputPDF(file, writer)
class PDFwriter:
    """writes a document to a PDF file; all the work is done on instantiation"""

    def __init__(self, document, filename,
                 title=None, author=None, subject=None, keywords=None,
                 fullscreen=0, writebbox=0, compress=1, compresslevel=6):
        """write document to the file filename

        - ".pdf" is appended to filename unless it already ends with it
        - title, author, subject and keywords go into the info dictionary
          if they are set
        - fullscreen selects the /FullScreen page mode in the catalog
        - writebbox adds a /CropBox to the pages
        - compress switches on compression of the content streams; it is
          disabled with a warning if the zlib module is missing
        - compresslevel is stored on the writer for the compression

        An IOError is raised if the output file can not be opened.
        """
        if not filename.endswith(".pdf"):
            filename = filename + ".pdf"
        try:
            file = open(filename, "wb")
        except IOError:
            raise IOError("cannot open output file")

        # from here on the file is open: make sure it gets closed even if
        # building or writing the PDF objects fails
        try:
            self.title = title
            self.author = author
            self.subject = subject
            self.keywords = keywords
            self.fullscreen = fullscreen
            self.writebbox = writebbox
            if compress and not haszlib:
                compress = 0
                warnings.warn("compression disabled due to missing zlib module")
            self.compress = compress
            self.compresslevel = compresslevel

            # the PDFcatalog class automatically builds up the pdfobjects from a document
            registry = PDFregistry()
            catalog = PDFcatalog(document, self, registry)
            registry.add(catalog)

            # the header is followed by a comment line of four characters
            # with codes above 127
            file.write("%%PDF-1.4\n%%%s%s%s%s\n" % (chr(195), chr(182), chr(195), chr(169)))
            registry.write(file, self, catalog)
        finally:
            file.close()
class context:
    """state handed to the processPDF methods while content is written"""

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.font = None
        self.textregion = 0

    def __call__(self, **kwargs):
        """return a shallow copy of the context with the attributes given
        as keyword arguments replaced; the context itself is not modified"""
        derived = copy.copy(self)
        for name in kwargs:
            setattr(derived, name, kwargs[name])
        return derived