- path module
[PyX/mjg.git] / pyx / pdfwriter.py
bloba15954f6994477cf7262353ce0c7b0b1344cfeec
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2005-2006 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2005-2006 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import cStringIO, copy, warnings, time
# zlib is optional: haszlib records whether it could be imported so that the
# writer can disable compression instead of failing.
try:
    import zlib
    haszlib = 1
except ImportError:
    # only a missing module is expected here; anything else should propagate
    haszlib = 0
31 import bbox, unit, style, type1font, version
try:
    enumerate([])
except NameError:
    # fallback implementation for Python 2.2 and below
    def enumerate(list):
        """return a list of (index, item) pairs for the given sequence"""
        indices = xrange(len(list))
        return zip(indices, list)
class PDFregistry:
    """registry of PDF objects

    Resources are stored by type and id and additionally in the order of
    their registration. The write method numbers the resources and writes
    them together with the xref table and the trailer.
    """

    def __init__(self):
        # maps resource.type to a dictionary mapping resource.id to the resource
        self.types = {}
        # we need to keep the original order of the resources (for PDFcontentlength)
        self.resources = []
        self.bbox = None

    def add(self, resource):
        """ register resource, merging it with an already registered resource of the same type and id"""
        resources = self.types.setdefault(resource.type, {})
        # "in" replaces dict.has_key, which does not exist in Python 3 anymore
        if resource.id in resources:
            resources[resource.id].merge(resource)
        else:
            self.resources.append(resource)
            resources[resource.id] = resource

    def getrefno(self, resource):
        """return the refno of the registered resource with the type and id of resource"""
        return self.types[resource.type][resource.id].refno

    def mergeregistry(self, registry):
        """add all resources of registry (in their registration order) to this registry"""
        for resource in registry.resources:
            self.add(resource)

    def write(self, file, writer, catalog):
        """write all registered resources, the xref table and the trailer to file

        - catalog and catalog.PDFinfo have to be registered; their refnos
          are written as /Root and /Info into the trailer
        - writer is passed on to the output methods of the resources
        """
        # first we set all refnos
        refno = 1
        for resource in self.resources:
            resource.refno = refno
            refno += 1
        # refno is now the number of xref entries (all resources plus the free entry 0)

        # second, all objects are written, keeping the positions in the output file
        fileposes = []
        for resource in self.resources:
            fileposes.append(file.tell())
            file.write("%i 0 obj\n" % resource.refno)
            resource.output(file, writer, self)
            file.write("endobj\n")

        # xref
        xrefpos = file.tell()
        file.write("xref\n"
                   "0 %d\n"
                   "0000000000 65535 f \n" % refno)

        for filepos in fileposes:
            file.write("%010i 00000 n \n" % filepos)

        # trailer
        file.write("trailer\n"
                   "<<\n"
                   "/Size %i\n" % refno)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\n"
                   "startxref\n"
                   "%i\n" % xrefpos)
        # no string formatting here: both percent signs are written
        file.write("%%EOF\n")
class PDFobject:
    """base class of the objects stored in a PDFregistry"""

    def __init__(self, type, _id=None, pageresource=None, pageprocset=None):
        """create a PDFobject
        - type is a string naming the kind of object
        - _id identifies the object among the objects of the same type;
          when it is None, id(self) is taken instead
        - pageresource, if not None, is the name (a string) under which the
          object is listed in the resource dictionary of the pages
          including the PDFobject
        - pageprocset, if not None, is the name (a string) to be listed in
          the ProcSet list of the pages including the PDFobject
        """
        self.type = type
        if _id is not None:
            self.id = _id
        else:
            self.id = id(self)
        self.pageresource = pageresource
        self.pageprocset = pageprocset
        # no reference number yet; PDFregistry.write assigns one
        self.refno = None

    def merge(self, other):
        """merge other into self; nothing to do by default"""
        pass

    def output(self, file, writer, registry):
        """write the body of the object to file; subclasses have to implement this"""
        raise NotImplementedError("output method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """document catalog; creates and registers the PDFpages and PDFinfo objects"""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "catalog")
        pages = PDFpages(document, writer, registry)
        self.PDFpages = pages
        registry.add(pages)
        info = PDFinfo(writer, registry)
        self.PDFinfo = info
        registry.add(info)

    def output(self, file, writer, registry):
        """write the catalog dictionary referencing the pages object"""
        pagesrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n/Type /Catalog\n/Pages %i 0 R\n" % pagesrefno)
        if writer.fullscreen:
            file.write("/PageMode /FullScreen\n")
        file.write(">>\n")
class PDFinfo(PDFobject):
    """document information dictionary (title, author, subject, keywords, creator, creation date)"""

    def __init__(self, writer, registry):
        PDFobject.__init__(self, "info")

    def output(self, file, writer, registry):
        """write the info dictionary to file

        Title, author, subject and keywords are taken from the writer and
        skipped when they are empty or None. The creation date is the
        current local time followed by its offset to UT.
        """
        # take the time once so that the printed time and the DST flag agree
        now = time.localtime()
        # time.timezone and time.altzone are given in seconds WEST of UTC;
        # altzone is the one to use while daylight saving time is in effect,
        # since strftime prints the DST-adjusted local time
        if time.daylight and now.tm_isdst > 0:
            westofutc = time.altzone
        else:
            westofutc = time.timezone
        # divmod on positive numbers, otherwise the minutes have a different sign from the hours
        hours, minutes = divmod(abs(westofutc) // 60, 60)
        if westofutc < 0:
            # east of Greenwich: local time is later than UT, which PDF marks by "+"
            timezone = "+%02i'%02i'" % (hours, minutes)
        elif westofutc > 0:
            timezone = "-%02i'%02i'" % (hours, minutes)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # characters outside of 32..127 and the delimiters ()[]<>\ are
            # written as three digit octal escapes
            r = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    r.append(c)
                else:
                    r.append("\\%03o" % ord(c))
            return "".join(r)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """page tree node listing all pages of the document"""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, page in enumerate(document.pages):
            self.PDFpagelist.append(PDFpage(page, pageno, self, writer, registry))
        # the pages are registered starting with the last one
        backwards = self.PDFpagelist[:]
        backwards.reverse()
        for pdfpage in backwards:
            registry.add(pdfpage)

    def output(self, file, writer, registry):
        """write the pages dictionary with references to all pages in document order"""
        kids = []
        for page in self.PDFpagelist:
            kids.append("%i 0 R" % registry.getrefno(page))
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), len(self.PDFpagelist)))
class PDFpage(PDFobject):
    """a single page, identified by its page number"""

    def __init__(self, page, pageno, PDFpages, writer, registry):
        PDFobject.__init__(self, "page", pageno)
        self.PDFpages = PDFpages
        self.page = page

        # the resources used within the page are collected in a registry
        # of its own, which is also merged into the global registry; the
        # page registry is consulted again when the page dictionary is
        # written
        self.pageregistry = PDFregistry()
        self.bbox = bbox.empty()

        self.PDFcontent = PDFcontent(self, writer, self.pageregistry, self.bbox)
        self.pageregistry.add(self.PDFcontent)
        registry.mergeregistry(self.pageregistry)

        self.pagetrafo = page.pagetrafo(self.bbox)
        if not self.pagetrafo:
            self.transformedbbox = self.bbox
        else:
            self.transformedbbox = self.bbox.transformed(self.pagetrafo)

    def output(self, file, writer, registry):
        """write the page dictionary (parent, boxes, resources, contents) to file"""
        parentrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n/Type /Page\n/Parent %i 0 R\n" % parentrefno)

        # the media box is taken from the paper format, if there is one,
        # and from the transformed bounding box otherwise
        paperformat = self.page.paperformat
        if not paperformat:
            file.write("/MediaBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())
        else:
            papersize_pt = (unit.topt(paperformat.width), unit.topt(paperformat.height))
            file.write("/MediaBox [0 0 %f %f]\n" % papersize_pt)
        if self.transformedbbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())

        # collect the procset names and group the page resources by name
        procset = ["PDF"]
        byname = {}
        for registered in self.pageregistry.types.values():
            for resource in registered.values():
                name = resource.pageprocset
                if name is not None and name not in procset:
                    procset.append(name)
                if resource.pageresource is not None:
                    byname.setdefault(resource.pageresource, []).append(resource)

        procsetnames = ["/%s" % name for name in procset]
        file.write("/Resources <<\n/ProcSet [ %s ]\n" % " ".join(procsetnames))
        for pageresource, members in byname.items():
            entries = []
            for resource in members:
                entries.append("/%s %i 0 R" % (resource.name, registry.getrefno(resource)))
            file.write("/%s <<\n%s\n>>\n" % (pageresource, "\n".join(entries)))
        file.write(">>\n")

        contentrefno = registry.getrefno(self.PDFcontent)
        file.write("/Contents %i 0 R\n>>\n" % contentrefno)
class PDFcontent(PDFobject):
    """content stream of a page

    The page canvas is processed at construction time into an in-memory
    buffer, which is written as a stream object by the output method.
    """

    def __init__(self, PDFpage, writer, registry, bbox):
        PDFobject.__init__(self, "content")
        self.PDFpage = PDFpage

        self.contentfile = cStringIO.StringIO()
        # XXX this should maybe be handled by the page since removing
        # this code would allow us to (nearly, since we also need to
        # set more info in the content dict) reuse PDFcontent for
        # patterns
        acontext = context()
        # the normal linewidth is processed before the canvas of the page
        style.linewidth.normal.processPDF(self.contentfile, writer, acontext, registry, bbox)

        self.PDFpage.page.canvas.processPDF(self.contentfile, writer, acontext, registry, bbox)

    def output(self, file, writer, registry):
        """write the stream dictionary and the content stream to file"""
        # apply a possible global transformation
        if self.PDFpage.pagetrafo:
            pagetrafofile = cStringIO.StringIO()
            self.PDFpage.pagetrafo.processPDF(pagetrafofile, writer, context(), registry, bbox.empty())
            content = pagetrafofile.getvalue() + self.contentfile.getvalue()
        else:
            content = self.contentfile.getvalue()

        # TODO: writer.compress and writer.compresslevel are not applied
        # here; the stream is always written uncompressed and thus without
        # a /Filter entry
        file.write("<<\n"
                   "/Length %i\n" % len(content))
        file.write(">>\n"
                   "stream\n")
        file.write(content)
        file.write("endstream\n")
class PDFfont(PDFobject):
    """Type1 font dictionary, identified by the name of the font"""

    def __init__(self, font, chars, writer, registry):
        PDFobject.__init__(self, "font", font.name, "Font", "Text")

        self.fontdescriptor = PDFfontdescriptor(font, chars, writer, registry)
        registry.add(self.fontdescriptor)

        if font.encoding:
            self.encoding = PDFencoding(font.encoding, writer, registry)
            registry.add(self.encoding)
        else:
            self.encoding = None

        self.name = font.name
        self.basefontname = font.basefontname
        self.metric = font.metric

    def output(self, file, writer, registry):
        """write the font dictionary including the widths array to file

        When the font file knows the used characters, the widths array
        ranges from the smallest to the largest used character code and
        unused codes get the width 0. Otherwise the widths of the codes 0
        to 255 are written.
        """
        file.write("<<\n"
                   "/Type /Font\n"
                   "/Subtype /Type1\n")
        file.write("/Name /%s\n" % self.name)
        file.write("/BaseFont /%s\n" % self.basefontname)
        fontfile = self.fontdescriptor.fontfile
        # an empty usedchars dictionary is treated like a missing one, since
        # min() and max() cannot be applied to an empty sequence
        if fontfile is not None and fontfile.usedchars:
            usedchars = fontfile.usedchars
            firstchar = min(usedchars.keys())
            lastchar = max(usedchars.keys())
            file.write("/FirstChar %d\n" % firstchar)
            file.write("/LastChar %d\n" % lastchar)
            file.write("/Widths\n"
                       "[")
            for i in range(firstchar, lastchar+1):
                # line break in front of every code divisible by 8 (except 0)
                if i and not (i % 8):
                    file.write("\n")
                else:
                    file.write(" ")
                # "in" replaces dict.has_key, which does not exist in Python 3 anymore
                if i in usedchars:
                    file.write("%f" % self.metric.getwidth_ds(i))
                else:
                    file.write("0")
            file.write(" ]\n")
        else:
            file.write("/FirstChar 0\n"
                       "/LastChar 255\n"
                       "/Widths\n"
                       "[")
            for i in range(256):
                if i and not (i % 8):
                    file.write("\n")
                else:
                    file.write(" ")
                # codes for which the metric cannot provide a width get the width 0
                try:
                    width = self.metric.getwidth_ds(i)
                except (IndexError, AttributeError):
                    width = 0
                file.write("%f" % width)
            file.write(" ]\n")
        file.write("/FontDescriptor %d 0 R\n" % registry.getrefno(self.fontdescriptor))
        if self.encoding:
            file.write("/Encoding %d 0 R\n" % registry.getrefno(self.encoding))
        file.write(">>\n")
class PDFfontdescriptor(PDFobject):
    """font descriptor, identified by the base font name"""

    def __init__(self, font, chars, writer, registry):
        PDFobject.__init__(self, "fontdescriptor", font.basefontname)

        # a font file object is created only for fonts providing a filename
        if font.filename is not None:
            self.fontfile = PDFfontfile(font.basefontname, font.filename, font.encoding, chars, writer, registry)
            registry.add(self.fontfile)
        else:
            self.fontfile = None

        self.name = font.basefontname
        self.fontinfo = font.metric.fontinfo()

    def output(self, file, writer, registry):
        """write the font descriptor dictionary to file"""
        fontinfo = self.fontinfo
        fontfile = self.fontfile
        file.write("<<\n/Type /FontDescriptor\n/FontName /%s\n" % self.name)
        # without a font file the fixed flags value 32 is written
        if fontfile is not None:
            file.write("/Flags %d\n" % fontfile.getflags())
        else:
            file.write("/Flags 32\n")
        file.write("/FontBBox [%d %d %d %d]\n" % fontinfo.fontbbox)
        file.write("/ItalicAngle %d\n" % fontinfo.italicangle)
        file.write("/Ascent %d\n" % fontinfo.ascent)
        file.write("/Descent %d\n" % fontinfo.descent)
        file.write("/CapHeight %d\n" % fontinfo.capheight)
        file.write("/StemV %d\n" % fontinfo.vstem)
        if fontfile is not None:
            file.write("/FontFile %d 0 R\n" % registry.getrefno(fontfile))
        file.write(">>\n")
class PDFfontfile(PDFobject):
    """embedded font file, identified by its filename"""

    def __init__(self, name, filename, encoding, chars, writer, registry):
        PDFobject.__init__(self, "fontfile", filename)
        self.name = name
        self.filename = filename
        if encoding is not None:
            self.encodingfilename = encoding.filename
        else:
            self.encodingfilename = None
        # the used characters are the keys of a dictionary (the values are always 1)
        self.usedchars = {}
        for char in chars:
            self.usedchars[char] = 1

        # strip tells whether only the used glyphs are written; the font
        # itself is loaded on demand by mkfontfile
        self.strip = 1
        self.font = None

    def merge(self, other):
        """take over the used characters of other, or give up stripping if the encodings differ"""
        if self.encodingfilename != other.encodingfilename:
            # TODO: need to resolve the encoding when several encodings are in the play
            self.strip = 0
            return
        self.usedchars.update(other.usedchars)

    def mkfontfile(self):
        """load the font from self.filename into self.font"""
        import font.t1font
        self.font = font.t1font.T1pfbfont(self.filename)

    def getflags(self):
        """return the flags of the font, loading the font first if necessary"""
        if self.font is None:
            self.mkfontfile()
        return self.font.getflags()

    def output(self, file, writer, registry):
        """write the font (reduced to the used glyphs when strip is set) to file"""
        if self.font is None:
            self.mkfontfile()
        if not self.strip:
            self.font.outputPDF(file, writer)
            return
        # XXX: access to the encoding file
        if self.encodingfilename:
            encodingfile = type1font.encodingfile(self.encodingfilename, self.encodingfilename)
            usedglyphs = []
            for char in self.usedchars.keys():
                # the first character of the decoded name is dropped
                # (presumably a leading "/" -- to be confirmed)
                usedglyphs.append(encodingfile.decode(char)[1:])
        else:
            self.font._encoding()
            usedglyphs = []
            for char in self.usedchars.keys():
                usedglyphs.append(self.font.encoding.decode(char))
        self.font.getstrippedfont(usedglyphs).outputPDF(file, writer)
class PDFencoding(PDFobject):
    """font encoding, identified by the name of the encoding"""

    def __init__(self, encoding, writer, registry):
        PDFobject.__init__(self, "encoding", encoding.name)
        self.encoding = encoding

    def output(self, file, writer, registry):
        """let the encoding file (given by name and filename of the encoding) write itself to file"""
        encoding = self.encoding
        type1font.encodingfile(encoding.name, encoding.filename).outputPDF(file, writer)
class PDFwriter:
    """writes a document into a PDF file

    The whole work is done by the constructor; the instance keeps the
    settings which are read by the PDF objects while they are written.
    """

    def __init__(self, document, filename,
                       title=None, author=None, subject=None, keywords=None,
                       fullscreen=0, writebbox=0, compress=1, compresslevel=6):
        """write document to the file filename
        - ".pdf" is appended to filename unless it already ends with it
        - title, author, subject and keywords are written into the info
          dictionary when they are set
        - fullscreen selects the full screen page mode in the catalog
        - writebbox enables writing a /CropBox for the pages
        - compress is switched off with a warning when zlib is missing
        - an IOError is raised when the output file cannot be opened
        """
        if not filename.endswith(".pdf"):
            filename = filename + ".pdf"
        try:
            file = open(filename, "wb")
        except IOError:
            raise IOError("cannot open output file")

        # make sure the file gets closed even if creating or writing the
        # PDF objects fails
        try:
            self.title = title
            self.author = author
            self.subject = subject
            self.keywords = keywords
            self.fullscreen = fullscreen
            self.writebbox = writebbox
            if compress and not haszlib:
                compress = 0
                warnings.warn("compression disabled due to missing zlib module")
            self.compress = compress
            self.compresslevel = compresslevel

            # the PDFcatalog class automatically builds up the pdfobjects from a document
            registry = PDFregistry()
            catalog = PDFcatalog(document, self, registry)
            registry.add(catalog)

            # header line followed by a comment line containing four characters above 127
            file.write("%%PDF-1.4\n%%%s%s%s%s\n" % (chr(195), chr(182), chr(195), chr(169)))
            registry.write(file, self, catalog)
        finally:
            file.close()
class compressedstream:
    """file-like wrapper writing zlib compressed data to the underlying file"""

    def __init__(self, file, compresslevel):
        self.file = file
        self.compressobj = zlib.compressobj(compresslevel)

    def write(self, string):
        """compress string and write whatever the compressor returns for it"""
        compressed = self.compressobj.compress(string)
        self.file.write(compressed)

    def flush(self):
        """write the data still pending in the compressor"""
        pending = self.compressobj.flush()
        self.file.write(pending)
class context:
    """state passed along while PDF output is processed"""

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.font = None
        self.textregion = 0

    def __call__(self, **kwargs):
        """return a shallow copy of the context with the given attributes replaced"""
        modified = copy.copy(self)
        for name in kwargs:
            setattr(modified, name, kwargs[name])
        return modified