added first version of AFM parser#
[PyX/mjg.git] / pyx / pdfwriter.py
blob7041e0b81c02a85f448f683720c99726bd33a3fa
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2005 Jörg Lehmann <joergl@users.sourceforge.net>
6 # Copyright (C) 2005 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import copy, warnings, time
# zlib is optional: without it the writer falls back to uncompressed streams.
# Only a failing import is treated as "zlib not available"; anything else
# (e.g. KeyboardInterrupt) must not be silently swallowed here.
try:
    import zlib
    haszlib = 1
except ImportError:
    haszlib = 0
31 import unit, style, type1font, version
try:
    # probe for the builtin; a NameError means we run on an old interpreter
    enumerate
except NameError:
    # fallback implementation for Python 2.2 and below
    def enumerate(list):
        """Return a list of (index, item) pairs for the sequence list."""
        indices = xrange(len(list))
        return zip(indices, list)
class PDFregistry:
    """Registry of the PDF objects (resources) making up a document.

    Resources are identified by their (type, id) pair. Registering a second
    resource with the same pair merges it into the one already registered.
    The registry also serialises all resources, followed by the
    cross-reference table and the trailer.
    """

    def __init__(self):
        # type name -> {resource id -> registered resource}
        self.types = {}
        # we need to keep the original order of the resources (for PDFcontentlength)
        self.resources = []

    def add(self, resource):
        """ register resource, merging it with an already registered resource of the same type and id"""
        resources = self.types.setdefault(resource.type, {})
        if resource.id in resources:
            resources[resource.id].merge(resource)
        else:
            self.resources.append(resource)
            resources[resource.id] = resource

    def getrefno(self, resource):
        """return the object number of the registered resource with the type and id of resource

        The number is only available once write() has assigned it.
        """
        return self.types[resource.type][resource.id].refno

    def mergeregistry(self, registry):
        """register all resources of another registry (in their original order)"""
        for resource in registry.resources:
            self.add(resource)

    def write(self, file, writer, catalog):
        """write all resources, the xref table and the trailer to file

        - file needs write() and tell() methods
        - writer is passed through to the outputPDF methods of the resources
        - catalog is the registered root object; its PDFinfo attribute is
          referenced as the /Info entry of the trailer
        """
        # we recursively inserted the resources such that the topmost resources in
        # the dependency tree of the resources come last. Hence, we need to
        # write them in reverse order. A reversed copy is used so that
        # self.resources keeps its order and write() can be called repeatedly.
        ordered = self.resources[:]
        ordered.reverse()

        # first we set all refnos
        refno = 1
        for resource in ordered:
            resource.refno = refno
            refno += 1

        # second, all objects are written, keeping the positions in the output file
        fileposes = []
        for resource in ordered:
            fileposes.append(file.tell())
            file.write("%i 0 obj\n" % resource.refno)
            resource.outputPDF(file, writer, self)
            file.write("endobj\n")

        # xref
        xrefpos = file.tell()
        file.write("xref\n"
                   "0 %d\n"
                   "0000000000 65535 f \n" % refno)

        for filepos in fileposes:
            file.write("%010i 00000 n \n" % filepos)

        # trailer
        file.write("trailer\n"
                   "<<\n"
                   "/Size %i\n" % refno)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\n"
                   "startxref\n"
                   "%i\n" % xrefpos)
        # no string formatting here: both percent signs are written out
        file.write("%%EOF\n")
class PDFobject:
    """Base class of everything that can be put into a PDFregistry."""

    def __init__(self, type, _id=None, pageresource=None, pageprocset=None):
        """set up the bookkeeping attributes of a PDF object

        - type: string naming the kind of object
        - _id: identification of the object within its type; when it is
          None, id(self) is taken instead
        - pageresource: None or a string naming the entry of the page
          resource dictionary the object should be listed in
        - pageprocset: None or a string naming the entry to be listed in
          the ProcSet of the pages using the object
        """
        self.type = type
        if _id is not None:
            self.id = _id
        else:
            self.id = id(self)
        self.pageresource = pageresource
        self.pageprocset = pageprocset
        # no object number has been assigned yet
        self.refno = None

    def merge(self, other):
        """merge other into self; nothing needs to be done by default"""
        pass

    def outputPDF(self, file, writer, registry):
        """write the object body; must be overridden by subclasses"""
        raise NotImplementedError("outputPDF method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """Catalog object; creates and registers the pages and info objects."""

    def __init__(self, document, registry):
        PDFobject.__init__(self, "catalog")
        # page tree built from the document
        pages = PDFpages(document, registry)
        self.PDFpages = pages
        registry.add(pages)
        # document information dictionary
        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def outputPDF(self, file, writer, registry):
        """write the catalog dictionary referencing the page tree"""
        pagesrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n"
                   "/Type /Catalog\n"
                   "/Pages %i 0 R\n" % pagesrefno)
        if writer.fullscreen:
            file.write("/PageMode /FullScreen\n")
        file.write(">>\n")
class PDFinfo(PDFobject):
    """Document information dictionary (title, author, ..., creation date)."""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def outputPDF(self, file, writer, registry):
        """write the info dictionary

        Title, author, subject and keywords are taken from the writer and
        only written when set. Creator and creation date are always written.
        """
        # a single snapshot of the local time is used for both the time
        # stamp and the decision whether daylight saving time is in effect
        now = time.localtime()

        # time.timezone and time.altzone are given in seconds WEST of UTC,
        # while the PDF date needs the offset of the local time relative to
        # UT ("+" when local time is ahead of UT). Hence the sign is flipped.
        if time.daylight and now.tm_isdst > 0:
            utcoffset = -time.altzone
        else:
            utcoffset = -time.timezone

        if utcoffset > 0:
            timezone = "+%02i'%02i'" % divmod(utcoffset // 60, 60)
        elif utcoffset < 0:
            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
            timezone = "-%02i'%02i'" % divmod(-utcoffset // 60, 60)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # keep plain characters; the delimiters ()[]<>, the backslash
            # and everything outside the range 32..127 become octal escapes
            pieces = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    pieces.append(c)
                else:
                    pieces.append("\\%03o" % ord(c))
            return "".join(pieces)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """Page tree node holding one PDFpage per page of the document."""

    def __init__(self, document, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, docpage in enumerate(document.pages):
            self.PDFpagelist.append(PDFpage(docpage, pageno, self, registry))
        # the pages are registered back to front (last page first)
        backwards = self.PDFpagelist[:]
        backwards.reverse()
        for pdfpage in backwards:
            registry.add(pdfpage)

    def outputPDF(self, file, writer, registry):
        """write the pages dictionary listing all pages as kids"""
        kids = []
        for pdfpage in self.PDFpagelist:
            kids.append("%i 0 R" % registry.getrefno(pdfpage))
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), len(self.PDFpagelist)))
class PDFpage(PDFobject):
    """A single page: bounding box, content stream and the resources it uses."""

    def __init__(self, page, pageno, PDFpages, registry):
        PDFobject.__init__(self, "page", pageno)
        self.PDFpages = PDFpages
        self.page = page

        # every page uses its own registry in order to find out which
        # resources are used within the page. However, the
        # pageregistry is also merged in the global registry
        self.pageregistry = PDFregistry()

        self.bbox = page.bbox()
        self.pagetrafo = page.pagetrafo(self.bbox)
        if not self.pagetrafo:
            self.transformedbbox = self.bbox
        else:
            self.transformedbbox = self.bbox.transformed(self.pagetrafo)
        self.PDFcontent = PDFcontent(page.canvas, self.pagetrafo, self.pageregistry)
        self.pageregistry.add(self.PDFcontent)
        self.page.canvas.registerPDF(self.pageregistry)
        registry.mergeregistry(self.pageregistry)

    def outputPDF(self, file, writer, registry):
        """write the page dictionary (boxes, resources, content reference)"""
        parentrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n"
                   "/Type /Page\n"
                   "/Parent %i 0 R\n" % parentrefno)

        # the media box is the paper when a paper format is set, otherwise
        # the transformed bounding box
        paperformat = self.page.paperformat
        if paperformat:
            papersize_pt = (unit.topt(paperformat.width), unit.topt(paperformat.height))
            file.write("/MediaBox [0 0 %f %f]\n" % papersize_pt)
        else:
            file.write("/MediaBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())
        if self.transformedbbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % self.transformedbbox.highrestuple_pt())

        # collect the procset names and group the resources of this page by
        # the name of their page resource dictionary
        procsetnames = ["PDF"]
        resourcesbyname = {}
        for resourcesofkind in self.pageregistry.types.values():
            for resource in resourcesofkind.values():
                procsetname = resource.pageprocset
                if procsetname is not None and procsetname not in procsetnames:
                    procsetnames.append(procsetname)
                if resource.pageresource is not None:
                    resourcesbyname.setdefault(resource.pageresource, []).append(resource)

        file.write("/Resources <<\n"
                   "/ProcSet [ %s ]\n" % " ".join(["/%s" % name for name in procsetnames]))
        for pageresource, members in resourcesbyname.items():
            entries = ["/%s %i 0 R" % (member.name, registry.getrefno(member))
                       for member in members]
            file.write("/%s <<\n%s\n>>\n" % (pageresource, "\n".join(entries)))
        file.write(">>\n")
        file.write("/Contents %i 0 R\n"
                   ">>\n" % registry.getrefno(self.PDFcontent))
class PDFcontent(PDFobject):
    """Content stream of a canvas, optionally deflate-compressed."""

    def __init__(self, canvas, pagetrafo, registry):
        PDFobject.__init__(self, "content")
        self.canvas = canvas
        self.pagetrafo = pagetrafo
        # the stream length is stored in an object of its own, since it is
        # only known once the stream has been written
        self.contentlength = PDFcontentlength((self.type, self.id))
        registry.add(self.contentlength)

    def outputPDF(self, file, writer, registry):
        """write the stream dictionary and the stream, recording its length"""
        compress = writer.compress

        file.write("<<\n"
                   "/Length %i 0 R\n" % registry.getrefno(self.contentlength))
        if compress:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        beginstreampos = file.tell()

        # the canvas writes either straight into the file or through the
        # compressing wrapper
        if not compress:
            stream = file
        else:
            stream = compressedstream(file, writer.compresslevel)

        # XXX this should maybe be handled by the page since removing
        # this code would allow us to (nearly, since we also need to
        # set more info in the content dict) reuse PDFcontent for
        # patterns
        acontext = context()
        # apply a possible global transformation
        if self.pagetrafo:
            self.pagetrafo.outputPDF(stream, writer, acontext)
        style.linewidth.normal.outputPDF(stream, writer, acontext)

        self.canvas.outputPDF(stream, writer, acontext)
        if compress:
            # push the remaining compressed data into the file
            stream.flush()

        # the length is taken before the trailing newline is written
        self.contentlength.contentlength = file.tell() - beginstreampos
        if compress:
            file.write("\n")
        file.write("endstream\n")
class PDFcontentlength(PDFobject):
    """Integer object holding the length of a content stream."""

    def __init__(self, contentid):
        PDFobject.__init__(self, "_contentlength", contentid)
        # unknown at creation time; has to be set before outputPDF is called
        self.contentlength = None

    def outputPDF(self, file, writer, registry):
        """write the length that was stored in the instance in the meantime"""
        length = self.contentlength
        file.write("%d\n" % length)
class PDFfont(PDFobject):
    """Type1 font dictionary with its widths table.

    Creating the font also creates and registers the font descriptor and,
    when the font has an encoding, the encoding object.
    """

    def __init__(self, font, chars, registry):
        PDFobject.__init__(self, "font", font.name, "Font", "Text")

        self.fontdescriptor = PDFfontdescriptor(font, chars, registry)
        registry.add(self.fontdescriptor)

        if font.encoding:
            self.encoding = PDFencoding(font.encoding)
            registry.add(self.encoding)
        else:
            self.encoding = None

        self.name = font.name
        self.basefontname = font.basefontname
        self.metric = font.metric

    def outputPDF(self, file, writer, registry):
        """write the font dictionary

        When the font file knows which characters are used, the widths table
        spans only the first to the last used character and unused slots get
        the width 0. Otherwise (no font file, or no used characters recorded)
        the full range 0..255 is written.
        """
        file.write("<<\n"
                   "/Type /Font\n"
                   "/Subtype /Type1\n")
        file.write("/Name /%s\n" % self.name)
        file.write("/BaseFont /%s\n" % self.basefontname)

        fontfile = self.fontdescriptor.fontfile
        # an empty usedchars is treated like "unknown": min()/max() of an
        # empty sequence would raise a ValueError
        if fontfile is not None and fontfile.usedchars:
            usedchars = fontfile.usedchars
            firstchar = min(usedchars.keys())
            lastchar = max(usedchars.keys())
            file.write("/FirstChar %d\n" % firstchar)
            file.write("/LastChar %d\n" % lastchar)
            file.write("/Widths\n"
                       "[")
            for i in range(firstchar, lastchar+1):
                # start a new line at every multiple of 8 (except 0)
                if i and not (i % 8):
                    file.write("\n")
                else:
                    file.write(" ")
                if i in usedchars:
                    file.write("%f" % self.metric.getwidth_ds(i))
                else:
                    file.write("0")
            file.write(" ]\n")
        else:
            file.write("/FirstChar 0\n"
                       "/LastChar 255\n"
                       "/Widths\n"
                       "[")
            for i in range(256):
                if i and not (i % 8):
                    file.write("\n")
                else:
                    file.write(" ")
                # characters the metric cannot provide a width for get width 0
                try:
                    width = self.metric.getwidth_ds(i)
                except (IndexError, AttributeError):
                    width = 0
                file.write("%f" % width)
            file.write(" ]\n")
        file.write("/FontDescriptor %d 0 R\n" % registry.getrefno(self.fontdescriptor))
        if self.encoding:
            file.write("/Encoding %d 0 R\n" % registry.getrefno(self.encoding))
        file.write(">>\n")
class PDFfontdescriptor(PDFobject):
    """Font descriptor; owns the embedded font file when the font has one."""

    def __init__(self, font, chars, registry):
        PDFobject.__init__(self, "fontdescriptor", font.basefontname)

        # fonts without a file name are not embedded
        if font.filename is not None:
            self.fontfile = PDFfontfile(font.basefontname, font.filename, font.encoding, chars)
            registry.add(self.fontfile)
        else:
            self.fontfile = None

        self.name = font.basefontname
        self.fontinfo = font.metric.fontinfo()

    def outputPDF(self, file, writer, registry):
        """write the font descriptor dictionary"""
        fontinfo = self.fontinfo
        embedded = self.fontfile is not None

        file.write("<<\n"
                   "/Type /FontDescriptor\n"
                   "/FontName /%s\n" % self.name)
        if embedded:
            file.write("/Flags %d\n" % self.fontfile.getflags())
        else:
            # fixed flags value used when there is no font file to ask
            file.write("/Flags 32\n")
        file.write("/FontBBox [%d %d %d %d]\n" % fontinfo.fontbbox)
        file.write("/ItalicAngle %d\n" % fontinfo.italicangle)
        file.write("/Ascent %d\n" % fontinfo.ascent)
        file.write("/Descent %d\n" % fontinfo.descent)
        file.write("/CapHeight %d\n" % fontinfo.capheight)
        file.write("/StemV %d\n" % fontinfo.vstem)
        if embedded:
            file.write("/FontFile %d 0 R\n" % registry.getrefno(self.fontfile))
        file.write(">>\n")
class PDFfontfile(PDFobject):
    """Embedded font file, stripped down to the used glyphs when possible."""

    def __init__(self, name, filename, encoding, chars):
        PDFobject.__init__(self, "fontfile", filename)
        self.name = name
        self.filename = filename
        if encoding is not None:
            self.encodingfilename = encoding.filename
        else:
            self.encodingfilename = None
        # used characters are the keys; the values carry no information
        self.usedchars = dict([(char, 1) for char in chars])

        self.strip = 1
        # the font is loaded lazily by mkfontfile
        self.font = None

    def merge(self, other):
        """take over the used characters of other; disable stripping on an encoding mismatch"""
        if self.encodingfilename != other.encodingfilename:
            # TODO: need to resolve the encoding when several encodings are in the play
            self.strip = 0
        else:
            self.usedchars.update(other.usedchars)

    def mkfontfile(self):
        """load the font from self.filename"""
        import font.t1font
        self.font = font.t1font.T1pfbfont(self.filename)

    def getflags(self):
        """return the flags of the font, loading the font if necessary"""
        if self.font is None:
            self.mkfontfile()
        return self.font.getflags()

    def outputPDF(self, file, writer, registry):
        """write the font, stripped to the used glyphs unless stripping is disabled"""
        if self.font is None:
            self.mkfontfile()
        if not self.strip:
            outfont = self.font
        else:
            charcodes = self.usedchars.keys()
            # XXX: access to the encoding file
            if self.encodingfilename:
                encodingfile = type1font.encodingfile(self.encodingfilename, self.encodingfilename)
                # the first character of each decoded name is dropped
                usedglyphs = [encodingfile.decode(charcode)[1:] for charcode in charcodes]
            else:
                self.font._encoding()
                usedglyphs = [self.font.encoding.decode(charcode) for charcode in charcodes]
            outfont = self.font.getstrippedfont(usedglyphs)
        outfont.outputPDF(file, writer)
class PDFencoding(PDFobject):
    """Encoding object; its output is delegated to type1font.encodingfile."""

    def __init__(self, encoding):
        PDFobject.__init__(self, "encoding", encoding.name)
        self.encoding = encoding

    def outputPDF(self, file, writer, registry):
        """create the encoding file from name and filename and let it write itself"""
        encoding = self.encoding
        type1font.encodingfile(encoding.name, encoding.filename).outputPDF(file, writer, registry)
class PDFwriter:
    """Writes a document into a PDF file; all work is done on instantiation."""

    def __init__(self, document, filename,
                       title=None, author=None, subject=None, keywords=None,
                       fullscreen=0, writebbox=0, compress=1, compresslevel=6):
        """write document to the file named filename

        - filename gets the suffix ".pdf" appended when it is missing
        - title, author, subject, keywords are written into the info
          dictionary when set
        - fullscreen requests the full screen page mode in the catalog
        - writebbox adds a CropBox to the pages
        - compress and compresslevel control the deflate compression of the
          content streams; compression is disabled with a warning when the
          zlib module is not available

        An IOError is raised when the output file cannot be opened.
        """
        if not filename.endswith(".pdf"):
            filename = filename + ".pdf"
        try:
            file = open(filename, "wb")
        except IOError:
            raise IOError("cannot open output file")

        # the file is closed even when building or writing the document
        # fails, so that the file handle does not leak
        try:
            self.title = title
            self.author = author
            self.subject = subject
            self.keywords = keywords
            self.fullscreen = fullscreen
            self.writebbox = writebbox
            if compress and not haszlib:
                compress = 0
                warnings.warn("compression disabled due to missing zlib module")
            self.compress = compress
            self.compresslevel = compresslevel

            # header line followed by a comment line of four non-ASCII characters
            file.write("%%PDF-1.4\n%%%s%s%s%s\n" % (chr(195), chr(182), chr(195), chr(169)))

            # the PDFcatalog class automatically builds up the pdfobjects from a document
            registry = PDFregistry()
            catalog = PDFcatalog(document, registry)
            registry.add(catalog)
            registry.write(file, self, catalog)
        finally:
            file.close()
class compressedstream:
    """File-like wrapper deflating everything written before passing it on."""

    def __init__(self, file, compresslevel):
        self.file = file
        self.compressobj = zlib.compressobj(compresslevel)

    def write(self, string):
        """compress string and hand whatever the compressor emits to the file"""
        compressed = self.compressobj.compress(string)
        self.file.write(compressed)

    def flush(self):
        """write out the data still buffered inside the compressor"""
        remainder = self.compressobj.flush()
        self.file.write(remainder)
class context:
    """Attribute container handed to the outputPDF methods.

    Calling an instance returns a shallow copy with the given keyword
    arguments set as attributes; the instance itself stays untouched.
    """

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.font = None
        self.textregion = 0

    def __call__(self, **kwargs):
        derived = copy.copy(self)
        for name in kwargs:
            setattr(derived, name, kwargs[name])
        return derived