take into account kerning and inter-character spacing in bounding box
[PyX.git] / pyx / pdfwriter.py
blob2a9fcfbf9c2b3d2241115881130ad35a50c86c90
1 # -*- encoding: utf-8 -*-
4 # Copyright (C) 2005-2011 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2007 Michael Schindler <m-schindler@users.sourceforge.net>
6 # Copyright (C) 2005-2011 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import io, copy, logging, time
25 logger = logging.getLogger("pyx")
26 try:
27 import zlib
28 haszlib = True
29 except:
30 haszlib = False
32 from . import bbox, config, style, unit, version, trafo, writer
class PDFregistry:
    """Bookkeeping of PDF objects and page resources.

    Objects are stored by their ``type`` and ``id`` attributes. The order of
    registration is preserved since it defines the reference numbers which
    are handed out when the registry is written.
    """

    def __init__(self):
        # type -> {id -> object}
        self.types = {}
        # registered objects in registration order
        self.objects = []
        # resource type -> {resource name -> object}
        self.resources = {}
        self.procsets = {"PDF": 1}
        # set by mergeregistry of the registry this one got merged into
        self.merged = None

    def add(self, object):
        """Register object; an equal object (same type and id) absorbs it via merge."""
        known = self.types.setdefault(object.type, {})
        if object.id not in known:
            self.objects.append(object)
            known[object.id] = object
            return
        known[object.id].merge(object)

    def getrefno(self, object):
        """Return the reference number of the object registered under object's type and id."""
        if not self.merged:
            return self.types[object.type][object.id].refno
        # after a merge the numbering is owned by the other registry
        return self.merged.getrefno(object)

    def mergeregistry(self, registry):
        """Add all objects of registry to self and redirect its refno lookups to self."""
        for entry in registry.objects:
            self.add(entry)
        registry.merged = self

    def write(self, file, writer, catalog):
        """Write all objects followed by the xref table and the trailer to file."""
        # all reference numbers need to be known before the first object is
        # written, as objects refer to each other
        for number, entry in enumerate(self.objects, 1):
            entry.refno = number
        # number of xref entries: all objects plus the leading free entry
        size = len(self.objects) + 1

        # write the objects and remember where each of them starts
        offsets = []
        for entry in self.objects:
            offsets.append(file.tell())
            file.write("%i 0 obj\n" % entry.refno)
            entry.write(file, writer, self)
            file.write("endobj\n")

        # xref
        xrefpos = file.tell()
        file.write("xref\n0 %d\n0000000000 65535 f \n" % size)
        for offset in offsets:
            file.write("%010i 00000 n \n" % offset)

        # trailer
        file.write("trailer\n<<\n/Size %i\n" % size)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\nstartxref\n%i\n" % xrefpos)
        file.write("%%EOF\n")

    def addresource(self, resourcetype, resourcename, object, procset=None):
        """Store object as a named resource and optionally note a procset."""
        named = self.resources.setdefault(resourcetype, {})
        named[resourcename] = object
        if procset:
            self.procsets[procset] = 1

    def writeresources(self, file):
        """Write the resource dictionary (procsets and named resources) to file."""
        file.write("<<\n")
        procsetnames = " ".join("/%s" % procset for procset in self.procsets)
        file.write("/ProcSet [ %s ]\n" % procsetnames)
        for resourcetype, resources in self.resources.items():
            entries = "\n".join("/%s %i 0 R" % (name, self.getrefno(resource))
                                for name, resource in resources.items())
            file.write("/%s <<\n%s\n>>\n" % (resourcetype, entries))
        file.write(">>\n")
class PDFobject:
    """Base class of everything that can be stored in a PDFregistry."""

    def __init__(self, type, _id=None):
        """Set up the identification of the object.

        - type is a string naming the kind of the object
        - _id identifies the object among those of the same type; when it
          is None, id(self) is taken instead
        """
        self.type = type
        self.id = id(self) if _id is None else _id

    def merge(self, other):
        """Hook called when an equal object is registered again; does nothing by default."""
        pass

    def write(self, file, writer, registry):
        """Write the object to file; subclasses have to override this method."""
        raise NotImplementedError("write method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """Document catalog, which also creates and registers the form, pages and info objects."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "catalog")
        # the form object is registered before the pages are built, since
        # each page scans registry.objects for objects of type "form"
        form = PDFform(writer, registry)
        self.PDFform = form
        registry.add(form)
        pages = PDFpages(document, writer, registry)
        self.PDFpages = pages
        registry.add(pages)
        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def write(self, file, writer, registry):
        """Write the catalog dictionary to file."""
        entries = ["<<\n"
                   "/Type /Catalog\n"
                   "/Pages %i 0 R\n" % registry.getrefno(self.PDFpages)]
        # an AcroForm entry is written only when there are form fields
        if not self.PDFform.empty():
            entries.append("/AcroForm %i 0 R\n" % registry.getrefno(self.PDFform))
        if writer.fullscreen:
            entries.append("/PageMode /FullScreen\n")
        entries.append(">>\n")
        for entry in entries:
            file.write(entry)
class PDFinfo(PDFobject):
    """Document information dictionary (title, author, subject, keywords, creator, creation date)."""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def write(self, file, writer, registry):
        """Write the information dictionary to file.

        Title, author, subject and keywords are taken from the equally
        named attributes of writer and are skipped when they are empty.
        """
        # Use a single time stamp for both the date and its UT offset.
        now = time.localtime()

        # The PDF date format wants the relationship of local time to UT:
        # "+" means local time is later than UT. tm_gmtoff is seconds *east*
        # of UTC including DST, which matches the local time strftime emits.
        # time.timezone is seconds *west* of UTC (and ignores DST), so its
        # sign has to be flipped when it is used as a fallback.
        offset = getattr(now, "tm_gmtoff", None)
        if offset is None:
            offset = -time.timezone
        if offset:
            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
            hours, minutes = divmod(abs(offset) // 60, 60)
            timezone = "%s%02i'%02i'" % ("+" if offset > 0 else "-", hours, minutes)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            """Return s escaped for use between the parentheses of a PDF literal string."""
            if all(ord(c) < 256 for c in s):
                codes = [ord(c) for c in s]
            else:
                # An octal escape holds a single byte only, hence characters
                # beyond U+00FF need a multi-byte representation: UTF-16BE
                # preceded by the byte order mark FE FF.
                codes = b"\xfe\xff" + s.encode("utf-16-be", "replace")
            return "".join(chr(code) if 32 <= code <= 127 and chr(code) not in "()[]<>\\"
                           else "\\%03o" % code
                           for code in codes)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        # the seconds are included since the UT offset may only follow a
        # complete time specification
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M%S", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """Page tree node holding one PDFpage per page of the document."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, documentpage in enumerate(document.pages):
            pdfpage = PDFpage(documentpage, pageno, self, writer, registry)
            registry.add(pdfpage)
            self.PDFpagelist.append(pdfpage)

    def write(self, file, writer, registry):
        """Write the pages dictionary, which references all pages, to file."""
        kids = " ".join("%i 0 R" % registry.getrefno(pdfpage) for pdfpage in self.PDFpagelist)
        count = len(self.PDFpagelist)
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (kids, count))
class PDFpage(PDFobject):
    """A single page together with its annotations, content and resources."""

    def __init__(self, page, pageno, PDFpages, writer, registry):
        PDFobject.__init__(self, "page")
        self.PDFpages = PDFpages
        self.page = page

        # The resources used within a page are found by giving each page a
        # registry of its own; this page registry is merged into the global
        # registry at the end.
        pageregistry = PDFregistry()
        self.pageregistry = pageregistry
        pageregistry.add(self)

        self.PDFannotations = PDFannotations()
        pageregistry.add(self.PDFannotations)
        # the form dictionary has to be reachable from the page registry as
        # formfields might get appended to it
        for registered in registry.objects:
            if registered.type == "form":
                pageregistry.add(registered)

        self.PDFcontent = PDFcontent(page, writer, pageregistry)
        pageregistry.add(self.PDFcontent)
        registry.mergeregistry(pageregistry)

    def write(self, file, writer, registry):
        """Write the page dictionary including its resource dictionary to file."""
        content = self.PDFcontent
        file.write("<<\n"
                   "/Type /Page\n"
                   "/Parent %i 0 R\n" % registry.getrefno(self.PDFpages))
        # without a paperformat the bounding box of the content is the media box
        paperformat = self.page.paperformat
        if not paperformat:
            file.write("/MediaBox [%f %f %f %f]\n" % content.bbox.highrestuple_pt())
        else:
            width_pt = unit.topt(paperformat.width)
            height_pt = unit.topt(paperformat.height)
            file.write("/MediaBox [0 0 %f %f]\n" % (width_pt, height_pt))
        if content.bbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % content.bbox.highrestuple_pt())
        if self.page.rotated:
            file.write("/Rotate 90\n")
        # the annotations are referenced only when there are some
        if not self.PDFannotations.empty():
            file.write("/Annots %i 0 R\n" % registry.getrefno(self.PDFannotations))
        file.write("/Contents %i 0 R\n" % registry.getrefno(content))
        file.write("/Resources ")
        self.pageregistry.writeresources(file)
        file.write(">>\n")
class PDFcontent(PDFobject):
    """Content stream of a page.

    The stream data is generated once at construction time and kept in
    memory; write emits it, compressed if requested by the writer.
    """

    def __init__(self, page, awriter, registry):
        """Render page into an in-memory content stream.

        - page provides the processPDF method generating the stream
        - awriter is the PDF writer carrying the output options
        - registry is handed to processPDF together with the context
        """
        # PDFobject expects the type string as first argument; passing the
        # registry here would make the registry the type and "content" the id
        PDFobject.__init__(self, "content")
        contentfile = writer.writer(io.BytesIO())
        # NOTE(review): processPDF presumably updates this bbox in place -- confirm
        self.bbox = bbox.empty()
        acontext = context()
        page.processPDF(contentfile, awriter, acontext, registry, self.bbox)
        self.content = contentfile.file.getvalue()

    def write(self, file, awriter, registry):
        """Write the stream dictionary and the stream data to file."""
        if awriter.compress:
            # honour the compression level selected on the writer
            content = zlib.compress(self.content, awriter.compresslevel)
        else:
            content = self.content
        file.write("<<\n"
                   "/Length %i\n" % len(content))
        if awriter.compress:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        file.write_bytes(content)
        file.write("endstream\n")
class PDFwriter:
    """Writes document as PDF to file at construction time and carries the output options."""

    def __init__(self, document, file,
                       title=None, author=None, subject=None, keywords=None,
                       fullscreen=False, writebbox=False, compress=True, compresslevel=6,
                       strip_fonts=True, text_as_path=False, mesh_as_bitmap=False, mesh_as_bitmap_resolution=300):
        # filled on first use by getfontmap
        self._fontmap = None

        # compression silently degrades to uncompressed output (with a
        # warning) when the zlib module could not be imported
        if compress and not haszlib:
            compress = 0
            logger.warning("PDFwriter: compression disabled due to missing zlib module")
        self.compress = compress
        self.compresslevel = compresslevel

        # document information
        self.title = title
        self.author = author
        self.subject = subject
        self.keywords = keywords

        # output options
        self.fullscreen = fullscreen
        self.writebbox = writebbox
        self.strip_fonts = strip_fonts
        self.text_as_path = text_as_path
        self.mesh_as_bitmap = mesh_as_bitmap
        self.mesh_as_bitmap_resolution = mesh_as_bitmap_resolution

        # dictionary mapping font names to dictionaries mapping encoding names to encodings
        # encodings themselves are mappings from glyphnames to codepoints
        self.encodings = {}

        # creating the catalog builds up all PDF objects of the document
        registry = PDFregistry()
        catalog = PDFcatalog(document, self, registry)
        registry.add(catalog)

        output = writer.writer(file)
        output.write_bytes(b"%PDF-1.4\n%\xc3\xb6\xc3\xa9\n")
        registry.write(output, self, catalog)

    def getfontmap(self):
        """Return the font map, reading the configured font map files on first use."""
        if self._fontmap is not None:
            return self._fontmap
        # late import due to cyclic dependency
        from pyx.dvi import mapfile
        fontmapfiles = config.getlist("text", "pdffontmaps", ["pdftex.map"])
        self._fontmap = mapfile.readfontmap(fontmapfiles)
        return self._fontmap
class PDFannotations(PDFobject):
    """Array of references to the annotations of a page."""

    def __init__(self):
        PDFobject.__init__(self, "annotations")
        self.annots = []

    def append(self, item):
        """Add item unless it is contained already."""
        if item in self.annots:
            return
        self.annots.append(item)

    def empty(self):
        """Return whether no annotation has been added."""
        return not self.annots

    def write(self, file, writer, registry):
        """Write the array of annotation references to file."""
        # XXX problem: This object will be written to the file even if it is useless (empty)
        references = " ".join("%d 0 R" % registry.getrefno(annot) for annot in self.annots)
        file.write("[ %s ]\n" % references)
class PDFform(PDFobject):
    """Dictionary listing the form fields; the catalog refers to it as /AcroForm."""

    def __init__(self, writer, registry):
        PDFobject.__init__(self, "form")
        self.fields = []

    def merge(self, other):
        """Take over all fields of other which are not contained already."""
        for otherfield in other.fields:
            self.append(otherfield)

    def append(self, field):
        """Add field unless it is contained already."""
        if field in self.fields:
            return
        self.fields.append(field)

    def empty(self):
        """Return whether no field has been added."""
        return not self.fields

    def write(self, file, writer, registry):
        """Write the dictionary with the array of field references to file."""
        # XXX problem: This object will be written to the file even if it is useless (empty)
        references = "".join(" %d 0 R" % registry.getrefno(field) for field in self.fields)
        file.write("<<" "/Fields [" + references + " ]\n" ">>\n")
class context:
    """State kept while generating the PDF content stream of a page."""

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.selectedfont = None
        self.trafo = trafo.trafo()
        self.fillstyles = []
        self.fillrule = 0

    def __call__(self, **kwargs):
        """Return a shallow copy of self with the attributes named in kwargs replaced."""
        modified = copy.copy(self)
        for name in kwargs:
            setattr(modified, name, kwargs[name])
        return modified