remove converter script
[PyX.git] / pyx / pdfwriter.py
blobdf296eb1fe4c8caf84c3e1246c6e9eed6cb6c51d
1 # -*- encoding: utf-8 -*-
4 # Copyright (C) 2005-2011 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2007 Michael Schindler <m-schindler@users.sourceforge.net>
6 # Copyright (C) 2005-2011 André Wobst <wobsta@users.sourceforge.net>
8 # This file is part of PyX (http://pyx.sourceforge.net/).
10 # PyX is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
15 # PyX is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with PyX; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import cStringIO, copy, warnings, time
25 try:
26 import zlib
27 haszlib = 1
28 except:
29 haszlib = 0
31 import bbox, config, style, unit, version, trafo
class PDFregistry:
    """Registry of the objects that make up a PDF file.

    Objects are stored in registration order and are looked up by their
    ``type`` and ``id`` attributes.  The registry assigns the reference
    numbers, writes the objects followed by the cross reference table and
    the trailer, and keeps track of named resources and procsets.
    """

    def __init__(self):
        # maps object type -> {object id -> registered object}
        self.types = {}
        # we want to keep the order of the resources
        self.objects = []
        # maps resource type -> {resource name -> object}
        self.resources = {}
        self.procsets = {"PDF": 1}
        # registry this registry has been merged into (set by mergeregistry)
        self.merged = None

    def add(self, object):
        """ register object, merging it with an already registered object of the same type and id """
        sameobjects = self.types.setdefault(object.type, {})
        # "in" instead of the deprecated dict.has_key (removed in Python 3)
        if object.id in sameobjects:
            sameobjects[object.id].merge(object)
        else:
            self.objects.append(object)
            sameobjects[object.id] = object

    def getrefno(self, object):
        """return the reference number of the registered object matching type and id of object

        When this registry has been merged into another one, the lookup is
        delegated to that registry.  The refno attribute is set by write().
        """
        if self.merged is not None:
            return self.merged.getrefno(object)
        return self.types[object.type][object.id].refno

    def mergeregistry(self, registry):
        """add all objects of registry to this registry and redirect its refno lookups to self"""
        for object in registry.objects:
            self.add(object)
        registry.merged = self

    def write(self, file, writer, catalog):
        """write all objects, the cross reference table and the trailer to file

        catalog is used for the /Root entry and catalog.PDFinfo for the
        /Info entry of the trailer; both need to be registered.
        """
        # first we set all refnos
        refno = 1
        for object in self.objects:
            object.refno = refno
            refno += 1

        # second, all objects are written, keeping the positions in the output file
        fileposes = []
        for object in self.objects:
            fileposes.append(file.tell())
            file.write("%i 0 obj\n" % object.refno)
            object.write(file, writer, self)
            file.write("endobj\n")

        # xref (refno is now the number of objects plus one for the free entry 0)
        xrefpos = file.tell()
        file.write("xref\n"
                   "0 %d\n"
                   "0000000000 65535 f \n" % refno)

        for filepos in fileposes:
            file.write("%010i 00000 n \n" % filepos)

        # trailer
        file.write("trailer\n"
                   "<<\n"
                   "/Size %i\n" % refno)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\n"
                   "startxref\n"
                   "%i\n" % xrefpos)
        # no string formatting here: the end-of-file marker has two percent signs
        file.write("%%EOF\n")

    def addresource(self, resourcetype, resourcename, object, procset=None):
        """store object as resource resourcename of type resourcetype, optionally noting a procset"""
        self.resources.setdefault(resourcetype, {})[resourcename] = object
        if procset:
            self.procsets[procset] = 1

    def writeresources(self, file):
        """write the resource dictionary (procsets and named resources) to file"""
        file.write("<<\n")
        file.write("/ProcSet [ %s ]\n" % " ".join(["/%s" % p for p in self.procsets]))
        for resourcetype, resources in self.resources.items():
            entries = ["/%s %i 0 R" % (name, self.getrefno(object))
                       for name, object in resources.items()]
            file.write("/%s <<\n%s\n>>\n" % (resourcetype, "\n".join(entries)))
        file.write(">>\n")
class PDFobject:
    """Base class of everything that can be put into a PDFregistry."""

    def __init__(self, type, _id=None):
        """create a PDFobject
        - type has to be a string describing the type of the object
        - _id is a unique identification used for the object if it is not None.
          Otherwise id(self) is used
        """
        self.type = type
        if _id is not None:
            self.id = _id
        else:
            # fall back to the identity of the instance
            self.id = id(self)

    def merge(self, other):
        """hook called when an object of the same type and id gets registered; does nothing here"""
        pass

    def write(self, file, writer, registry):
        """write the object to file; needs to be overwritten by subclasses"""
        raise NotImplementedError("write method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """Document catalog; creates and registers the form, pages and info objects."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "catalog")

        # the form object is registered before the pages are created, since
        # PDFpage searches the registry for objects of type "form"
        form = PDFform(writer, registry)
        self.PDFform = form
        registry.add(form)

        pages = PDFpages(document, writer, registry)
        self.PDFpages = pages
        registry.add(pages)

        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def write(self, file, writer, registry):
        """write the catalog dictionary to file"""
        pagesrefno = registry.getrefno(self.PDFpages)
        file.write("<<\n"
                   "/Type /Catalog\n"
                   "/Pages %i 0 R\n" % pagesrefno)
        # the form dictionary is referenced only when it contains fields
        hasfields = not self.PDFform.empty()
        if hasfields:
            file.write("/AcroForm %i 0 R\n" % registry.getrefno(self.PDFform))
        if writer.fullscreen:
            file.write("/PageMode /FullScreen\n")
        file.write(">>\n")
class PDFinfo(PDFobject):
    """Document information dictionary (title, author, ..., creation date)."""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def write(self, file, writer, registry):
        """write the info dictionary to file

        Title, author, subject and keywords are taken from the writer and
        are written only when they are set.  The creation date is the
        current local time together with its offset to UT.
        """
        now = time.localtime()
        # time.timezone and time.altzone count seconds WEST of UTC, whereas
        # a PDF date uses "+" when the local time is later than UT (i.e.
        # east of Greenwich). Hence the sign needs to be inverted. As the
        # local time written below includes daylight saving time, the
        # DST offset has to be used whenever DST is in effect.
        if time.daylight and now.tm_isdst > 0:
            offset = -time.altzone
        else:
            offset = -time.timezone
        if offset > 0:
            timezone = "+%02i'%02i'" % divmod(offset // 60, 60)
        elif offset < 0:
            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
            timezone = "-%02i'%02i'" % divmod((-offset) // 60, 60)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # everything except for plain ASCII characters which are not
            # PDF delimiters is written as an octal escape sequence
            r = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    r.append(c)
                else:
                    r.append("\\%03o" % ord(c))
            return "".join(r)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        # format the very same time stamp the offset has been derived from
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """Page tree node; creates and registers a PDFpage for each page of the document."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, docpage in enumerate(document.pages):
            pdfpage = PDFpage(docpage, pageno, self, writer, registry)
            registry.add(pdfpage)
            self.PDFpagelist.append(pdfpage)

    def write(self, file, writer, registry):
        """write the pages dictionary referencing all pages to file"""
        kids = []
        for pdfpage in self.PDFpagelist:
            kids.append("%i 0 R" % registry.getrefno(pdfpage))
        count = len(self.PDFpagelist)
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), count))
class PDFpage(PDFobject):
    """A single page together with its annotations, content and resources."""

    def __init__(self, page, pageno, PDFpages, writer, registry):
        PDFobject.__init__(self, "page")
        self.PDFpages = PDFpages
        self.page = page

        # every page uses its own registry in order to find out which
        # resources are used within the page. However, the
        # pageregistry is also merged in the global registry
        pageregistry = PDFregistry()
        self.pageregistry = pageregistry
        pageregistry.add(self)

        self.PDFannotations = PDFannotations()
        pageregistry.add(self.PDFannotations)

        # we eventually need the form dictionary to append formfields
        for registered in registry.objects:
            if registered.type == "form":
                pageregistry.add(registered)

        self.PDFcontent = PDFcontent(page, writer, pageregistry)
        pageregistry.add(self.PDFcontent)
        registry.mergeregistry(pageregistry)

    def write(self, file, writer, registry):
        """write the page dictionary including the resources of the page to file"""
        file.write("<<\n"
                   "/Type /Page\n"
                   "/Parent %i 0 R\n" % registry.getrefno(self.PDFpages))

        # the media box is given by the paperformat; without a paperformat
        # the bounding box of the page content is used
        paperformat = self.page.paperformat
        if not paperformat:
            file.write("/MediaBox [%f %f %f %f]\n" % self.PDFcontent.bbox.highrestuple_pt())
        else:
            width_pt = unit.topt(paperformat.width)
            height_pt = unit.topt(paperformat.height)
            file.write("/MediaBox [0 0 %f %f]\n" % (width_pt, height_pt))

        if self.PDFcontent.bbox and writer.writebbox:
            file.write("/CropBox [%f %f %f %f]\n" % self.PDFcontent.bbox.highrestuple_pt())
        if self.page.rotated:
            file.write("/Rotate 90\n")
        # annotations are referenced only when there are any
        if not self.PDFannotations.empty():
            file.write("/Annots %i 0 R\n" % registry.getrefno(self.PDFannotations))
        file.write("/Contents %i 0 R\n" % registry.getrefno(self.PDFcontent))
        file.write("/Resources ")
        self.pageregistry.writeresources(file)
        file.write(">>\n")
class PDFcontent(PDFobject):
    """Content stream of a page.

    The page is processed when the object is created; the resulting
    stream data is kept in self.content and the bounding box of the page
    content in self.bbox.
    """

    def __init__(self, page, writer, registry):
        # PDFobject.__init__ takes (type, _id). The registry must not be
        # passed here, as it would become the type of the object and
        # "content" its id. Using id(self) as the id keeps each content
        # stream a distinct object.
        PDFobject.__init__(self, "content")
        contentfile = cStringIO.StringIO()
        try:
            self.bbox = bbox.empty()
            acontext = context()
            page.processPDF(contentfile, writer, acontext, registry, self.bbox)
            self.content = contentfile.getvalue()
        finally:
            # release the buffer even when processing the page fails
            contentfile.close()

    def write(self, file, writer, registry):
        """write the stream dictionary and the stream data to file

        The data is deflated when writer.compress is set, honoring the
        compression level selected at the writer.
        """
        if writer.compress:
            content = zlib.compress(self.content, writer.compresslevel)
        else:
            content = self.content
        file.write("<<\n"
                   "/Length %i\n" % len(content))
        if writer.compress:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        file.write(content)
        file.write("endstream\n")
class PDFwriter:
    """Write a document as PDF to a file.

    Creating an instance builds all PDF objects of the document, writes
    them to file and closes the file afterwards.
    """

    def __init__(self, document, file,
                       title=None, author=None, subject=None, keywords=None,
                       fullscreen=False, writebbox=False, compress=True, compresslevel=6,
                       strip_fonts=True, text_as_path=False, mesh_as_bitmap=False, mesh_as_bitmap_resolution=300):
        # the font map is read on demand by getfontmap
        self._fontmap = None

        # without zlib we can only write uncompressed files
        if compress and not haszlib:
            compress = 0
            warnings.warn("compression disabled due to missing zlib module")

        # the settings are stored as attributes, where they are accessible
        # to the PDF objects, which get the writer passed when written
        self.title, self.author = title, author
        self.subject, self.keywords = subject, keywords
        self.fullscreen = fullscreen
        self.writebbox = writebbox
        self.compress = compress
        self.compresslevel = compresslevel
        self.strip_fonts = strip_fonts
        self.text_as_path = text_as_path
        self.mesh_as_bitmap = mesh_as_bitmap
        self.mesh_as_bitmap_resolution = mesh_as_bitmap_resolution

        # dictionary mapping font names to dictionaries mapping encoding names to encodings
        # encodings themselves are mappings from glyphnames to codepoints
        self.encodings = {}

        # the PDFcatalog class automatically builds up the pdfobjects from a document
        registry = PDFregistry()
        catalog = PDFcatalog(document, self, registry)
        registry.add(catalog)

        # header line followed by a comment line containing characters beyond ASCII
        file.write("%%PDF-1.4\n%%%s%s%s%s\n" % (chr(195), chr(182), chr(195), chr(169)))
        registry.write(file, self, catalog)
        file.close()

    def getfontmap(self):
        """return the font map, reading it from the configured map files on first use"""
        if self._fontmap is not None:
            return self._fontmap
        # late import due to cyclic dependency
        from pyx.dvi import mapfile
        fontmapfiles = config.getlist("text", "pdffontmaps", ["pdftex.map"])
        self._fontmap = mapfile.readfontmap(fontmapfiles)
        return self._fontmap
class PDFannotations(PDFobject):
    """Array of the annotations of a page."""

    def __init__(self):
        PDFobject.__init__(self, "annotations")
        self.annots = []

    def append(self, item):
        """add item unless it is already contained"""
        if item in self.annots:
            return
        self.annots.append(item)

    def empty(self):
        """return whether no annotation has been added"""
        return not self.annots

    def write(self, file, writer, registry):
        """write the array of references to the annotations to file"""
        # XXX problem: This object will be written to the file even if it is useless (empty)
        references = []
        for annot in self.annots:
            references.append("%d 0 R" % registry.getrefno(annot))
        file.write("[ %s ]\n" % " ".join(references))
class PDFform(PDFobject):
    """Form dictionary collecting the form fields."""

    def __init__(self, writer, registry):
        PDFobject.__init__(self, "form")
        self.fields = []

    def merge(self, other):
        """take over the fields of other which are not yet contained"""
        for otherfield in other.fields:
            self.append(otherfield)

    def append(self, field):
        """add field unless it is already contained"""
        if field in self.fields:
            return
        self.fields.append(field)

    def empty(self):
        """return whether no field has been added"""
        return not self.fields

    def write(self, file, writer, registry):
        """write the form dictionary referencing all fields to file"""
        # XXX problem: This object will be written to the file even if it is useless (empty)
        file.write("<<")
        file.write("/Fields [")
        for refno in [registry.getrefno(field) for field in self.fields]:
            file.write(" %d 0 R" % refno)
        file.write(" ]\n")
        file.write(">>\n")
class context:
    """Drawing state kept while the content of a page is processed."""

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.selectedfont = None
        self.textregion = 0
        self.trafo = trafo.trafo()
        self.fillstyles = []

    def __call__(self, **kwargs):
        """return a shallow copy of the context with the attributes given as keyword arguments replaced"""
        derived = copy.copy(self)
        for name in kwargs:
            setattr(derived, name, kwargs[name])
        return derived