fontstripping and textaspath writer options; psfontmaps and pdffontmaps config options
[PyX/mjg.git] / pyx / pdfwriter.py
blobd9bc8a5cfd348e6f805d09e157b06630bcce7e9b
1 # -*- coding: ISO-8859-1 -*-
4 # Copyright (C) 2005-2006 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2005-2006 André Wobst <wobsta@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 import cStringIO, copy, warnings, time
24 try:
25 import zlib
26 haszlib = 1
27 except:
28 haszlib = 0
30 import bbox, config, style, unit, version
class PDFregistry:
    """Registry of PDF objects.

    Collects objects keyed by their type and id, assigns consecutive
    reference numbers, and writes the objects followed by the xref table
    and the trailer. It also keeps track of named resources and procsets
    for writing a /Resources dictionary.
    """

    def __init__(self):
        # maps object.type -> {object.id: object}
        self.types = {}
        # we want to keep the order of the resources
        self.objects = []
        # maps resourcetype -> {resourcename: object}
        self.resources = {}
        # used as a set of procset names; "PDF" is always present
        self.procsets = {"PDF": 1}
        # the registry this one has been merged into (see mergeregistry)
        self.merged = None

    def add(self, object):
        """ register object, merging it with an already registered object of the same type and id """
        sameobjects = self.types.setdefault(object.type, {})
        # "in" instead of the deprecated dict.has_key (removed in Python 3)
        if object.id in sameobjects:
            sameobjects[object.id].merge(object)
        else:
            self.objects.append(object)
            sameobjects[object.id] = object

    def getrefno(self, object):
        """ return the reference number of the registered object with the type and id of object

        When this registry has been merged into another registry, the
        lookup is delegated to that registry.
        """
        if self.merged is not None:
            return self.merged.getrefno(object)
        return self.types[object.type][object.id].refno

    def mergeregistry(self, registry):
        """ add all objects of registry to self and let registry delegate getrefno to self """
        for object in registry.objects:
            self.add(object)
        registry.merged = self

    def write(self, file, writer, catalog):
        """ write all objects, the xref table and the trailer to file

        catalog is used for the /Root entry and catalog.PDFinfo for the
        /Info entry of the trailer; both have to be registered.
        """
        # first we set all refnos
        refno = 1
        for object in self.objects:
            object.refno = refno
            refno += 1

        # second, all objects are written, keeping the positions in the output file
        fileposes = []
        for object in self.objects:
            fileposes.append(file.tell())
            file.write("%i 0 obj\n" % object.refno)
            object.write(file, writer, self)
            file.write("endobj\n")

        # xref (refno is now the number of objects plus one for the free entry 0)
        xrefpos = file.tell()
        file.write("xref\n"
                   "0 %d\n"
                   "0000000000 65535 f \n" % refno)

        for filepos in fileposes:
            file.write("%010i 00000 n \n" % filepos)

        # trailer
        file.write("trailer\n"
                   "<<\n"
                   "/Size %i\n" % refno)
        file.write("/Root %i 0 R\n" % self.getrefno(catalog))
        file.write("/Info %i 0 R\n" % self.getrefno(catalog.PDFinfo))
        file.write(">>\n"
                   "startxref\n"
                   "%i\n" % xrefpos)
        # no string formatting here: both percent signs are written
        file.write("%%EOF\n")

    def addresource(self, resourcetype, resourcename, object, procset=None):
        """ record object as resource resourcename of type resourcetype; optionally note a procset """
        self.resources.setdefault(resourcetype, {})[resourcename] = object
        if procset:
            self.procsets[procset] = 1

    def writeresources(self, file):
        """ write the /Resources dictionary (procsets and recorded resources) to file """
        file.write("/Resources <<\n")
        file.write("/ProcSet [ %s ]\n" % " ".join(["/%s" % p for p in self.procsets]))
        if self.resources:
            for resourcetype, resources in self.resources.items():
                file.write("/%s <<\n%s\n>>\n" % (resourcetype, "\n".join(["/%s %i 0 R" % (name, self.getrefno(object))
                                                                          for name, object in resources.items()])))
        file.write(">>\n")
class PDFobject:
    """Base class of everything that can be stored in a PDFregistry."""

    def __init__(self, type, _id=None):
        """set up the type and id of the object

        - type: string naming the kind of the object
        - _id: identification of the object; when it is None, id(self)
          is taken instead
        """
        self.type = type
        self.id = _id
        if self.id is None:
            self.id = id(self)

    def merge(self, other):
        # nothing to combine by default
        pass

    def write(self, file, writer, registry):
        # subclasses have to implement the output of the object
        raise NotImplementedError("write method has to be provided by PDFobject subclass")
class PDFcatalog(PDFobject):
    """The /Catalog object; creates and registers the pages and info objects."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "catalog")
        # page tree of the document
        pages = PDFpages(document, writer, registry)
        self.PDFpages = pages
        registry.add(pages)
        # document information dictionary
        info = PDFinfo()
        self.PDFinfo = info
        registry.add(info)

    def write(self, file, writer, registry):
        pagesrefno = registry.getrefno(self.PDFpages)
        header = ("<<\n"
                  "/Type /Catalog\n"
                  "/Pages %i 0 R\n")
        file.write(header % pagesrefno)
        if writer.fullscreen:
            file.write("/PageMode /FullScreen\n")
        file.write(">>\n")
class PDFinfo(PDFobject):
    """The document information dictionary (title, author, subject,
    keywords, creator and creation date)."""

    def __init__(self):
        PDFobject.__init__(self, "info")

    def write(self, file, writer, registry):
        """write the info dictionary to file

        The optional entries are taken from writer.title, writer.author,
        writer.subject and writer.keywords and are skipped when false.
        """
        now = time.localtime()
        # time.timezone and time.altzone are given in seconds WEST of UTC;
        # altzone applies while daylight saving time is in effect, which is
        # what strftime reports for the local time
        if time.daylight and now.tm_isdst > 0:
            west = time.altzone
        else:
            west = time.timezone
        # a PDF date has a "+" when the local time is later than UT, i.e.
        # when the local timezone is east of UTC (negative seconds west)
        if west > 0:
            timezone = "-%02i'%02i'" % divmod(west // 60, 60)
        elif west < 0:
            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
            timezone = "+%02i'%02i'" % divmod((-west) // 60, 60)
        else:
            timezone = "Z00'00'"

        def pdfstring(s):
            # keep plain characters, write everything else (including
            # PDF delimiters and the backslash) as an octal escape
            r = []
            for c in s:
                if 32 <= ord(c) <= 127 and c not in "()[]<>\\":
                    r.append(c)
                else:
                    r.append("\\%03o" % ord(c))
            return "".join(r)

        file.write("<<\n")
        if writer.title:
            file.write("/Title (%s)\n" % pdfstring(writer.title))
        if writer.author:
            file.write("/Author (%s)\n" % pdfstring(writer.author))
        if writer.subject:
            file.write("/Subject (%s)\n" % pdfstring(writer.subject))
        if writer.keywords:
            file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
        file.write("/Creator (PyX %s)\n" % version.version)
        # the date format is D:YYYYMMDDHHmmSS followed by the timezone offset;
        # the same struct_time is used for the date and the DST decision
        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M%S", now), timezone))
        file.write(">>\n")
class PDFpages(PDFobject):
    """The /Pages object holding one PDFpage per page of the document."""

    def __init__(self, document, writer, registry):
        PDFobject.__init__(self, "pages")
        self.PDFpagelist = []
        for pageno, docpage in enumerate(document.pages):
            pdfpage = PDFpage(docpage, pageno, self, writer, registry)
            registry.add(pdfpage)
            self.PDFpagelist.append(pdfpage)

    def write(self, file, writer, registry):
        # references to all pages in document order
        kids = []
        for pdfpage in self.PDFpagelist:
            kids.append("%i 0 R" % registry.getrefno(pdfpage))
        count = len(self.PDFpagelist)
        file.write("<<\n"
                   "/Type /Pages\n"
                   "/Kids [%s]\n"
                   "/Count %i\n"
                   ">>\n" % (" ".join(kids), count))
class PDFpage(PDFobject):
    """A single /Page object together with its content and resources."""

    def __init__(self, page, pageno, PDFpages, writer, registry):
        PDFobject.__init__(self, "page")
        self.PDFpages = PDFpages
        self.page = page

        # the resources used by a page are collected in a registry
        # of its own; the objects of this page registry are merged
        # into the global registry as well
        pageregistry = PDFregistry()
        self.pageregistry = pageregistry

        self.PDFcontent = PDFcontent(page, writer, pageregistry)
        pageregistry.add(self.PDFcontent)
        registry.mergeregistry(pageregistry)

    def write(self, file, writer, registry):
        out = file.write
        content = self.PDFcontent
        out("<<\n"
            "/Type /Page\n"
            "/Parent %i 0 R\n" % registry.getrefno(self.PDFpages))
        # the media box is the paper, if given, and the content bbox otherwise
        paperformat = self.page.paperformat
        if not paperformat:
            out("/MediaBox [%f %f %f %f]\n" % content.bbox.highrestuple_pt())
        else:
            width_pt = unit.topt(paperformat.width)
            height_pt = unit.topt(paperformat.height)
            out("/MediaBox [0 0 %f %f]\n" % (width_pt, height_pt))
        # the crop box is written on request for a non-empty content bbox
        if content.bbox and writer.writebbox:
            out("/CropBox [%f %f %f %f]\n" % content.bbox.highrestuple_pt())
        if self.page.rotated:
            out("/Rotate 90\n")
        out("/Contents %i 0 R\n" % registry.getrefno(content))
        self.pageregistry.writeresources(file)
        out(">>\n")
class PDFcontent(PDFobject):
    """The content stream of a page.

    The page is processed on construction; the resulting stream data is
    kept in self.content and the bounding box in self.bbox.
    """

    def __init__(self, page, writer, registry):
        # PDFobject.__init__ takes (type, _id); the registry is not one of
        # its parameters, so only the type is passed and the id defaults
        # to id(self)
        PDFobject.__init__(self, "content")
        contentfile = cStringIO.StringIO()
        try:
            self.bbox = bbox.empty()
            acontext = context()
            # processPDF writes into contentfile and is handed self.bbox
            page.processPDF(contentfile, writer, acontext, registry, self.bbox)
            self.content = contentfile.getvalue()
        finally:
            contentfile.close()

    def write(self, file, writer, registry):
        """write the stream dictionary and the (optionally compressed) stream data to file"""
        if writer.compress:
            # honour the compression level selected at the writer
            content = zlib.compress(self.content, writer.compresslevel)
        else:
            content = self.content
        file.write("<<\n"
                   "/Length %i\n" % len(content))
        if writer.compress:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        file.write(content)
        file.write("endstream\n")
class PDFwriter:
    """Writes a document as PDF to a file.

    The whole output happens in the constructor: the PDF objects are
    built from the document, written to file, and file is closed. The
    keyword arguments are stored as attributes of the same name.
    """

    def __init__(self, document, file,
                 title=None, author=None, subject=None, keywords=None,
                 fullscreen=0, writebbox=0, compress=1, compresslevel=6,
                 stripfonts=True, textaspath=False):
        # font map cache, filled on the first call of getfontmap
        self._fontmap = None

        self.title, self.author = title, author
        self.subject, self.keywords = subject, keywords
        self.fullscreen = fullscreen
        self.writebbox = writebbox
        # compression needs the zlib module
        if compress and not haszlib:
            compress = 0
            warnings.warn("compression disabled due to missing zlib module")
        self.compress = compress
        self.compresslevel = compresslevel
        self.stripfonts = stripfonts
        self.textaspath = textaspath

        # the PDFcatalog class automatically builds up the pdfobjects from a document
        registry = PDFregistry()
        catalog = PDFcatalog(document, self, registry)
        registry.add(catalog)

        # header line followed by a comment line of four bytes above 127
        marker = tuple([chr(code) for code in (195, 182, 195, 169)])
        file.write("%%PDF-1.4\n%%%s%s%s%s\n" % marker)
        registry.write(file, self, catalog)
        file.close()

    def getfontmap(self):
        """return the font map, reading it from the configured map files on first use"""
        if self._fontmap is not None:
            return self._fontmap
        # late import due to cyclic dependency
        from pyx.dvi import mapfile
        fontmapfiles = config.get("text", "pdffontmaps", "pdftex.map")
        separator = config.get("general", "separator", "|")
        filenames = fontmapfiles.split(separator)
        self._fontmap = mapfile.readfontmap(filenames)
        return self._fontmap
class context:
    """State container; calling an instance gives a modified shallow copy."""

    def __init__(self):
        self.linewidth_pt = None
        # XXX there are both stroke and fill color spaces
        self.colorspace = None
        self.strokeattr = 1
        self.fillattr = 1
        self.selectedfont = None
        self.textregion = 0
        # dictionary mapping font names to dictionaries mapping encoding names to encodings
        # encodings themselves are mappings from glyphnames to codepoints
        self.encodings = {}

    def __call__(self, **kwargs):
        # shallow copy of self with the given attributes replaced
        clone = copy.copy(self)
        for name in kwargs:
            setattr(clone, name, kwargs[name])
        return clone