enhance the pdf data extractor (reported by Gert Ingold)
[PyX/mjg.git] / pyx / font / t1font.py
blob09656507857e2bd3a2cb4bbc8cf9162c726d249c
1 #!/usr/bin/env python
2 # -*- coding: ISO-8859-1 -*-
5 # Copyright (C) 2005 André Wobst <wobsta@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import array, binascii, re
25 try:
26 import zlib
27 haszlib = 1
28 except ImportError:
29 haszlib = 0
31 from pyx import trafo
32 from pyx.path import path, moveto_pt, lineto_pt, curveto_pt, closepath
33 import encoding
35 try:
36 from _t1code import *
37 except:
38 from t1code import *
class T1context:
    """state used while a sequence of T1 commands is evaluated"""

    def __init__(self, t1font):
        """context for T1cmd evaluation

        t1font is stored unchanged; commands use it to fetch the commands
        of other glyphs and of subrs.
        """
        self.t1font = t1font

        # state description: current point and glyph width, all unset at first
        self.x = self.y = None
        self.wx = self.wy = None

        # two independent stacks: the T1 operand stack and the stack that
        # callothersubr fills and pop drains
        self.psstack = []
        self.t1stack = []
56 ######################################################################
57 # T1 commands
58 # Note, that all commands except for the T1value are variable-free and
59 # are thus implemented as instances.
class _T1cmd:
    """base class of all T1 commands"""

    def __str__(self):
        """returns a string representation of the T1 command"""
        raise NotImplementedError()

    def updatepath(self, path, trafo, context):
        """update path instance applying trafo to the points"""
        raise NotImplementedError()

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        """gather dependency information

        subrs is the "called-subrs" dictionary. gathercalls inserts the
        subr number as a key with the value 1, i.e. subrs.keys() become the
        numbers of the used subrs. Similarly, seacglyphs receives all glyphs
        used in composite characters (subrs and othersubrs of those glyphs
        are included as well) and othersubrs receives the othersubrs called.

        This method does not necessarily keep the context fully up to date
        (in particular it may not consume values from the stack) and skips
        various checks for performance reasons. For most T1 commands there
        is nothing to do at all, which is what this default implements.
        """
        return None
class T1value(_T1cmd):
    """a number in a charstring; evaluating it pushes the number"""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str(self.value)

    def _push(self, context):
        # shared by both evaluation modes: the number goes on the T1 stack
        context.t1stack.append(self.value)

    def updatepath(self, path, trafo, context):
        self._push(context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        self._push(context)

    def __eq__(self, other):
        # the other commands are singletons and can be compared by identity;
        # T1value instances have to be compared by the number they carry
        if not isinstance(other, T1value):
            return 0
        return self.value == other.value
111 # commands for starting and finishing
class _T1endchar(_T1cmd):
    """endchar command; nothing is added to the path"""

    def __str__(self):
        return "endchar"

    def updatepath(self, path, trafo, context):
        """leave path and context untouched"""
        return None

T1endchar = _T1endchar()
class _T1hsbw(_T1cmd):
    """hsbw command: takes sbx and wx from the bottom of the T1 stack"""

    def __str__(self):
        return "hsbw"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        sbx = stack.pop(0)
        wx = stack.pop(0)
        # start the path at (sbx, 0) ...
        start = trafo.apply_pt(sbx, 0)
        path.append(moveto_pt(*start))
        # ... and remember current point and width (no vertical parts)
        context.x, context.y = sbx, 0
        context.wx, context.wy = wx, 0

T1hsbw = _T1hsbw()
class _T1seac(_T1cmd):
    """seac command: composite character built from two other glyphs

    The two glyphs are given as character codes which are decoded using
    the Adobe standard encoding.
    """

    def __str__(self):
        return "seac"

    def updatepath(self, path, atrafo, context):
        stack = context.t1stack
        sab = stack.pop(0)
        adx = stack.pop(0)
        ady = stack.pop(0)
        bchar = stack.pop(0)
        achar = stack.pop(0)
        decode = encoding.adobestandardencoding.decode
        # the first glyph (bchar) is drawn with the unmodified trafo
        for cmd in context.t1font.getglyphcmds(decode(bchar)):
            cmd.updatepath(path, atrafo, context)
        # the second glyph (achar) is shifted by (adx-sab, ady)
        atrafo = atrafo * trafo.translate_pt(adx-sab, ady)
        for cmd in context.t1font.getglyphcmds(decode(achar)):
            cmd.updatepath(path, atrafo, context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        # only the two topmost stack values (the character codes) are
        # consumed here; the remaining seac arguments stay on the stack
        top = context.t1stack.pop()
        below = context.t1stack.pop()
        decode = encoding.adobestandardencoding.decode
        belowglyph = decode(below)
        topglyph = decode(top)
        seacglyphs[belowglyph] = 1
        seacglyphs[topglyph] = 1
        for glyph in (topglyph, belowglyph):
            for cmd in context.t1font.getglyphcmds(glyph):
                cmd.gathercalls(seacglyphs, subrs, othersubrs, context)

T1seac = _T1seac()
class _T1sbw(_T1cmd):
    """sbw command: takes sbx, sby, wx and wy from the bottom of the T1 stack"""

    def __str__(self):
        return "sbw"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        sbx = stack.pop(0)
        sby = stack.pop(0)
        wx = stack.pop(0)
        wy = stack.pop(0)
        # start the path at (sbx, sby) ...
        start = trafo.apply_pt(sbx, sby)
        path.append(moveto_pt(*start))
        # ... and remember current point and width
        context.x, context.y = sbx, sby
        context.wx, context.wy = wx, wy

T1sbw = _T1sbw()
192 # path construction commands
class _T1closepath(_T1cmd):
    """closepath command"""

    def __str__(self):
        return "closepath"

    def updatepath(self, path, trafo, context):
        path.append(closepath())
        # Unlike the PostScript closepath, the T1 closepath does *not*
        # modify the current position. An explicit moveto back to the
        # position stored in the context restores it.
        here = trafo.apply_pt(context.x, context.y)
        path.append(moveto_pt(*here))

T1closepath = _T1closepath()
class _T1hlineto(_T1cmd):
    """hlineto command: line by dx in x direction"""

    def __str__(self):
        return "hlineto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y)
        path.append(lineto_pt(*target))
        context.x += dx

T1hlineto = _T1hlineto()
class _T1hmoveto(_T1cmd):
    """hmoveto command: move by dx in x direction"""

    def __str__(self):
        return "hmoveto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y)
        path.append(moveto_pt(*target))
        context.x += dx

T1hmoveto = _T1hmoveto()
class _T1hvcurveto(_T1cmd):
    """hvcurveto command: curve leaving horizontally and arriving vertically"""

    def __str__(self):
        return "hvcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dy3 = stack.pop(0)
        # the control points are built up step by step from the current point
        x1 = context.x + dx1
        x2 = x1 + dx2
        y2 = context.y + dy2
        y3 = y2 + dy3
        points = (trafo.apply_pt(x1, context.y) +
                  trafo.apply_pt(x2, y2) +
                  trafo.apply_pt(x2, y3))
        path.append(curveto_pt(*points))
        context.x += dx1+dx2
        context.y += dy2+dy3

T1hvcurveto = _T1hvcurveto()
class _T1rlineto(_T1cmd):
    """rlineto command: line by (dx, dy) relative to the current point"""

    def __str__(self):
        return "rlineto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y + dy)
        path.append(lineto_pt(*target))
        context.x += dx
        context.y += dy

T1rlineto = _T1rlineto()
class _T1rmoveto(_T1cmd):
    """rmoveto command: move by (dx, dy) relative to the current point"""

    def __str__(self):
        return "rmoveto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y + dy)
        path.append(moveto_pt(*target))
        context.x += dx
        context.y += dy

T1rmoveto = _T1rmoveto()
class _T1rrcurveto(_T1cmd):
    """rrcurveto command: curve with all three points given relatively"""

    def __str__(self):
        return "rrcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx1 = stack.pop(0)
        dy1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dx3 = stack.pop(0)
        dy3 = stack.pop(0)
        # each point is relative to the one before it
        x1 = context.x + dx1
        y1 = context.y + dy1
        x2 = x1 + dx2
        y2 = y1 + dy2
        x3 = x2 + dx3
        y3 = y2 + dy3
        points = (trafo.apply_pt(x1, y1) +
                  trafo.apply_pt(x2, y2) +
                  trafo.apply_pt(x3, y3))
        path.append(curveto_pt(*points))
        context.x += dx1+dx2+dx3
        context.y += dy1+dy2+dy3

T1rrcurveto = _T1rrcurveto()
class _T1vlineto(_T1cmd):
    """vlineto command: line by dy in y direction"""

    def __str__(self):
        return "vlineto"

    def updatepath(self, path, trafo, context):
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x, context.y + dy)
        path.append(lineto_pt(*target))
        context.y += dy

T1vlineto = _T1vlineto()
class _T1vmoveto(_T1cmd):
    """vmoveto command: move by dy in y direction"""

    def __str__(self):
        return "vmoveto"

    def updatepath(self, path, trafo, context):
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x, context.y + dy)
        path.append(moveto_pt(*target))
        context.y += dy

T1vmoveto = _T1vmoveto()
class _T1vhcurveto(_T1cmd):
    """vhcurveto command: curve leaving vertically and arriving horizontally"""

    def __str__(self):
        return "vhcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dy1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dx3 = stack.pop(0)
        # the control points are built up step by step from the current point
        y1 = context.y + dy1
        x2 = context.x + dx2
        y2 = y1 + dy2
        x3 = x2 + dx3
        points = (trafo.apply_pt(context.x, y1) +
                  trafo.apply_pt(x2, y2) +
                  trafo.apply_pt(x3, y2))
        path.append(curveto_pt(*points))
        context.x += dx2+dx3
        context.y += dy1+dy2

T1vhcurveto = _T1vhcurveto()
350 # hint commands
class _T1dotsection(_T1cmd):
    """dotsection hint command; ignored when building the path"""

    def __str__(self):
        return "dotsection"

    def updatepath(self, path, trafo, context):
        """leave path and context untouched"""
        return None

T1dotsection = _T1dotsection()
class _T1hstem(_T1cmd):
    """hstem hint command; its arguments are consumed and discarded"""

    def __str__(self):
        return "hstem"

    def updatepath(self, path, trafo, context):
        # two arguments (y and dy) are removed from the bottom of the stack;
        # hints do not contribute to the path
        context.t1stack.pop(0)
        context.t1stack.pop(0)

T1hstem = _T1hstem()
class _T1hstem3(_T1cmd):
    """hstem3 hint command; its arguments are consumed and discarded"""

    def __str__(self):
        return "hstem3"

    def updatepath(self, path, trafo, context):
        # six arguments (y0, dy0, y1, dy1, y2, dy2) are removed from the
        # bottom of the stack; hints do not contribute to the path
        for unused in range(6):
            context.t1stack.pop(0)

T1hstem3 = _T1hstem3()
class _T1vstem(_T1cmd):
    """vstem hint command; its arguments are consumed and discarded"""

    def __str__(self):
        # this used to return "hstem", which made vstem indistinguishable
        # from hstem in string representations
        return "vstem"

    def updatepath(self, path, trafo, context):
        # two arguments (x and dx) are removed from the bottom of the stack;
        # hints do not contribute to the path
        context.t1stack.pop(0)
        context.t1stack.pop(0)

T1vstem = _T1vstem()
class _T1vstem3(_T1cmd):
    """vstem3 hint command; its arguments are consumed and discarded"""

    def __str__(self):
        # this used to return "hstem3", which made vstem3 indistinguishable
        # from hstem3 in string representations
        return "vstem3"

    def updatepath(self, path, trafo, context):
        # six arguments (x0, dx0, x1, dx1, x2, dx2) are removed from the
        # bottom of the stack. They are deliberately not stored on self:
        # the command is a shared, variable-free singleton (like hstem3).
        for unused in range(6):
            context.t1stack.pop(0)

T1vstem3 = _T1vstem3()
419 # arithmetic command
class _T1div(_T1cmd):
    """div command: replaces the two topmost stack values by their quotient"""

    def __str__(self):
        return "div"

    def _divide(self, context):
        # the divisor is on top of the stack, the dividend right below
        num2 = context.t1stack.pop()
        num1 = context.t1stack.pop()
        # NOTE(review): only the floored quotient is kept; confirm whether
        # a fractional result is needed for fonts using non-exact divisions
        quotient, remainder = divmod(num1, num2)
        context.t1stack.append(quotient)

    def updatepath(self, path, trafo, context):
        self._divide(context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        self._divide(context)

T1div = _T1div()
439 # subroutine commands
class _T1callothersubr(_T1cmd):
    """callothersubr command

    The othersubr itself is not executed; its arguments are just moved
    from the T1 stack to the PostScript stack.
    """

    def __str__(self):
        return "callothersubr"

    def _movearguments(self, context):
        # the argument count is on top of the T1 stack, followed by that
        # many arguments which are transferred one by one
        n = context.t1stack.pop()
        for unused in range(n):
            context.psstack.append(context.t1stack.pop())

    def updatepath(self, path, trafo, context):
        context.t1stack.pop() # the othersubr number is not needed here
        self._movearguments(context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        othersubrs[context.t1stack.pop()] = 1
        self._movearguments(context)

T1callothersubr = _T1callothersubr()
class _T1callsubr(_T1cmd):
    """callsubr command: evaluates the commands of the subr on top of the stack"""

    def __str__(self):
        return "callsubr"

    def updatepath(self, path, trafo, context):
        number = context.t1stack.pop()
        subrcmds = context.t1font.getsubrcmds(number)
        for cmd in subrcmds:
            cmd.updatepath(path, trafo, context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        number = context.t1stack.pop()
        # register the subr before descending into it
        subrs[number] = 1
        subrcmds = context.t1font.getsubrcmds(number)
        for cmd in subrcmds:
            cmd.gathercalls(seacglyphs, subrs, othersubrs, context)

T1callsubr = _T1callsubr()
class _T1pop(_T1cmd):
    """pop command: moves one value from the PostScript stack to the T1 stack"""

    def __str__(self):
        return "pop"

    def _transfer(self, context):
        value = context.psstack.pop()
        context.t1stack.append(value)

    def updatepath(self, path, trafo, context):
        self._transfer(context)

    def gathercalls(self, seacglyphs, subrs, othersubrs, context):
        self._transfer(context)

T1pop = _T1pop()
class _T1return(_T1cmd):
    """return command; nothing is added to the path"""

    def __str__(self):
        return "return"

    def updatepath(self, path, trafo, context):
        """leave path and context untouched"""
        return None

T1return = _T1return()
class _T1setcurrentpoint(_T1cmd):
    """setcurrentpoint command: sets the current point to absolute coordinates"""

    def __str__(self):
        # The command is a variable-free singleton without x and y
        # attributes; the former '"setcurrentpoint" % self.x, self.y'
        # raised an AttributeError instead of returning the name.
        return "setcurrentpoint"

    def updatepath(self, path, trafo, context):
        # x and y are taken from the bottom of the stack and used as
        # absolute (not relative) coordinates
        x = context.t1stack.pop(0)
        y = context.t1stack.pop(0)
        path.append(moveto_pt(*trafo.apply_pt(x, y)))
        context.x = x
        context.y = y

T1setcurrentpoint = _T1setcurrentpoint()
521 ######################################################################
class cursor:
    """cursor to read a string token by token"""

    def __init__(self, data, startstring, eattokensep=1, tokenseps=" \t\r\n"):
        """creates a cursor for the string data

        startstring is the string after which reading starts; its first
        occurrence in data is used and the cursor is placed right behind
        it. A ValueError is raised when startstring is not contained in
        data. When eattokensep is set, startstring must be followed by a
        token separator (a ValueError is raised otherwise) and this first
        separator is consumed as well. tokenseps is a string containing the
        characters to be used as token separators.
        """
        self.data = data
        self.tokenseps = tokenseps
        self.pos = data.index(startstring) + len(startstring)
        if not eattokensep:
            return
        if self.data[self.pos] not in self.tokenseps:
            raise ValueError("cursor initialization string is not followed by a token separator")
        self.pos += 1

    def _advancewhile(self, separator):
        """step forward while the current character is (not) a separator

        separator is 1 to skip token separators and 0 to skip everything
        else. Running off the end of data raises an IndexError.
        """
        while (self.data[self.pos] in self.tokenseps) == separator:
            self.pos += 1

    def gettoken(self):
        """get the next token

        Leading token separators are silently consumed. The first token
        separator after the token is also silently consumed."""
        self._advancewhile(1)
        startpos = self.pos
        self._advancewhile(0)
        endpos = self.pos
        self.pos = endpos + 1 # consume a single tokensep
        return self.data[startpos:endpos]

    def getint(self):
        """get the next token as an integer"""
        token = self.gettoken()
        return int(token)

    def getbytes(self, count):
        """get the next count bytes"""
        startpos = self.pos
        endpos = startpos + count
        self.pos = endpos
        return self.data[startpos:endpos]
class T1font:
    """Type 1 font held as two clear text parts and one eexec encoded part

    The eexec encoded part is decoded on demand only, i.e. the first time
    the commands of a subr or a glyph are requested.
    """

    # values handed to decoder/encoder for eexec and charstring data
    # (presumably the initial keys "R" of the Type 1 encryption -- TODO confirm)
    eexecr = 55665
    charstringr = 4330

    def __init__(self, data1, data2eexec, data3):
        """initializes a t1font instance

        data1 and data3 are the two clear text data parts and data2eexec is
        the (still eexec encoded) binary data part"""
        self.data1 = data1
        self.data2eexec = data2eexec
        self.data3 = data3

        # marker and value for decoded data
        self.data2 = None

        # marker and value for standard encoding check
        self.encoding = None

    def _eexecdecode(self, code):
        """eexec decoding of code"""
        return decoder(code, self.eexecr, 4)

    def _charstringdecode(self, code):
        """charstring decoding of code"""
        return decoder(code, self.charstringr, self.lenIV)

    def _eexecencode(self, data):
        """eexec encoding of data"""
        return encoder(data, self.eexecr, "PyX!")

    def _charstringencode(self, data):
        """charstring encoding of data"""
        return encoder(data, self.charstringr, "PyX!"[:self.lenIV])

    lenIVpattern = re.compile(r"/lenIV\s+(\d+)\s+def\s+")
    flexhintsubrs = [[T1value(3), T1value(0), T1callothersubr, T1pop, T1pop, T1setcurrentpoint, T1return],
                     [T1value(0), T1value(1), T1callothersubr, T1return],
                     [T1value(0), T1value(2), T1callothersubr, T1return],
                     [T1return]]

    def _encoding(self):
        """parse the /Encoding entry of data1 and set self.encoding

        For an explicit encoding vector the start and end positions of the
        "dup <code> /<glyph> put" entries within data1 are stored in
        encodingstart and encodingend for later replacement.
        """
        c = cursor(self.data1, "/Encoding")
        token1 = c.gettoken()
        token2 = c.gettoken()
        if token1 == "StandardEncoding" and token2 == "def":
            self.encoding = encoding.adobestandardencoding
        else:
            # Character codes 0..255 are accepted below, hence 256 slots are
            # needed (with only 255 slots code 255 raised an IndexError).
            encvector = [None]*256
            # skip everything up to the first "dup"
            while 1:
                self.encodingstart = c.pos
                if c.gettoken() == "dup":
                    break
            while 1:
                i = c.getint()
                glyph = c.gettoken()
                if 0 <= i < 256:
                    encvector[i] = glyph[1:] # strip the leading slash
                token = c.gettoken(); assert token == "put"
                self.encodingend = c.pos
                token = c.gettoken()
                if token == "readonly" or token == "def":
                    break
                assert token == "dup"
            self.encoding = encoding.encoding(encvector)

    def _data2decode(self):
        """decodes data2eexec to the data2 string and the subr and glyphs dictionary

        It doesn't make sense to call this method twice -- check the content of
        data2 before calling. The method also keeps the subrs and charstrings
        start and end positions for later replacement by stripped data.
        """
        self.data2 = self._eexecdecode(self.data2eexec)

        m = self.lenIVpattern.search(self.data2)
        if m:
            self.lenIV = int(m.group(1))
        else:
            self.lenIV = 4
        # a subr consisting of a single byte 11 (decoded as return by _cmds)
        self.emptysubr = self._charstringencode(chr(11))

        # extract Subrs
        c = cursor(self.data2, "/Subrs")
        self.subrsstart = c.pos
        arraycount = c.getint()
        token = c.gettoken(); assert token == "array"
        self.subrs = []
        for i in range(arraycount):
            token = c.gettoken(); assert token == "dup"
            token = c.getint(); assert token == i
            size = c.getint()
            # the tokens surrounding the binary data are taken from the
            # first subr; all other subrs have to use the same tokens
            if not i:
                self.subrrdtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.subrrdtoken
            self.subrs.append(c.getbytes(size))
            token = c.gettoken()
            if token == "noaccess":
                token = "%s %s" % (token, c.gettoken())
            if not i:
                self.subrnptoken = token
            else:
                assert token == self.subrnptoken
        self.subrsend = c.pos

        # hasflexhintsubrs is a boolean indicating that the font uses flex or
        # hint replacement subrs as specified by Adobe (tm). When it does, the
        # first 4 subrs should all be copied except when none of them are used
        # in the stripped version of the font since we then get a font not
        # using flex or hint replacement subrs at all.
        self.hasflexhintsubrs = (arraycount >= len(self.flexhintsubrs) and
                                 [self.getsubrcmds(i)
                                  for i in range(len(self.flexhintsubrs))] == self.flexhintsubrs)

        # extract glyphs
        self.glyphs = {}
        self.glyphlist = [] # we want to keep the order of the glyph names
        c = cursor(self.data2, "/CharStrings")
        self.charstingsstart = c.pos
        c.getint()
        token = c.gettoken(); assert token == "dict"
        token = c.gettoken(); assert token == "dup"
        token = c.gettoken(); assert token == "begin"
        first = 1
        while 1:
            chartoken = c.gettoken()
            if chartoken == "end":
                break
            assert chartoken[0] == "/"
            size = c.getint()
            if first:
                self.glyphrdtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.glyphrdtoken
            self.glyphlist.append(chartoken[1:])
            self.glyphs[chartoken[1:]] = c.getbytes(size)
            if first:
                self.glyphndtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.glyphndtoken
            first = 0
        self.charstingsend = c.pos
        assert not self.subrs or self.subrrdtoken == self.glyphrdtoken

    # commands encoded by a single byte
    _t1cmds = {1: T1hstem,
               3: T1vstem,
               4: T1vmoveto,
               5: T1rlineto,
               6: T1hlineto,
               7: T1vlineto,
               8: T1rrcurveto,
               9: T1closepath,
               10: T1callsubr,
               11: T1return,
               13: T1hsbw,
               14: T1endchar,
               21: T1rmoveto,
               22: T1hmoveto,
               30: T1vhcurveto,
               31: T1hvcurveto}

    # commands encoded by the escape byte 12 followed by a second byte
    _t1escapedcmds = {0: T1dotsection,
                      1: T1vstem3,
                      2: T1hstem3,
                      6: T1seac,
                      7: T1sbw,
                      12: T1div,
                      16: T1callothersubr,
                      17: T1pop,
                      33: T1setcurrentpoint}

    def _cmds(self, code):
        """return a list of T1cmd's for encoded charstring data in code

        A ValueError is raised for unknown commands, an IndexError when
        the data ends in the middle of a multi-byte item.
        """
        code = array.array("B", self._charstringdecode(code))
        cmds = []
        # walk through the data by index instead of popping from the front,
        # which would shift the whole array for every single byte
        pos = 0
        end = len(code)
        while pos < end:
            x = code[pos]
            pos += 1
            if x < 32: # those are cmd's
                if x == 12: # this starts an escaped cmd
                    x = code[pos]
                    pos += 1
                    try:
                        cmds.append(self._t1escapedcmds[x])
                    except KeyError:
                        raise ValueError("invalid escaped command %d" % x)
                else:
                    try:
                        cmds.append(self._t1cmds[x])
                    except KeyError:
                        raise ValueError("invalid command %d" % x)
            elif x <= 246: # short ints
                cmds.append(T1value(x-139))
            elif x <= 250: # mid size ints
                cmds.append(T1value(((x - 247)*256) + code[pos] + 108))
                pos += 1
            elif x <= 254: # mid size ints
                cmds.append(T1value(-((x - 251)*256) - code[pos] - 108))
                pos += 1
            else: # x = 255, i.e. full size ints
                y = ((code[pos]*256+code[pos+1])*256+code[pos+2])*256+code[pos+3]
                pos += 4
                # The four bytes form a signed 32 bit number: everything
                # with the highest bit set is negative. This includes the
                # value 2**31 itself, which stands for -2**31.
                if y >= (1 << 31):
                    y -= 1 << 32
                cmds.append(T1value(y))
        return cmds

    def getsubrcmds(self, n):
        """return a list of T1cmd's for subr n"""
        if self.data2 is None:
            self._data2decode()
        return self._cmds(self.subrs[n])

    def getglyphcmds(self, glyph):
        """return a list of T1cmd's for glyph glyph"""
        if self.data2 is None:
            self._data2decode()
        return self._cmds(self.glyphs[glyph])

    fontmatrixpattern = re.compile(r"/FontMatrix\s*\[\s*(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s*\]\s*(readonly\s+)?def")

    def getglyphpath(self, glyph, size):
        """return a PyX path for glyph named glyph

        The path is transformed by the font matrix found in data1 scaled
        by size. The transformed glyph width is attached to the returned
        path as wx_pt and wy_pt.
        """
        m = self.fontmatrixpattern.search(self.data1)
        m11, m12, m21, m22, v1, v2 = map(float, m.groups()[:6])
        t = trafo.trafo_pt(matrix=((m11, m12), (m21, m22)), vector=(v1, v2)).scaled(size)
        context = T1context(self)
        p = path()
        for cmd in self.getglyphcmds(glyph):
            cmd.updatepath(p, t, context)
        p.wx_pt, p.wy_pt = t.apply_pt(context.wx, context.wy)
        return p

    newlinepattern = re.compile(r"\s*[\r\n]\s*")
    uniqueidpattern = re.compile(r"/UniqueID\s+\d+\s+def\s+")

    def getstrippedfont(self, glyphs):
        """create a T1font instance containing only certain glyphs

        glyphs is a list of glyph names to be contained. Note that the
        list is modified in place: glyphs used by composite characters and
        ".notdef" are appended when they are missing.
        """
        # collect information about used glyphs and subrs
        seacglyphs = {}
        subrs = {}
        othersubrs = {}
        for glyph in glyphs:
            context = T1context(self)
            for cmd in self.getglyphcmds(glyph):
                cmd.gathercalls(seacglyphs, subrs, othersubrs, context)
        for glyph in seacglyphs.keys():
            if glyph not in glyphs:
                glyphs.append(glyph)
        if ".notdef" not in glyphs:
            glyphs.append(".notdef")

        # strip subrs to those actually used
        subrs = list(subrs.keys())
        subrs.sort()
        if subrs:
            if self.hasflexhintsubrs and subrs[0] < len(self.flexhintsubrs):
                # According to the spec we need to keep all the flex and hint subrs
                # as long as any of it is used.
                while subrs and subrs[0] < len(self.flexhintsubrs):
                    del subrs[0]
                subrs = list(range(len(self.flexhintsubrs))) + subrs
            count = subrs[-1]+1
        else:
            count = 0
        strippedsubrs = ["%d array\n" % count]
        for subr in range(count):
            # unused subrs keep their slot (the numbering must not change)
            # but are replaced by an empty subr
            if subr in subrs:
                code = self.subrs[subr]
            else:
                code = self.emptysubr
            strippedsubrs.append("dup %d %d %s %s %s\n" % (subr, len(code), self.subrrdtoken, code, self.subrnptoken))
        strippedsubrs = "".join(strippedsubrs)

        # strip charstrings (i.e. glyphs) to those actually used
        strippedcharstrings = ["%d dict dup begin\n" % len(glyphs)]
        for glyph in self.glyphlist:
            if glyph in glyphs:
                strippedcharstrings.append("/%s %d %s %s %s\n" % (glyph, len(self.glyphs[glyph]), self.glyphrdtoken, self.glyphs[glyph], self.glyphndtoken))
        strippedcharstrings.append("end\n")
        strippedcharstrings = "".join(strippedcharstrings)

        # TODO: we could also strip othersubrs to those actually used

        # strip data1
        if self.encoding is None:
            self._encoding()
        if self.encoding is encoding.adobestandardencoding:
            data1 = self.data1
        else:
            encodingstrings = []
            for char, glyph in enumerate(self.encoding.encvector):
                if glyph in glyphs:
                    encodingstrings.append("dup %i /%s put\n" % (char, glyph))
            data1 = self.data1[:self.encodingstart] + "".join(encodingstrings) + self.data1[self.encodingend:]
        data1 = self.newlinepattern.subn("\n", data1)[0]
        data1 = self.uniqueidpattern.subn("", data1)[0]

        # strip data2
        # TODO: in the future, for full control, we might want to write data2 as well as data1 and data3 from scratch
        # The section located later in data2 is replaced first, since the
        # stored positions of the earlier section remain valid that way.
        if self.subrsstart < self.charstingsstart:
            data2 = self.data2[:self.charstingsstart] + strippedcharstrings + self.data2[self.charstingsend:]
            data2 = data2[:self.subrsstart] + strippedsubrs + data2[self.subrsend:]
        else:
            data2 = self.data2[:self.subrsstart] + strippedsubrs + self.data2[self.subrsend:]
            data2 = data2[:self.charstingsstart] + strippedcharstrings + data2[self.charstingsend:]
        data2 = self.uniqueidpattern.subn("", data2)[0]

        # strip data3
        data3 = self.newlinepattern.subn("\n", self.data3)[0]

        # create and return the new font instance
        return T1font(data1, self._eexecencode(data2), data3.rstrip())

    def outputPS(self, file):
        """output the PostScript code for the T1font to the file file

        The eexec encoded part is written hex encoded in lines of at most
        64 characters.
        """
        file.write(self.data1)
        data2eexechex = binascii.b2a_hex(self.data2eexec)
        linelength = 64
        # explicit integer division; does not depend on the meaning of "/"
        for i in range((len(data2eexechex)-1)//linelength + 1):
            file.write(data2eexechex[i*linelength: i*linelength+linelength])
            file.write("\n")
        file.write(self.data3)

    def getflags(self):
        """return 32 for fonts using the Adobe standard encoding, 4 otherwise"""
        # As a simple heuristics we assume non-symbolic fonts if and only
        # if the Adobe standard encoding is used. All other font flags are
        # not specified here.
        if self.encoding is None:
            self._encoding()
        if self.encoding is encoding.adobestandardencoding:
            return 32
        return 4

    def outputPDF(self, file, writer):
        """write the font as a PDF stream (dictionary and data) to file

        The data is zlib compressed when writer.compress is set and zlib
        is available.
        """
        data3 = self.data3
        # we might be allowed to skip the third part ...
        if (data3.replace("\n", "")
                 .replace("\r", "")
                 .replace("\t", "")
                 .replace(" ", "")) == "0"*512 + "cleartomark":
            data3 = ""

        data = self.data1 + self.data2eexec + data3
        if writer.compress and haszlib:
            data = zlib.compress(data)

        # /Length is the size of the stream as written, while /Length1 to
        # /Length3 are the sizes of the three uncompressed parts
        file.write("<<\n"
                   "/Length %d\n"
                   "/Length1 %d\n"
                   "/Length2 %d\n"
                   "/Length3 %d\n" % (len(data), len(self.data1), len(self.data2eexec), len(data3)))
        if writer.compress and haszlib:
            file.write("/Filter /FlateDecode\n")
        file.write(">>\n"
                   "stream\n")
        file.write(data)
        file.write("\n"
                   "endstream\n")
class T1pfafont(T1font):

    """create a T1font instance from a pfa font file"""

    def __init__(self, filename):
        """read the pfa file filename and split it into the three font parts"""
        f = open(filename, "rb")
        try:
            d = f.read()
        finally:
            # release the file handle even when reading fails
            f.close()
        # hey, that's quick'n'dirty
        # the clear text part ends right after "eexec" and one separator
        m1 = d.index("eexec") + 6
        # the trailer is assumed to start at the first run of 40 zeros
        # located behind the beginning of the hex encoded part
        m2 = d.index("0"*40, m1)
        data1 = d[:m1]
        hexdata = d[m1: m2]
        for whitespace in " \t\r\n":
            hexdata = hexdata.replace(whitespace, "")
        data2 = binascii.a2b_hex(hexdata)
        data3 = d[m2:]
        T1font.__init__(self, data1, data2, data3)
class T1pfbfont(T1font):

    """create a T1font instance from a pfb font file"""

    def __init__(self, filename):
        """read the pfb file filename and split it into the three font parts

        The file has to consist of one segment marked "\\200\\1", one or
        more segments marked "\\200\\2" (which are concatenated), another
        segment marked "\\200\\1" and the final marker "\\200\\3".
        """
        def pfblength(s):
            # segment length: 4 bytes, least significant byte first
            if len(s) != 4:
                raise ValueError("invalid string length")
            return (ord(s[0]) +
                    ord(s[1])*256 +
                    ord(s[2])*256*256 +
                    ord(s[3])*256*256*256)
        f = open(filename, "rb")
        try:
            mark = f.read(2); assert mark == "\200\1"
            data1 = f.read(pfblength(f.read(4)))
            mark = f.read(2); assert mark == "\200\2"
            # collect the binary segments and join them once at the end
            segments = []
            while mark == "\200\2":
                segments.append(f.read(pfblength(f.read(4))))
                mark = f.read(2)
            data2 = "".join(segments)
            assert mark == "\200\1"
            data3 = f.read(pfblength(f.read(4)))
            mark = f.read(2); assert mark == "\200\3"
            assert not f.read(1)
        finally:
            # release the file handle even when the file is malformed
            f.close()
        T1font.__init__(self, data1, data2, data3)