pyx/font/t1file.py

   1 # -*- encoding: utf-8 -*-
   2 #
   3 #
   4 # Copyright (C) 2005-2011 André Wobst <wobsta@users.sourceforge.net>
   5 # Copyright (C) 2006-2011 Jörg Lehmann <joergl@users.sourceforge.net>
   6 #
   7 # This file is part of PyX (http://pyx.sourceforge.net/).
   8 #
   9 # PyX is free software; you can redistribute it and/or modify
  10 # it under the terms of the GNU General Public License as published by
  11 # the Free Software Foundation; either version 2 of the License, or
  12 # (at your option) any later version.
  13 #
  14 # PyX is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 # GNU General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU General Public License
  20 # along with PyX; if not, write to the Free Software
  21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  22
  23 import array, binascii, io, logging, math, re
  24 try:
  25     import zlib
  26     haszlib = True
  27 except ImportError:
  28     haszlib = False
  29
  30 logger = logging.getLogger("pyx")
  31
  32 from pyx import trafo, reader, writer
  33 from pyx.path import path, moveto_pt, lineto_pt, curveto_pt, closepath
  34
  35 try:
  36     from _t1code import *
  37 except:
  38     from .t1code import *
  39
  40
# Lookup table from character code (list index 0..255) to glyph name; codes
# without a glyph are None. It is used as the font encoding when a font
# declares "StandardEncoding" and to resolve the character codes given to the
# seac command. Eight entries per row, i.e. row k starts at code 8*k.
adobestandardencoding = [None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright",
                         "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
                         "zero", "one", "two", "three", "four", "five", "six", "seven",
                         "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question",
                         "at", "A", "B", "C", "D", "E", "F", "G",
                         "H", "I", "J", "K", "L", "M", "N", "O",
                         "P", "Q", "R", "S", "T", "U", "V", "W",
                         "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore",
                         "quoteleft", "a", "b", "c", "d", "e", "f", "g",
                         "h", "i", "j", "k", "l", "m", "n", "o",
                         "p", "q", "r", "s", "t", "u", "v", "w",
                         "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", None,
                         None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, "exclamdown", "cent", "sterling", "fraction", "yen", "florin", "section",
                         "currency", "quotesingle", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl",
                         None, "endash", "dagger", "daggerdbl", "periodcentered", None, "paragraph", "bullet",
                         "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", None, "questiondown",
                         None, "grave", "acute", "circumflex", "tilde", "macron", "breve", "dotaccent",
                         "dieresis", None, "ring", "cedilla", None, "hungarumlaut", "ogonek", "caron",
                         "emdash", None, None, None, None, None, None, None,
                         None, None, None, None, None, None, None, None,
                         None, "AE", None, "ordfeminine", None, None, None, None,
                         "Lslash", "Oslash", "OE", "ordmasculine", None, None, None, None,
                         None, "ae", None, None, None, "dotlessi", None, None,
                         "lslash", "oslash", "oe", "germandbls", None, None, None, None]
  73
  74 class T1context:
  75
  76     def __init__(self, t1font, flex=True):
  77         """context for T1cmd evaluation"""
  78         self.t1font = t1font
  79
  80         # state description
  81         self.x = None
  82         self.y = None
  83         self.wx = None
  84         self.wy = None
  85         self.t1stack = []
  86         self.psstack = []
  87         self.flex = flex
  88
  89
  90 ######################################################################
# T1 commands
# Note that the T1 commands carry no per-instance data, except for plain
# numbers, which are stored as integers. Every other T1 command exists as
# a single instance only.
  95
  96 T1cmds = {}
  97 T1subcmds = {}
  98
  99 class T1cmd:
 100
 101     def __init__(self, code, subcmd=0):
 102         self.code = code
 103         self.subcmd = subcmd
 104         if subcmd:
 105             T1subcmds[code] = self
 106         else:
 107             T1cmds[code] = self
 108
 109     def __str__(self):
 110         """returns a string representation of the T1 command"""
 111         raise NotImplementedError
 112
 113     def updatepath(self, path, trafo, context):
 114         """update path instance applying trafo to the points"""
 115         raise NotImplementedError
 116
 117     def gathercalls(self, seacglyphs, subrs, context):
 118         """gather dependancy information
 119
 120         subrs is the "called-subrs" dictionary. gathercalls will insert the
 121         subr number as key having the value 1, i.e. subrs will become the
 122         numbers of used subrs. Similar seacglyphs will contain all glyphs in
 123         composite characters (subrs for those glyphs will also
 124         already be included).
 125
 126         This method might will not properly update all information in the
 127         context (especially consuming values from the stack) and will also skip
 128         various tests for performance reasons. For most T1 commands it just
 129         doesn't need to do anything.
 130         """
 131         pass
 132
 133
 134 # commands for starting and finishing
 135
 136 class _T1endchar(T1cmd):
 137
 138     def __init__(self):
 139         T1cmd.__init__(self, 14)
 140
 141     def __str__(self):
 142         return "endchar"
 143
 144     def updatepath(self, path, trafo, context):
 145         pass
 146
 147 T1endchar = _T1endchar()
 148
 149
 150 class _T1hsbw(T1cmd):
 151
 152     def __init__(self):
 153         T1cmd.__init__(self, 13)
 154
 155     def __str__(self):
 156         return "hsbw"
 157
 158     def updatepath(self, path, trafo, context):
 159         sbx = context.t1stack.pop(0)
 160         wx = context.t1stack.pop(0)
 161         path.append(moveto_pt(*trafo.apply_pt(sbx, 0)))
 162         context.x = sbx
 163         context.y = 0
 164         context.wx = wx
 165         context.wy = 0
 166
 167 T1hsbw = _T1hsbw()
 168
 169
 170 class _T1seac(T1cmd):
 171
 172     def __init__(self):
 173         T1cmd.__init__(self, 6, subcmd=1)
 174
 175     def __str__(self):
 176         return "seac"
 177
 178     def updatepath(self, path, atrafo, context):
 179         sab = context.t1stack.pop(0)
 180         adx = context.t1stack.pop(0)
 181         ady = context.t1stack.pop(0)
 182         bchar = context.t1stack.pop(0)
 183         achar = context.t1stack.pop(0)
 184         aglyph = adobestandardencoding[achar]
 185         bglyph = adobestandardencoding[bchar]
 186         context.t1font.updateglyphpath(bglyph, path, atrafo, context)
 187         atrafo = atrafo * trafo.translate_pt(adx-sab, ady)
 188         context.t1font.updateglyphpath(aglyph, path, atrafo, context)
 189
 190     def gathercalls(self, seacglyphs, subrs, context):
 191         achar = context.t1stack.pop()
 192         bchar = context.t1stack.pop()
 193         aglyph = adobestandardencoding[achar]
 194         bglyph = adobestandardencoding[bchar]
 195         seacglyphs.add(aglyph)
 196         seacglyphs.add(bglyph)
 197         context.t1font.gatherglyphcalls(bglyph, seacglyphs, subrs, context)
 198         context.t1font.gatherglyphcalls(aglyph, seacglyphs, subrs, context)
 199
 200 T1seac = _T1seac()
 201
 202
 203 class _T1sbw(T1cmd):
 204
 205     def __init__(self):
 206         T1cmd.__init__(self, 7, subcmd=1)
 207
 208     def __str__(self):
 209         return "sbw"
 210
 211     def updatepath(self, path, trafo, context):
 212         sbx = context.t1stack.pop(0)
 213         sby = context.t1stack.pop(0)
 214         wx = context.t1stack.pop(0)
 215         wy = context.t1stack.pop(0)
 216         path.append(moveto_pt(*trafo.apply_pt(sbx, sby)))
 217         context.x = sbx
 218         context.y = sby
 219         context.wx = wx
 220         context.wy = wy
 221
 222 T1sbw = _T1sbw()
 223
 224
 225 # path construction commands
 226
 227 class _T1closepath(T1cmd):
 228
 229     def __init__(self):
 230         T1cmd.__init__(self, 9)
 231
 232     def __str__(self):
 233         return "closepath"
 234
 235     def updatepath(self, path, trafo, context):
 236         path.append(closepath())
 237         # The closepath in T1 is different from PostScripts in that it does
 238         # *not* modify the current position; hence we need to add an additional
 239         # moveto here ...
 240         path.append(moveto_pt(*trafo.apply_pt(context.x, context.y)))
 241
 242 T1closepath = _T1closepath()
 243
 244
 245 class _T1hlineto(T1cmd):
 246
 247     def __init__(self):
 248         T1cmd.__init__(self, 6)
 249
 250     def __str__(self):
 251         return "hlineto"
 252
 253     def updatepath(self, path, trafo, context):
 254         dx = context.t1stack.pop(0)
 255         path.append(lineto_pt(*trafo.apply_pt(context.x + dx, context.y)))
 256         context.x += dx
 257
 258 T1hlineto = _T1hlineto()
 259
 260
 261 class _T1hmoveto(T1cmd):
 262
 263     def __init__(self):
 264         T1cmd.__init__(self, 22)
 265
 266     def __str__(self):
 267         return "hmoveto"
 268
 269     def updatepath(self, path, trafo, context):
 270         dx = context.t1stack.pop(0)
 271         path.append(moveto_pt(*trafo.apply_pt(context.x + dx, context.y)))
 272         context.x += dx
 273
 274 T1hmoveto = _T1hmoveto()
 275
 276
 277 class _T1hvcurveto(T1cmd):
 278
 279     def __init__(self):
 280         T1cmd.__init__(self, 31)
 281
 282     def __str__(self):
 283         return "hvcurveto"
 284
 285     def updatepath(self, path, trafo, context):
 286         dx1 = context.t1stack.pop(0)
 287         dx2 = context.t1stack.pop(0)
 288         dy2 = context.t1stack.pop(0)
 289         dy3 = context.t1stack.pop(0)
 290         path.append(curveto_pt(*(trafo.apply_pt(context.x + dx1,       context.y) +
 291                                  trafo.apply_pt(context.x + dx1 + dx2, context.y + dy2) +
 292                                  trafo.apply_pt(context.x + dx1 + dx2, context.y + dy2 + dy3))))
 293         context.x += dx1+dx2
 294         context.y += dy2+dy3
 295
 296 T1hvcurveto = _T1hvcurveto()
 297
 298
 299 class _T1rlineto(T1cmd):
 300
 301     def __init__(self):
 302         T1cmd.__init__(self, 5)
 303
 304     def __str__(self):
 305         return "rlineto"
 306
 307     def updatepath(self, path, trafo, context):
 308         dx = context.t1stack.pop(0)
 309         dy = context.t1stack.pop(0)
 310         path.append(lineto_pt(*trafo.apply_pt(context.x + dx, context.y + dy)))
 311         context.x += dx
 312         context.y += dy
 313
 314 T1rlineto = _T1rlineto()
 315
 316
 317 class _T1rmoveto(T1cmd):
 318
 319     def __init__(self):
 320         T1cmd.__init__(self, 21)
 321
 322     def __str__(self):
 323         return "rmoveto"
 324
 325     def updatepath(self, path, trafo, context):
 326         dx = context.t1stack.pop(0)
 327         dy = context.t1stack.pop(0)
 328         path.append(moveto_pt(*trafo.apply_pt(context.x + dx, context.y + dy)))
 329         context.x += dx
 330         context.y += dy
 331
 332 T1rmoveto = _T1rmoveto()
 333
 334
 335 class _T1rrcurveto(T1cmd):
 336
 337     def __init__(self):
 338         T1cmd.__init__(self, 8)
 339
 340     def __str__(self):
 341         return "rrcurveto"
 342
 343     def updatepath(self, path, trafo, context):
 344         dx1 = context.t1stack.pop(0)
 345         dy1 = context.t1stack.pop(0)
 346         dx2 = context.t1stack.pop(0)
 347         dy2 = context.t1stack.pop(0)
 348         dx3 = context.t1stack.pop(0)
 349         dy3 = context.t1stack.pop(0)
 350         path.append(curveto_pt(*(trafo.apply_pt(context.x + dx1,             context.y + dy1) +
 351                                  trafo.apply_pt(context.x + dx1 + dx2,       context.y + dy1 + dy2) +
 352                                  trafo.apply_pt(context.x + dx1 + dx2 + dx3, context.y + dy1 + dy2 + dy3))))
 353         context.x += dx1+dx2+dx3
 354         context.y += dy1+dy2+dy3
 355
 356 T1rrcurveto = _T1rrcurveto()
 357
 358
 359 class _T1vlineto(T1cmd):
 360
 361     def __init__(self):
 362         T1cmd.__init__(self, 7)
 363
 364     def __str__(self):
 365         return "vlineto"
 366
 367     def updatepath(self, path, trafo, context):
 368         dy = context.t1stack.pop(0)
 369         path.append(lineto_pt(*trafo.apply_pt(context.x, context.y + dy)))
 370         context.y += dy
 371
 372 T1vlineto = _T1vlineto()
 373
 374
 375 class _T1vmoveto(T1cmd):
 376
 377     def __init__(self):
 378         T1cmd.__init__(self, 4)
 379
 380     def __str__(self):
 381         return "vmoveto"
 382
 383     def updatepath(self, path, trafo, context):
 384         dy = context.t1stack.pop(0)
 385         path.append(moveto_pt(*trafo.apply_pt(context.x, context.y + dy)))
 386         context.y += dy
 387
 388 T1vmoveto = _T1vmoveto()
 389
 390
 391 class _T1vhcurveto(T1cmd):
 392
 393     def __init__(self):
 394         T1cmd.__init__(self, 30)
 395
 396     def __str__(self):
 397         return "vhcurveto"
 398
 399     def updatepath(self, path, trafo, context):
 400         dy1 = context.t1stack.pop(0)
 401         dx2 = context.t1stack.pop(0)
 402         dy2 = context.t1stack.pop(0)
 403         dx3 = context.t1stack.pop(0)
 404         path.append(curveto_pt(*(trafo.apply_pt(context.x,             context.y + dy1) +
 405                                  trafo.apply_pt(context.x + dx2,       context.y + dy1 + dy2) +
 406                                  trafo.apply_pt(context.x + dx2 + dx3, context.y + dy1 + dy2))))
 407         context.x += dx2+dx3
 408         context.y += dy1+dy2
 409
 410 T1vhcurveto = _T1vhcurveto()
 411
 412
 413 # hint commands
 414
 415 class _T1dotsection(T1cmd):
 416
 417     def __init__(self):
 418         T1cmd.__init__(self, 0, subcmd=1)
 419
 420     def __str__(self):
 421         return "dotsection"
 422
 423     def updatepath(self, path, trafo, context):
 424         pass
 425
 426 T1dotsection = _T1dotsection()
 427
 428
 429 class _T1hstem(T1cmd):
 430
 431     def __init__(self):
 432         T1cmd.__init__(self, 1)
 433
 434     def __str__(self):
 435         return "hstem"
 436
 437     def updatepath(self, path, trafo, context):
 438         y = context.t1stack.pop(0)
 439         dy = context.t1stack.pop(0)
 440
 441 T1hstem = _T1hstem()
 442
 443
 444 class _T1hstem3(T1cmd):
 445
 446     def __init__(self):
 447         T1cmd.__init__(self, 2, subcmd=1)
 448
 449     def __str__(self):
 450         return "hstem3"
 451
 452     def updatepath(self, path, trafo, context):
 453         y0 = context.t1stack.pop(0)
 454         dy0 = context.t1stack.pop(0)
 455         y1 = context.t1stack.pop(0)
 456         dy1 = context.t1stack.pop(0)
 457         y2 = context.t1stack.pop(0)
 458         dy2 = context.t1stack.pop(0)
 459
 460 T1hstem3 = _T1hstem3()
 461
 462
 463 class _T1vstem(T1cmd):
 464
 465     def __init__(self):
 466         T1cmd.__init__(self, 3)
 467
 468     def __str__(self):
 469         return "vstem"
 470
 471     def updatepath(self, path, trafo, context):
 472         x = context.t1stack.pop(0)
 473         dx = context.t1stack.pop(0)
 474
 475 T1vstem = _T1vstem()
 476
 477
 478 class _T1vstem3(T1cmd):
 479
 480     def __init__(self):
 481         T1cmd.__init__(self, 1, subcmd=1)
 482
 483     def __str__(self):
 484         return "vstem3"
 485
 486     def updatepath(self, path, trafo, context):
 487         self.x0 = context.t1stack.pop(0)
 488         self.dx0 = context.t1stack.pop(0)
 489         self.x1 = context.t1stack.pop(0)
 490         self.dx1 = context.t1stack.pop(0)
 491         self.x2 = context.t1stack.pop(0)
 492         self.dx2 = context.t1stack.pop(0)
 493
 494 T1vstem3 = _T1vstem3()
 495
 496
 497 # arithmetic command
 498
 499 class _T1div(T1cmd):
 500
 501     def __init__(self):
 502         T1cmd.__init__(self, 12, subcmd=1)
 503
 504     def __str__(self):
 505         return "div"
 506
 507     def updatepath(self, path, trafo, context):
 508         num2 = context.t1stack.pop()
 509         num1 = context.t1stack.pop()
 510         context.t1stack.append(divmod(num1, num2)[0])
 511
 512     def gathercalls(self, seacglyphs, subrs, context):
 513         num2 = context.t1stack.pop()
 514         num1 = context.t1stack.pop()
 515         context.t1stack.append(divmod(num1, num2)[0])
 516
 517 T1div = _T1div()
 518
 519
 520 # subroutine commands
 521
class _T1callothersubr(T1cmd):
    """callothersubr command (escaped code 16)

    The operands are n arguments followed by n and the othersubr number.
    The arguments are moved to context.psstack, from where the pop command
    fetches results back to the T1 stack. Only the othersubr numbers 0, 1
    and 2 trigger additional actions in updatepath (presumably the flex
    procedures of the Adobe Type 1 format -- see that specification)."""

    def __init__(self):
        T1cmd.__init__(self, 16, subcmd=1)

    def __str__(self):
        return "callothersubr"

    def updatepath(self, path, trafo, context):
        # the othersubr number and the argument count are on top of the stack
        othersubrnumber = context.t1stack.pop()
        n = context.t1stack.pop()
        # move the n arguments from the bottom of the T1 stack to the psstack
        for i in range(n):
            context.psstack.append(context.t1stack.pop(0))
        if othersubrnumber == 0:
            # the three entries just moved are flex_size and the end point x, y
            flex_size, x, y = context.psstack[-3:]
            if context.flex:
                # psstack[2:14] holds the control and end points of two
                # curves (entries 0 and 1 are skipped); these coordinates
                # were put on the psstack by the othersubrnumber == 2 branch
                x1, y1, x2, y2, x3, y3 = context.psstack[2:8]
                x1, y1 = trafo.apply_pt(x1, y1)
                x2, y2 = trafo.apply_pt(x2, y2)
                x3, y3 = trafo.apply_pt(x3, y3)
                path.append(curveto_pt(x1, y1, x2, y2, x3, y3))
                x1, y1, x2, y2, x3, y3 = context.psstack[8:14]
                x1, y1 = trafo.apply_pt(x1, y1)
                x2, y2 = trafo.apply_pt(x2, y2)
                x3, y3 = trafo.apply_pt(x3, y3)
                path.append(curveto_pt(x1, y1, x2, y2, x3, y3))
            else:
                # flex disabled: connect to the end point by a straight line
                path.append(lineto_pt(*trafo.apply_pt(x, y)))
            # leave the end point for two subsequent pop commands, which
            # will fetch x first and y second
            context.psstack = [y, x]
        elif othersubrnumber == 1:
            pass
        elif othersubrnumber == 2:
            # drop the path item appended last and record the current
            # position on the psstack instead
            path.pathitems.pop()
            context.psstack.append(context.x)
            context.psstack.append(context.y)

    def gathercalls(self, seacglyphs, subrs, context):
        othersubrnumber = context.t1stack.pop()
        n = context.t1stack.pop()
        # gathercalls does not consume operands, hence the arguments are the
        # n topmost stack entries; they are moved keeping their order
        context.psstack.extend([context.t1stack.pop() for i in range(n)][::-1])

T1callothersubr = _T1callothersubr()
 564
 565
 566 class _T1callsubr(T1cmd):
 567
 568     def __init__(self):
 569         T1cmd.__init__(self, 10)
 570
 571     def __str__(self):
 572         return "callsubr"
 573
 574     def updatepath(self, path, trafo, context):
 575         subr = context.t1stack.pop()
 576         context.t1font.updatesubrpath(subr, path, trafo, context)
 577
 578     def gathercalls(self, seacglyphs, subrs, context):
 579         subr = context.t1stack.pop()
 580         subrs.add(subr)
 581         context.t1font.gathersubrcalls(subr, seacglyphs, subrs, context)
 582
 583 T1callsubr = _T1callsubr()
 584
 585
 586 class _T1pop(T1cmd):
 587
 588     def __init__(self):
 589         T1cmd.__init__(self, 17, subcmd=1)
 590
 591     def __str__(self):
 592         return "pop"
 593
 594     def updatepath(self, path, trafo, context):
 595         context.t1stack.append(context.psstack.pop())
 596
 597     def gathercalls(self, seacglyphs, subrs, context):
 598         context.t1stack.append(context.psstack.pop())
 599
 600 T1pop = _T1pop()
 601
 602
 603 class _T1return(T1cmd):
 604
 605     def __init__(self):
 606         T1cmd.__init__(self, 11)
 607
 608     def __str__(self):
 609         return "return"
 610
 611     def updatepath(self, path, trafo, context):
 612         pass
 613
 614 T1return = _T1return()
 615
 616
 617 class _T1setcurrentpoint(T1cmd):
 618
 619     def __init__(self):
 620         T1cmd.__init__(self, 33, subcmd=1)
 621
 622     def __str__(self):
 623         return "setcurrentpoint"
 624
 625     def updatepath(self, path, trafo, context):
 626         context.x = context.t1stack.pop(0)
 627         context.y = context.t1stack.pop(0)
 628
 629 T1setcurrentpoint = _T1setcurrentpoint()
 630
 631
 632 ######################################################################
 633
 634 class T1file:
 635
 636     eexecr = 55665
 637     charstringr = 4330
 638
 639     fontnamepattern = re.compile("/FontName\s+/(.*?)\s+def\s+")
 640     fontmatrixpattern = re.compile("/FontMatrix\s*\[\s*(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s*\]\s*(readonly\s+)?def")
 641
 642     def __init__(self, data1, data2eexec, data3):
 643         """initializes a t1font instance
 644
 645         data1 and data3 are the two clear text data parts and data2 is
 646         the binary data part"""
 647         self.data1 = data1
 648         self._data2eexec = data2eexec
 649         self.data3 = data3
 650
 651         # marker and value for decoded data
 652         self._data2 = None
 653         # note that data2eexec is set to none by setsubrcmds and setglyphcmds
 654         # this *also* denotes, that data2 is out-of-date; hence they are both
 655         # marked by an _ and getdata2 and getdata2eexec will properly resolve
 656         # the current state of decoding ...
 657
 658         # marker and value for standard encoding check
 659         self.encoding = None
 660
 661         self.name, = self.fontnamepattern.search(self.data1).groups()
 662         m11, m12, m21, m22, v1, v2 = list(map(float, self.fontmatrixpattern.search(self.data1).groups()[:6]))
 663         self.fontmatrix = trafo.trafo_pt(matrix=((m11, m12), (m21, m22)), vector=(v1, v2))
 664
 665     def _eexecdecode(self, code):
 666         """eexec decoding of code"""
 667         return decoder(code, self.eexecr, 4)
 668
 669     def _charstringdecode(self, code):
 670         """charstring decoding of code"""
 671         return decoder(code, self.charstringr, self.lenIV)
 672
 673     def _eexecencode(self, data):
 674         """eexec encoding of data"""
 675         return encoder(data, self.eexecr, b"PyX!")
 676
 677     def _charstringencode(self, data):
 678         """eexec encoding of data"""
 679         return encoder(data, self.charstringr, b"PyX!"[:self.lenIV])
 680
 681     def _encoding(self):
 682         """helper method to lookup the encoding in the font"""
 683         c = reader.PStokenizer(self.data1, "/Encoding")
 684         token1 = c.gettoken()
 685         token2 = c.gettoken()
 686         if token1 == "StandardEncoding" and token2 == "def":
 687             self.encoding = adobestandardencoding
 688         else:
 689             self.encoding = [None]*256
 690             while True:
 691                 self.encodingstart = c.pos
 692                 if c.gettoken() == "dup":
 693                     break
 694             while True:
 695                 i = c.getint()
 696                 glyph = c.gettoken()
 697                 if 0 <= i < 256:
 698                     self.encoding[i] = glyph[1:]
 699                 token = c.gettoken(); assert token == "put"
 700                 self.encodingend = c.pos
 701                 token = c.gettoken()
 702                 if token == "readonly" or token == "def":
 703                     break
 704                 assert token == "dup"
 705
 706     lenIVpattern = re.compile(b"/lenIV\s+(\d+)\s+def\s+")
 707     flexhintsubrs = [[3, 0, T1callothersubr, T1pop, T1pop, T1setcurrentpoint, T1return],
 708                      [0, 1, T1callothersubr, T1return],
 709                      [0, 2, T1callothersubr, T1return],
 710                      [T1return]]
 711
    def _data2decode(self):
        """decodes data2eexec to the data2 string and the subr and glyphs dictionary

        It doesn't make sense to call this method twice -- check the content of
        data2 before calling. The method also keeps the subrs and charstrings
        start and end positions for later use.

        Besides self._data2 the method sets lenIV, emptysubr, subrs (a list
        of encoded charstrings), glyphs (glyph name -> encoded charstring),
        glyphlist (glyph names in file order), hasflexhintsubrs and the
        tokens surrounding the binary data (subrrdtoken, subrnptoken,
        glyphrdtoken, glyphndtoken). The expected token structure is checked
        by assert statements."""
        self._data2 = self._eexecdecode(self._data2eexec)

        # lenIV defaults to 4 when the font does not define it
        m = self.lenIVpattern.search(self._data2)
        if m:
            self.lenIV = int(m.group(1))
        else:
            self.lenIV = 4

        self.emptysubr = self._charstringencode(b"\x0b") # 11, i.e. return

        # extract Subrs
        c = reader.PSbytes_tokenizer(self._data2, b"/Subrs")
        self.subrsstart = c.pos
        arraycount = c.getint()
        token = c.gettoken(); assert token == b"array"
        self.subrs = []
        # each entry reads: dup <index> <size> <rdtoken> <size bytes> <nptoken>
        for i in range(arraycount):
            token = c.gettoken(); assert token == b"dup"
            token = c.getint(); assert token == i
            size = c.getint()
            # the tokens used by the first subr are kept; all other subrs
            # have to use the very same tokens
            if not i:
                self.subrrdtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.subrrdtoken
            self.subrs.append(c.getbytes(size))
            token = c.gettoken()
            # "noaccess" is combined with the token following it
            if token == b"noaccess":
                token = token + b" " + c.gettoken()
            if not i:
                self.subrnptoken = token
            else:
                assert token == self.subrnptoken
        self.subrsend = c.pos

        # hasflexhintsubrs is a boolean indicating that the font uses flex or
        # hint replacement subrs as specified by Adobe (tm). When it does, the
        # first 4 subrs should all be copied except when none of them are used
        # in the stripped version of the font since we then get a font not
        # using flex or hint replacement subrs at all.
        self.hasflexhintsubrs = (arraycount >= len(self.flexhintsubrs) and
                                 [self.getsubrcmds(i)
                                  for i in range(len(self.flexhintsubrs))] == self.flexhintsubrs)

        # extract glyphs
        self.glyphs = {}
        self.glyphlist = [] # we want to keep the order of the glyph names
        c = reader.PSbytes_tokenizer(self._data2, b"/CharStrings")
        self.charstringsstart = c.pos
        c.getint() # the number preceding "dict" is read but not used
        token = c.gettoken(); assert token == b"dict"
        token = c.gettoken(); assert token == b"dup"
        token = c.gettoken(); assert token == b"begin"
        first = True
        # each entry reads: /<glyphname> <size> <rdtoken> <size bytes> <ndtoken>
        while True:
            chartoken = c.gettoken().decode("ascii")
            if chartoken == "end":
                break
            assert chartoken[0] == "/"
            size = c.getint()
            # as for the subrs, the tokens of the first glyph are kept and
            # have to be used by all other glyphs
            if first:
                self.glyphrdtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.glyphrdtoken
            self.glyphlist.append(chartoken[1:])
            self.glyphs[chartoken[1:]] = c.getbytes(size)
            if first:
                self.glyphndtoken = c.gettoken()
            else:
                token = c.gettoken(); assert token == self.glyphndtoken
            first = False
        self.charstringsend = c.pos
        # subrs and glyphs are expected to share the token preceding the data
        assert not self.subrs or self.subrrdtoken == self.glyphrdtoken
 790
 791     def _cmds(self, code):
 792         """return a list of T1cmd's for encoded charstring data in code"""
 793         code = array.array("B", self._charstringdecode(code))
 794         cmds = []
 795         while code:
 796             x = code.pop(0)
 797             if x == 12: # this starts an escaped cmd
 798                 cmds.append(T1subcmds[code.pop(0)])
 799             elif 0 <= x < 32: # those are cmd's
 800                 cmds.append(T1cmds[x])
 801             elif 32 <= x <= 246: # short ints
 802                 cmds.append(x-139)
 803             elif 247 <= x <= 250: # mid size ints
 804                 cmds.append(((x - 247)*256) + code.pop(0) + 108)
 805             elif 251 <= x <= 254: # mid size ints
 806                 cmds.append(-((x - 251)*256) - code.pop(0) - 108)
 807             else: # x = 255, i.e. full size ints
 808                 y = ((code.pop(0)*256+code.pop(0))*256+code.pop(0))*256+code.pop(0)
 809                 if y > (1 << 31):
 810                     cmds.append(y - (1 << 32))
 811                 else:
 812                     cmds.append(y)
 813         return cmds
 814
 815     def _code(self, cmds):
 816         """return an encoded charstring data for list of T1cmd's in cmds"""
 817         code = array.array("B")
 818         for cmd in cmds:
 819             try:
 820                 if cmd.subcmd:
 821                     code.append(12)
 822                 code.append(cmd.code)
 823             except AttributeError:
 824                 if -107 <= cmd <= 107:
 825                     code.append(cmd+139)
 826                 elif 108 <= cmd <= 1131:
 827                     a, b = divmod(cmd-108, 256)
 828                     code.append(a+247)
 829                     code.append(b)
 830                 elif -1131 <= cmd <= -108:
 831                     a, b = divmod(-cmd-108, 256)
 832                     code.append(a+251)
 833                     code.append(b)
 834                 else:
 835                     if cmd < 0:
 836                         cmd += 1 << 32
 837                     cmd, x4 = divmod(cmd, 256)
 838                     cmd, x3 = divmod(cmd, 256)
 839                     x1, x2 = divmod(cmd, 256)
 840                     code.append(255)
 841                     code.append(x1)
 842                     code.append(x2)
 843                     code.append(x3)
 844                     code.append(x4)
 845         return self._charstringencode(code.tobytes())
 846
 847     def getsubrcmds(self, subr):
 848         """return a list of T1cmd's for subr subr"""
 849         if not self._data2:
 850             self._data2decode()
 851         return self._cmds(self.subrs[subr])
 852
 853     def getglyphcmds(self, glyph):
 854         """return a list of T1cmd's for glyph glyph"""
 855         if not self._data2:
 856             self._data2decode()
 857         return self._cmds(self.glyphs[glyph])
 858
 859     def setsubrcmds(self, subr, cmds):
 860         """replaces the T1cmd's by the list cmds for subr subr"""
 861         if not self._data2:
 862             self._data2decode()
 863         self._data2eexec = None
 864         self.subrs[subr] = self._code(cmds)
 865
 866     def setglyphcmds(self, glyph, cmds):
 867         """replaces the T1cmd's by the list cmds for glyph glyph"""
 868         if not self._data2:
 869             self._data2decode()
 870         self._data2eexec = None
 871         self.glyphs[glyph] = self._code(cmds)
 872
 873     def updatepath(self, cmds, path, trafo, context):
 874         for cmd in cmds:
 875             if isinstance(cmd, T1cmd):
 876                 cmd.updatepath(path, trafo, context)
 877             else:
 878                 context.t1stack.append(cmd)
 879
 880     def updatesubrpath(self, subr, path, trafo, context):
 881         self.updatepath(self.getsubrcmds(subr), path, trafo, context)
 882
 883     def updateglyphpath(self, glyph, path, trafo, context):
 884         self.updatepath(self.getglyphcmds(glyph), path, trafo, context)
 885
 886     def gathercalls(self, cmds, seacglyphs, subrs, context):
 887         for cmd in cmds:
 888             if isinstance(cmd, T1cmd):
 889                 cmd.gathercalls(seacglyphs, subrs, context)
 890             else:
 891                 context.t1stack.append(cmd)
 892
 893     def gathersubrcalls(self, subr, seacglyphs, subrs, context):
 894         self.gathercalls(self.getsubrcmds(subr), seacglyphs, subrs, context)
 895
 896     def gatherglyphcalls(self, glyph, seacglyphs, subrs, context):
 897         self.gathercalls(self.getglyphcmds(glyph), seacglyphs, subrs, context)
 898
 899     def getglyphpath_pt(self, x_pt, y_pt, glyph, size_pt, convertcharcode=False, flex=True):
 900         """return an object containing the PyX path, wx_pt and wy_pt for glyph named glyph"""
 901         if convertcharcode:
 902             if not self.encoding:
 903                 self._encoding()
 904             glyph = self.encoding[glyph]
 905         t = self.fontmatrix.scaled(size_pt)
 906         tpath = t.translated_pt(x_pt, y_pt)
 907         context = T1context(self, flex=flex)
 908         p = path()
 909         self.updateglyphpath(glyph, p, tpath, context)
 910         class glyphpath:
 911             def __init__(self, p, wx_pt, wy_pt):
 912                 self.path = p
 913                 self.wx_pt = wx_pt
 914                 self.wy_pt = wy_pt
 915         return glyphpath(p, *t.apply_pt(context.wx, context.wy))
 916
 917     def getdata2(self, subrs=None, glyphs=None):
 918         """makes a data2 string
 919
 920         subrs is a dict containing those subrs numbers as keys,
 921         which are to be contained in the subrsstring to be created.
 922         If subrs is None, all subrs in self.subrs will be used.
 923         The subrs dict might be modified *in place*.
 924
 925         glyphs is a dict containing those glyph names as keys,
 926         which are to be contained in the charstringsstring to be created.
 927         If glyphs is None, all glyphs in self.glyphs will be used."""
 928         w = writer.writer(io.BytesIO())
 929
 930         def addsubrs(subrs):
 931             if subrs is not None:
 932                 # some adjustments to the subrs dict
 933                 if subrs:
 934                     subrsmin = min(subrs)
 935                     subrsmax = max(subrs)
 936                     if self.hasflexhintsubrs and subrsmin < len(self.flexhintsubrs):
 937                         # According to the spec we need to keep all the flex and hint subrs
 938                         # as long as any of it is used.
 939                         for subr in range(len(self.flexhintsubrs)):
 940                             subrs.add(subr)
 941                 else:
 942                     subrsmax = -1
 943             else:
 944                 # build a new subrs dict containing all subrs
 945                 subrs = dict([(subr, 1) for subr in range(len(self.subrs))])
 946                 subrsmax = len(self.subrs) - 1
 947
 948             # build the string from all selected subrs
 949             w.write("%d array\n" % (subrsmax + 1))
 950             for subr in range(subrsmax+1):
 951                 if subr in subrs:
 952                     code = self.subrs[subr]
 953                 else:
 954                     code = self.emptysubr
 955                 w.write("dup %d %d " % (subr, len(code)))
 956                 w.write_bytes(self.subrrdtoken)
 957                 w.write_bytes(b" ")
 958                 w.write_bytes(code)
 959                 w.write_bytes(b" ")
 960                 w.write_bytes(self.subrnptoken)
 961                 w.write_bytes(b"\n")
 962
 963         def addcharstrings(glyphs):
 964             w.write("%d dict dup begin\n" % (glyphs is None and len(self.glyphlist) or len(glyphs)))
 965             for glyph in self.glyphlist:
 966                 if glyphs is None or glyph in glyphs:
 967                     w.write("/%s %d " % (glyph, len(self.glyphs[glyph])))
 968                     w.write_bytes(self.glyphrdtoken)
 969                     w.write_bytes(b" ")
 970                     w.write_bytes(self.glyphs[glyph])
 971                     w.write_bytes(b" ")
 972                     w.write_bytes(self.glyphndtoken)
 973                     w.write_bytes(b"\n")
 974             w.write("end\n")
 975
 976         if self.subrsstart < self.charstringsstart:
 977             w.write_bytes(self._data2[:self.subrsstart])
 978             addsubrs(subrs)
 979             w.write_bytes(self._data2[self.subrsend:self.charstringsstart])
 980             addcharstrings(glyphs)
 981             w.write_bytes(self._data2[self.charstringsend:])
 982         else:
 983             w.write_bytes(self._data2[:self.charstringsstart])
 984             addcharstrings(glyphs)
 985             w.write_bytes(self._data2[self.charstringsend:self.subrsstart])
 986             addsubrs(subrs)
 987             w.write_bytes(self._data2[self.subrsend:])
 988         return w.file.getvalue()
 989
 990     def getdata2eexec(self):
 991         if self._data2eexec:
 992             return self._data2eexec
 993         # note that self._data2 is out-of-date here too, hence we need to call getdata2
 994         return self._eexecencode(self.getdata2())
 995
 996     newlinepattern = re.compile("\s*[\r\n]\s*")
 997     uniqueidstrpattern = re.compile("%?/UniqueID\s+\d+\s+def\s+")
 998     uniqueidbytespattern = re.compile(b"%?/UniqueID\s+\d+\s+def\s+")
 999         # when UniqueID is commented out (as in modern latin), prepare to remove the comment character as well
1000
1001     def getstrippedfont(self, glyphs, charcodes):
1002         """create a T1file instance containing only certain glyphs
1003
1004         glyphs is a set of the glyph names. It might be modified *in place*!
1005         """
1006         if not self.encoding:
1007             self._encoding()
1008         for charcode in charcodes:
1009             glyphs.add(self.encoding[charcode])
1010
1011         # collect information about used glyphs and subrs
1012         seacglyphs = set()
1013         subrs = set()
1014         for glyph in glyphs:
1015             self.gatherglyphcalls(glyph, seacglyphs, subrs, T1context(self))
1016         # while we have gathered all subrs for the seacglyphs alreadys, we
1017         # might have missed the glyphs themself (when they are not used stand-alone)
1018         glyphs.update(seacglyphs)
1019         glyphs.add(".notdef")
1020
1021         # strip data1
1022         if self.encoding is adobestandardencoding:
1023             data1 = self.data1
1024         else:
1025             encodingstrings = []
1026             for char, glyph in enumerate(self.encoding):
1027                 if glyph in glyphs:
1028                     encodingstrings.append("dup %i /%s put\n" % (char, glyph))
1029             data1 = self.data1[:self.encodingstart] + "\n" + "".join(encodingstrings) + self.data1[self.encodingend:]
1030         data1 = self.newlinepattern.subn("\n", data1)[0]
1031         data1 = self.uniqueidstrpattern.subn("", data1)[0]
1032
1033         # strip data2
1034         data2 = self.uniqueidbytespattern.subn(b"", self.getdata2(subrs, glyphs))[0]
1035
1036         # strip data3
1037         data3 = self.newlinepattern.subn("\n", self.data3)[0]
1038
1039         # create and return the new font instance
1040         return T1file(data1.rstrip() + "\n", self._eexecencode(data2), data3.rstrip() + "\n")
1041
    # The following two methods, writePDFfontinfo and getglyphinfo,
    # extract information which should better be taken from the afm file.
1044     def writePDFfontinfo(self, file):
1045         try:
1046             glyphinfo_y = self.getglyphinfo("y")
1047             glyphinfo_W = self.getglyphinfo("W")
1048             glyphinfo_H = self.getglyphinfo("H")
1049             glyphinfo_h = self.getglyphinfo("h")
1050             glyphinfo_period = self.getglyphinfo("period")
1051             glyphinfo_colon = self.getglyphinfo("colon")
1052         except:
1053             logger.warning("Auto-guessing of font information for font '%s' failed. We're writing stub data instead." % self.name)
1054             file.write("/Flags 4\n")
1055             file.write("/FontBBox [0 -100 1000 1000]\n")
1056             file.write("/ItalicAngle 0\n")
1057             file.write("/Ascent 1000\n")
1058             file.write("/Descent -100\n")
1059             file.write("/CapHeight 700\n")
1060             file.write("/StemV 100\n")
1061         else:
1062             if not self.encoding:
1063                 self._encoding()
1064             # As a simple heuristics we assume non-symbolic fonts if and only
1065             # if the Adobe standard encoding is used. All other font flags are
1066             # not specified here.
1067             if self.encoding is adobestandardencoding:
1068                 file.write("/Flags 32\n")
1069             else:
1070                 file.write("/Flags 4\n")
1071             file.write("/FontBBox [0 %f %f %f]\n" % (glyphinfo_y[3], glyphinfo_W[0], glyphinfo_H[5]))
1072             file.write("/ItalicAngle %f\n" % math.degrees(math.atan2(glyphinfo_period[4]-glyphinfo_colon[4], glyphinfo_colon[5]-glyphinfo_period[5])))
1073             file.write("/Ascent %f\n" % glyphinfo_H[5])
1074             file.write("/Descent %f\n" % glyphinfo_y[3])
1075             file.write("/CapHeight %f\n" % glyphinfo_h[5])
1076             file.write("/StemV %f\n" % (glyphinfo_period[4]-glyphinfo_period[2]))
1077
1078     def getglyphinfo(self, glyph, flex=True):
1079         logger.warning("We are about to extract font information for the Type 1 font '%s' from its pfb file. This is bad practice (and it's slow). You should use an afm file instead." % self.name)
1080         context = T1context(self, flex=flex)
1081         p = path()
1082         self.updateglyphpath(glyph, p, trafo.trafo(), context)
1083         bbox = p.bbox()
1084         return context.wx, context.wy, bbox.llx_pt, bbox.lly_pt, bbox.urx_pt, bbox.ury_pt
1085
1086     def outputPFA(self, file, remove_UniqueID_lookup=False):
1087         """output the T1file in PFA format"""
1088         data1 = self.data1
1089         data3 = self.data3
1090         if remove_UniqueID_lookup:
1091             m1 = re.search("""FontDirectory\s*/%(name)s\s+known{/%(name)s\s+findfont\s+dup\s*/UniqueID\s+known\s*{\s*dup\s*
1092                               /UniqueID\s+get\s+\d+\s+eq\s+exch\s*/FontType\s+get\s+1\s+eq\s+and\s*}\s*{\s*pop\s+false\s*}\s*ifelse\s*
1093                               {save\s+true\s*}\s*{\s*false\s*}\s*ifelse\s*}\s*{\s*false\s*}\s*ifelse""" % {"name": self.name},
1094                            data1, re.VERBOSE)
1095             m3 = re.search("\s*{restore}\s*if", data3)
1096             if m1 and m3:
1097                 data1 = data1[:m1.start()] + data1[m1.end():]
1098                 data3 = data3[:m3.start()] + data3[m3.end():]
1099         file.write(data1)
1100         data2eexechex = binascii.b2a_hex(self.getdata2eexec())
1101         linelength = 64
1102         for i in range((len(data2eexechex)-1)//linelength + 1):
1103             file.write_bytes(data2eexechex[i*linelength: i*linelength+linelength])
1104             file.write("\n")
1105         file.write(data3)
1106
1107     def outputPFB(self, file):
1108         """output the T1file in PFB format"""
1109         data2eexec = self.getdata2eexec()
1110         def pfblength(data):
1111             l = len(data)
1112             l, x1 = divmod(l, 256)
1113             l, x2 = divmod(l, 256)
1114             x4, x3 = divmod(l, 256)
1115             return chr(x1) + chr(x2) + chr(x3) + chr(x4)
1116         file.write("\200\1")
1117         file.write(pfblength(self.data1))
1118         file.write(self.data1)
1119         file.write("\200\2")
1120         file.write(pfblength(data2eexec))
1121         file.write(data2eexec)
1122         file.write("\200\1")
1123         file.write(pfblength(self.data3))
1124         file.write(self.data3)
1125         file.write("\200\3")
1126
1127     def outputPS(self, file, writer):
1128         """output the PostScript code for the T1file to the file file"""
1129         self.outputPFA(file, remove_UniqueID_lookup=True)
1130
1131     def outputPDF(self, file, writer):
1132         data2eexec = self.getdata2eexec()
1133         data3 = self.data3
1134         # we might be allowed to skip the third part ...
1135         if (data3.replace("\n", "")
1136                  .replace("\r", "")
1137                  .replace("\t", "")
1138                  .replace(" ", "")) == "0"*512 + "cleartomark":
1139             data3 = ""
1140
1141         data = self.data1.encode("ascii", errors="surrogateescape") + data2eexec + data3.encode("ascii", errors="surrogateescape")
1142         if writer.compress and haszlib:
1143             data = zlib.compress(data)
1144
1145         file.write("<<\n"
1146                    "/Length %d\n"
1147                    "/Length1 %d\n"
1148                    "/Length2 %d\n"
1149                    "/Length3 %d\n" % (len(data), len(self.data1), len(data2eexec), len(data3)))
1150         if writer.compress and haszlib:
1151             file.write("/Filter /FlateDecode\n")
1152         file.write(">>\n"
1153                    "stream\n")
1154         file.write_bytes(data)
1155         file.write("\n"
1156                    "endstream\n")
1157
1158 # factory functions
1159
class FontFormatError(Exception):
    """raised by the factory functions for data lacking the expected PFA/PFB structure"""
1162
def from_PFA_bytes(bytes):
    """create a T1file instance from a string of bytes corresponding to a PFA file

    The data is split after the "eexec" operator (and the character
    following it) and in front of the first run of 40 zeros. The part
    in between is expected to consist of hex digits, which may be
    interrupted by spaces and newlines.

    A FontFormatError is raised when "eexec" or the zeros are missing."""
    # bytes is a bytes object, hence all search and replace arguments
    # need to be bytes as well
    try:
        m1 = bytes.index(b"eexec") + 6
        m2 = bytes.index(b"0"*40)
    except ValueError:
        raise FontFormatError

    data1 = bytes[:m1].decode("ascii", errors="surrogateescape")
    data2eexec = binascii.a2b_hex(bytes[m1: m2].replace(b" ", b"").replace(b"\r", b"").replace(b"\n", b""))
    data3 = bytes[m2:].decode("ascii", errors="surrogateescape")
    return T1file(data1, data2eexec, data3)
1175
def from_PFA_filename(filename):
    """read the file named filename in binary mode and build a T1file from its PFA data"""
    with open(filename, "rb") as fontfile:
        return from_PFA_bytes(fontfile.read())
1181
def from_PFB_bytes(bytes):
    """create a T1file instance from a string of bytes corresponding to a PFB file

    The data needs to consist of a text segment (data1), one or more
    binary segments (joined to data2eexec), another text segment
    (data3) and the end marker with nothing following it. Otherwise a
    FontFormatError is raised. A length field of less than four bytes
    leads to a ValueError."""
    pos = 0

    def consume(n):
        # return the next n bytes (less at the end of the data) and skip them
        nonlocal pos
        chunk = bytes[pos:pos+n]
        pos += n
        return chunk

    def consumesegment():
        # read the four byte length field (least significant byte first)
        # and return the segment data following it
        lengthfield = consume(4)
        if len(lengthfield) != 4:
            raise ValueError("invalid string length")
        return consume(int.from_bytes(lengthfield, "little"))

    if consume(2) != b"\200\1":
        raise FontFormatError
    data1 = consumesegment().decode("ascii", errors="surrogateescape")

    mark = consume(2)
    if mark != b"\200\2":
        raise FontFormatError
    binarysegments = []
    while mark == b"\200\2":
        binarysegments.append(consumesegment())
        mark = consume(2)
    data2eexec = b"".join(binarysegments)

    if mark != b"\200\1":
        raise FontFormatError
    data3 = consumesegment().decode("ascii", errors="surrogateescape")

    if consume(2) != b"\200\3":
        raise FontFormatError
    # no data is allowed behind the end marker
    if consume(1):
        raise FontFormatError

    return T1file(data1, data2eexec, data3)
1223
def from_PFB_filename(filename):
    """read the file named filename in binary mode and build a T1file from its PFB data"""
    with open(filename, "rb") as fontfile:
        return from_PFB_bytes(fontfile.read())
1229
def from_PF_bytes(bytes):
    """create a T1file instance from a string of bytes of a font file

    The data is handed to from_PFB_bytes. The fallback to
    from_PFA_bytes on a FontFormatError is disabled:"""
    #try:
    t1file = from_PFB_bytes(bytes)
    return t1file
    #except FontFormatError:
    #    return from_PFA_bytes(bytes)
1235
def from_PF_filename(filename):
    """read the font file named filename in binary mode and build a T1file from it

    The data is handed to from_PF_bytes, which decides how it is parsed."""
    with open(filename, "rb") as fontfile:
        return from_PF_bytes(fontfile.read())