source/libs/poppler/poppler-src/poppler/CharCodeToUnicode.cc

   1 //========================================================================
   2 //
   3 // CharCodeToUnicode.cc
   4 //
   5 // Copyright 2001-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 //========================================================================
  10 //
  11 // Modified under the Poppler project - http://poppler.freedesktop.org
  12 //
  13 // All changes made under the Poppler project to this file are licensed
  14 // under GPL version 2 or later
  15 //
  16 // Copyright (C) 2006, 2008-2010, 2012 Albert Astals Cid <aacid@kde.org>
  17 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
  18 // Copyright (C) 2007 Koji Otani <sho@bbr.jp>
  19 // Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
  20 // Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
  21 // Copyright (C) 2010 William Bader <williambader@hotmail.com>
  22 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
  23 // Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
  24 // Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
  25 // Copyright (C) 2014 Jiri Slaby <jirislaby@gmail.com>
  26 // Copyright (C) 2015 Marek Kasik <mkasik@redhat.com>
  27 //
  28 // To see a description of the changes please see the Changelog file that
  29 // came with your tarball or type make ChangeLog if you are building from git
  30 //
  31 //========================================================================
  32
  33 #include <config.h>
  34
  35 #ifdef USE_GCC_PRAGMAS
  36 #pragma implementation
  37 #endif
  38
  39 #include <stdio.h>
  40 #include <string.h>
  41 #include "goo/gmem.h"
  42 #include "goo/gfile.h"
  43 #include "goo/GooLikely.h"
  44 #include "goo/GooString.h"
  45 #include "Error.h"
  46 #include "GlobalParams.h"
  47 #include "PSTokenizer.h"
  48 #include "CharCodeToUnicode.h"
  49 #include "UTF.h"
  50
  51 //------------------------------------------------------------------------
  52
// One entry of the multi-character mapping table (sMap): character
// code <c> maps to the <len>-element Unicode sequence stored at <u>.
struct CharCodeToUnicodeString {
  // Character code this entry belongs to.
  CharCode c;
  // Unicode sequence; released with gfree() by ~CharCodeToUnicode.
  Unicode *u;
  // Number of elements in <u>.
  int len;
};
  58
  59 //------------------------------------------------------------------------
  60
  61 static int getCharFromString(void *data) {
  62   char *p;
  63   int c;
  64
  65   p = *(char **)data;
  66   if (*p) {
  67     c = *p++;
  68     *(char **)data = p;
  69   } else {
  70     c = EOF;
  71   }
  72   return c;
  73 }
  74
  75 static int getCharFromFile(void *data) {
  76   return fgetc((FILE *)data);
  77 }
  78
  79 //------------------------------------------------------------------------
  80
  81 static int hexCharVals[256] = {
  82   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
  83   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
  84   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
  85    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
  86   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
  87   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
  88   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
  89   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
  90   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
  91   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
  92   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
  93   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
  94   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
  95   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
  96   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
  97   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // Fx
  98 };
  99
 100 // Parse a <len>-byte hex string <s> into *<val>.  Returns false on
 101 // error.
 102 static GBool parseHex(char *s, int len, Guint *val) {
 103   int i, x;
 104
 105   *val = 0;
 106   for (i = 0; i < len; ++i) {
 107     x = hexCharVals[s[i] & 0xff];
 108     if (x < 0) {
 109       return gFalse;
 110     }
 111     *val = (*val << 4) + x;
 112   }
 113   return gTrue;
 114 }
 115
 116 //------------------------------------------------------------------------
 117
 118 CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
 119   CharCodeToUnicode *ctu = new CharCodeToUnicode();
 120   ctu->isIdentity = gTrue;
 121   ctu->mapLen = 1;
 122   ctu->map = (Unicode *)gmallocn(ctu->mapLen, sizeof(Unicode));
 123   return ctu;
 124 }
 125
 126 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GooString *fileName,
 127                                                         GooString *collection) {
 128   FILE *f;
 129   Unicode *mapA;
 130   CharCode size, mapLenA;
 131   char buf[64];
 132   Unicode u;
 133   CharCodeToUnicode *ctu;
 134
 135   if (!(f = openFile(fileName->getCString(), "r"))) {
 136     error(errIO, -1, "Couldn't open cidToUnicode file '{0:t}'",
 137           fileName);
 138     return NULL;
 139   }
 140
 141   size = 32768;
 142   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
 143   mapLenA = 0;
 144
 145   while (getLine(buf, sizeof(buf), f)) {
 146     if (mapLenA == size) {
 147       size *= 2;
 148       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
 149     }
 150     if (sscanf(buf, "%x", &u) == 1) {
 151       mapA[mapLenA] = u;
 152     } else {
 153       error(errSyntaxWarning, -1, "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
 154             (int)(mapLenA + 1), fileName);
 155       mapA[mapLenA] = 0;
 156     }
 157     ++mapLenA;
 158   }
 159   fclose(f);
 160
 161   ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
 162                               NULL, 0, 0);
 163   gfree(mapA);
 164   return ctu;
 165 }
 166
 167 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
 168                                                     GooString *fileName) {
 169   FILE *f;
 170   Unicode *mapA;
 171   CharCodeToUnicodeString *sMapA;
 172   CharCode size, oldSize, len, sMapSizeA, sMapLenA;
 173   char buf[256];
 174   char *tok;
 175   Unicode u0;
 176   int uBufSize = 8;
 177   Unicode *uBuf = (Unicode *)gmallocn(uBufSize, sizeof(Unicode));
 178   CharCodeToUnicode *ctu;
 179   int line, n, i;
 180   char *tokptr;
 181
 182   if (!(f = openFile(fileName->getCString(), "r"))) {
 183     gfree(uBuf);
 184     error(errIO, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
 185           fileName);
 186     return NULL;
 187   }
 188
 189   size = 4096;
 190   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
 191   memset(mapA, 0, size * sizeof(Unicode));
 192   len = 0;
 193   sMapA = NULL;
 194   sMapSizeA = sMapLenA = 0;
 195
 196   line = 0;
 197   while (getLine(buf, sizeof(buf), f)) {
 198     ++line;
 199     if (!(tok = strtok_r(buf, " \t\r\n", &tokptr)) ||
 200         !parseHex(tok, strlen(tok), &u0)) {
 201       error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 202             line, fileName);
 203       continue;
 204     }
 205     n = 0;
 206     while ((tok = strtok_r(NULL, " \t\r\n", &tokptr))) {
 207       if (n >= uBufSize)
 208       {
 209         uBufSize += 8;
 210         uBuf = (Unicode *)greallocn(uBuf, uBufSize, sizeof(Unicode));
 211       }
 212       if (!parseHex(tok, strlen(tok), &uBuf[n])) {
 213         error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 214               line, fileName);
 215         break;
 216       }
 217       ++n;
 218     }
 219     if (n < 1) {
 220       error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 221             line, fileName);
 222       continue;
 223     }
 224     if (u0 >= size) {
 225       oldSize = size;
 226       while (u0 >= size) {
 227         size *= 2;
 228       }
 229       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
 230       memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
 231     }
 232     if (n == 1) {
 233       mapA[u0] = uBuf[0];
 234     } else {
 235       mapA[u0] = 0;
 236       if (sMapLenA == sMapSizeA) {
 237         sMapSizeA += 16;
 238         sMapA = (CharCodeToUnicodeString *)
 239                   greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
 240       }
 241       sMapA[sMapLenA].c = u0;
 242       sMapA[sMapLenA].u = (Unicode*)gmallocn(n, sizeof(Unicode));
 243       for (i = 0; i < n; ++i) {
 244         sMapA[sMapLenA].u[i] = uBuf[i];
 245       }
 246       sMapA[sMapLenA].len = n;
 247       ++sMapLenA;
 248     }
 249     if (u0 >= len) {
 250       len = u0 + 1;
 251     }
 252   }
 253   fclose(f);
 254
 255   ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
 256                               sMapA, sMapLenA, sMapSizeA);
 257   gfree(mapA);
 258   gfree(uBuf);
 259   return ctu;
 260 }
 261
 262 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
 263   return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
 264 }
 265
 266 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GooString *buf, int nBits) {
 267   CharCodeToUnicode *ctu;
 268   char *p;
 269
 270   ctu = new CharCodeToUnicode(NULL);
 271   p = buf->getCString();
 272   ctu->parseCMap1(&getCharFromString, &p, nBits);
 273   return ctu;
 274 }
 275
 276 CharCodeToUnicode *CharCodeToUnicode::parseCMapFromFile(GooString *fileName,
 277   int nBits) {
 278   CharCodeToUnicode *ctu;
 279   FILE *f;
 280
 281   ctu = new CharCodeToUnicode(NULL);
 282   if ((f = globalParams->findToUnicodeFile(fileName))) {
 283     ctu->parseCMap1(&getCharFromFile, f, nBits);
 284     fclose(f);
 285   } else {
 286     error(errSyntaxError, -1, "Couldn't find ToUnicode CMap file for '{0:t}'",
 287           fileName);
 288   }
 289   return ctu;
 290 }
 291
 292 void CharCodeToUnicode::mergeCMap(GooString *buf, int nBits) {
 293   char *p;
 294
 295   p = buf->getCString();
 296   parseCMap1(&getCharFromString, &p, nBits);
 297 }
 298
 299 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
 300                                    int nBits) {
 301   PSTokenizer *pst;
 302   char tok1[256], tok2[256], tok3[256];
 303   int n1, n2, n3;
 304   CharCode i;
 305   CharCode maxCode, code1, code2;
 306   GooString *name;
 307   FILE *f;
 308
 309   maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
 310   pst = new PSTokenizer(getCharFunc, data);
 311   pst->getToken(tok1, sizeof(tok1), &n1);
 312   while (pst->getToken(tok2, sizeof(tok2), &n2)) {
 313     if (!strcmp(tok2, "usecmap")) {
 314       if (tok1[0] == '/') {
 315         name = new GooString(tok1 + 1);
 316         if ((f = globalParams->findToUnicodeFile(name))) {
 317           parseCMap1(&getCharFromFile, f, nBits);
 318           fclose(f);
 319         } else {
 320           error(errSyntaxError, -1, "Couldn't find ToUnicode CMap file for '{0:t}'",
 321                 name);
 322         }
 323         delete name;
 324       }
 325       pst->getToken(tok1, sizeof(tok1), &n1);
 326     } else if (!strcmp(tok2, "beginbfchar")) {
 327       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
 328         if (!strcmp(tok1, "endbfchar")) {
 329           break;
 330         }
 331         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
 332             !strcmp(tok2, "endbfchar")) {
 333           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 334           break;
 335         }
 336         if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
 337               tok2[0] == '<' && tok2[n2 - 1] == '>')) {
 338           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 339           continue;
 340         }
 341         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
 342         if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
 343           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 344           continue;
 345         }
 346         if (code1 > maxCode) {
 347           error(errSyntaxWarning, -1,
 348                 "Invalid entry in bfchar block in ToUnicode CMap");
 349         }
 350         addMapping(code1, tok2 + 1, n2 - 2, 0);
 351       }
 352       pst->getToken(tok1, sizeof(tok1), &n1);
 353     } else if (!strcmp(tok2, "beginbfrange")) {
 354       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
 355         if (!strcmp(tok1, "endbfrange")) {
 356           break;
 357         }
 358         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
 359             !strcmp(tok2, "endbfrange") ||
 360             !pst->getToken(tok3, sizeof(tok3), &n3) ||
 361             !strcmp(tok3, "endbfrange")) {
 362           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 363           break;
 364         }
 365         if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
 366               tok2[0] == '<' && tok2[n2 - 1] == '>')) {
 367           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 368           continue;
 369         }
 370         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
 371         if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
 372             !parseHex(tok2 + 1, n2 - 2, &code2)) {
 373           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 374           continue;
 375         }
 376         if (code1 > maxCode || code2 > maxCode) {
 377           error(errSyntaxWarning, -1,
 378                 "Invalid entry in bfrange block in ToUnicode CMap");
 379           if (code1 > maxCode) {
 380             code1 = maxCode;
 381           }
 382           if (code2 > maxCode) {
 383             code2 = maxCode;
 384           }
 385         }
 386         if (!strcmp(tok3, "[")) {
 387           i = 0;
 388           while (pst->getToken(tok1, sizeof(tok1), &n1) &&
 389                  code1 + i <= code2) {
 390             if (!strcmp(tok1, "]")) {
 391               break;
 392             }
 393             if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
 394               tok1[n1 - 1] = '\0';
 395               addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
 396             } else {
 397               error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 398             }
 399             ++i;
 400           }
 401         } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
 402           tok3[n3 - 1] = '\0';
 403           for (i = 0; code1 <= code2; ++code1, ++i) {
 404             addMapping(code1, tok3 + 1, n3 - 2, i);
 405           }
 406
 407         } else {
 408           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 409         }
 410       }
 411       pst->getToken(tok1, sizeof(tok1), &n1);
 412     } else {
 413       strcpy(tok1, tok2);
 414     }
 415   }
 416   delete pst;
 417 }
 418
 419 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
 420                                    int offset) {
 421   CharCode oldLen, i;
 422   Unicode u;
 423   int j;
 424
 425   if (code > 0xffffff) {
 426     // This is an arbitrary limit to avoid integer overflow issues.
 427     // (I've seen CMaps with mappings for <ffffffff>.)
 428     return;
 429   }
 430   if (code >= mapLen) {
 431     oldLen = mapLen;
 432     mapLen = mapLen ? 2 * mapLen : 256;
 433     if (code >= mapLen) {
 434       mapLen = (code + 256) & ~255;
 435     }
 436     if (unlikely(code >= mapLen)) {
 437       error(errSyntaxWarning, -1, "Illegal code value in CharCodeToUnicode::addMapping");
 438       return;
 439     } else {
 440       map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
 441       for (i = oldLen; i < mapLen; ++i) {
 442         map[i] = 0;
 443       }
 444     }
 445   }
 446   if (n <= 4) {
 447     if (!parseHex(uStr, n, &u)) {
 448       error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
 449       return;
 450     }
 451     map[code] = u + offset;
 452     if (!UnicodeIsValid(map[code])) {
 453       map[code] = 0xfffd;
 454     }
 455   } else {
 456     if (sMapLen >= sMapSize) {
 457       sMapSize = sMapSize + 16;
 458       sMap = (CharCodeToUnicodeString *)
 459                greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
 460     }
 461     map[code] = 0;
 462     sMap[sMapLen].c = code;
 463     int utf16Len = n / 4;
 464     Unicode *utf16 = (Unicode*)gmallocn(utf16Len, sizeof(Unicode));
 465     for (j = 0; j < utf16Len; ++j) {
 466       if (!parseHex(uStr + j*4, 4, &utf16[j])) {
 467         gfree(utf16);
 468         error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
 469         return;
 470       }
 471     }
 472     utf16[utf16Len - 1] += offset;
 473     sMap[sMapLen].len = UTF16toUCS4(utf16, utf16Len, &sMap[sMapLen].u);
 474     gfree(utf16);
 475     ++sMapLen;
 476   }
 477 }
 478
 479 CharCodeToUnicode::CharCodeToUnicode() {
 480   tag = NULL;
 481   map = NULL;
 482   mapLen = 0;
 483   sMap = NULL;
 484   sMapLen = sMapSize = 0;
 485   refCnt = 1;
 486   isIdentity = gFalse;
 487 #if MULTITHREADED
 488   gInitMutex(&mutex);
 489 #endif
 490 }
 491
 492 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA) {
 493   CharCode i;
 494
 495   tag = tagA;
 496   mapLen = 256;
 497   map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
 498   for (i = 0; i < mapLen; ++i) {
 499     map[i] = 0;
 500   }
 501   sMap = NULL;
 502   sMapLen = sMapSize = 0;
 503   refCnt = 1;
 504   isIdentity = gFalse;
 505 #if MULTITHREADED
 506   gInitMutex(&mutex);
 507 #endif
 508 }
 509
 510 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA, Unicode *mapA,
 511                                      CharCode mapLenA, GBool copyMap,
 512                                      CharCodeToUnicodeString *sMapA,
 513                                      int sMapLenA, int sMapSizeA) {
 514   tag = tagA;
 515   mapLen = mapLenA;
 516   if (copyMap) {
 517     map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
 518     memcpy(map, mapA, mapLen * sizeof(Unicode));
 519   } else {
 520     map = mapA;
 521   }
 522   sMap = sMapA;
 523   sMapLen = sMapLenA;
 524   sMapSize = sMapSizeA;
 525   refCnt = 1;
 526   isIdentity = gFalse;
 527 #if MULTITHREADED
 528   gInitMutex(&mutex);
 529 #endif
 530 }
 531
 532 CharCodeToUnicode::~CharCodeToUnicode() {
 533   if (tag) {
 534     delete tag;
 535   }
 536   gfree(map);
 537   if (sMap) {
 538     for (int i = 0; i < sMapLen; ++i) gfree(sMap[i].u);
 539     gfree(sMap);
 540   }
 541 #if MULTITHREADED
 542   gDestroyMutex(&mutex);
 543 #endif
 544 }
 545
 546 void CharCodeToUnicode::incRefCnt() {
 547 #if MULTITHREADED
 548   gLockMutex(&mutex);
 549 #endif
 550   ++refCnt;
 551 #if MULTITHREADED
 552   gUnlockMutex(&mutex);
 553 #endif
 554 }
 555
 556 void CharCodeToUnicode::decRefCnt() {
 557   GBool done;
 558
 559 #if MULTITHREADED
 560   gLockMutex(&mutex);
 561 #endif
 562   done = --refCnt == 0;
 563 #if MULTITHREADED
 564   gUnlockMutex(&mutex);
 565 #endif
 566   if (done) {
 567     delete this;
 568   }
 569 }
 570
 571 GBool CharCodeToUnicode::match(GooString *tagA) {
 572   return tag && !tag->cmp(tagA);
 573 }
 574
 575 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
 576   int i, j;
 577
 578   if (!map || isIdentity) {
 579     return;
 580   }
 581   if (len == 1) {
 582     map[c] = u[0];
 583   } else {
 584     for (i = 0; i < sMapLen; ++i) {
 585       if (sMap[i].c == c) {
 586         gfree(sMap[i].u);
 587         break;
 588       }
 589     }
 590     if (i == sMapLen) {
 591       if (sMapLen == sMapSize) {
 592         sMapSize += 8;
 593         sMap = (CharCodeToUnicodeString *)
 594                  greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
 595       }
 596       ++sMapLen;
 597     }
 598     map[c] = 0;
 599     sMap[i].c = c;
 600     sMap[i].len = len;
 601     sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
 602     for (j = 0; j < len; ++j) {
 603       if (UnicodeIsValid(u[j])) {
 604         sMap[i].u[j] = u[j];
 605       } else {
 606         sMap[i].u[j] = 0xfffd;
 607       }
 608     }
 609   }
 610 }
 611
 612 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode **u) {
 613   int i;
 614
 615   if (isIdentity) {
 616     map[0] = (Unicode)c;
 617     *u = map;
 618     return 1;
 619   }
 620   if (c >= mapLen) {
 621     return 0;
 622   }
 623   if (map[c]) {
 624     *u = &map[c];
 625     return 1;
 626   }
 627   for (i = sMapLen - 1; i >= 0; --i) { // in reverse so CMap takes precedence
 628     if (sMap[i].c == c) {
 629       *u = sMap[i].u;
 630       return sMap[i].len;
 631     }
 632   }
 633   return 0;
 634 }
 635
 636 int CharCodeToUnicode::mapToCharCode(Unicode* u, CharCode *c, int usize) {
 637   //look for charcode in map
 638   if (usize == 1 || (usize > 1 && !(*u & ~0xff))) {
 639     if (isIdentity) {
 640       *c = (CharCode) *u;
 641       return 1;
 642     }
 643     for (CharCode i=0; i<mapLen; i++) {
 644       if (map[i] == *u) {
 645         *c = i;
 646         return 1;
 647       }
 648     }
 649     *c = 'x';
 650   } else {
 651     int i, j;
 652     //for each entry in the sMap
 653     for (i=0; i<sMapLen; i++) {
 654       //if the entry's unicode length isn't the same are usize, the strings
 655       // are obviously differents
 656       if (sMap[i].len != usize) continue;
 657       //compare the string char by char
 658       for (j=0; j<sMap[i].len; j++) {
 659         if (sMap[i].u[j] != u[j]) {
 660           break;
 661         }
 662       }
 663
 664       //we have the same strings
 665       if (j==sMap[i].len) {
 666         *c = sMap[i].c;
 667         return 1;
 668       }
 669     }
 670   }
 671   return 0;
 672 }
 673
 674 //------------------------------------------------------------------------
 675
 676 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
 677   int i;
 678
 679   size = sizeA;
 680   cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
 681   for (i = 0; i < size; ++i) {
 682     cache[i] = NULL;
 683   }
 684 }
 685
 686 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
 687   int i;
 688
 689   for (i = 0; i < size; ++i) {
 690     if (cache[i]) {
 691       cache[i]->decRefCnt();
 692     }
 693   }
 694   gfree(cache);
 695 }
 696
 697 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GooString *tag) {
 698   CharCodeToUnicode *ctu;
 699   int i, j;
 700
 701   if (cache[0] && cache[0]->match(tag)) {
 702     cache[0]->incRefCnt();
 703     return cache[0];
 704   }
 705   for (i = 1; i < size; ++i) {
 706     if (cache[i] && cache[i]->match(tag)) {
 707       ctu = cache[i];
 708       for (j = i; j >= 1; --j) {
 709         cache[j] = cache[j - 1];
 710       }
 711       cache[0] = ctu;
 712       ctu->incRefCnt();
 713       return ctu;
 714     }
 715   }
 716   return NULL;
 717 }
 718
 719 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
 720   int i;
 721
 722   if (cache[size - 1]) {
 723     cache[size - 1]->decRefCnt();
 724   }
 725   for (i = size - 1; i >= 1; --i) {
 726     cache[i] = cache[i - 1];
 727   }
 728   cache[0] = ctu;
 729   ctu->incRefCnt();
 730 }