source/libs/poppler/poppler-0.36.0/poppler/CharCodeToUnicode.cc

   1 //========================================================================
   2 //
   3 // CharCodeToUnicode.cc
   4 //
   5 // Copyright 2001-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 //========================================================================
  10 //
  11 // Modified under the Poppler project - http://poppler.freedesktop.org
  12 //
  13 // All changes made under the Poppler project to this file are licensed
  14 // under GPL version 2 or later
  15 //
  16 // Copyright (C) 2006, 2008-2010, 2012 Albert Astals Cid <aacid@kde.org>
  17 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
  18 // Copyright (C) 2007 Koji Otani <sho@bbr.jp>
  19 // Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
  20 // Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
  21 // Copyright (C) 2010 William Bader <williambader@hotmail.com>
  22 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
  23 // Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
  24 // Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
  25 // Copyright (C) 2014 Jiri Slaby <jirislaby@gmail.com>
  26 //
  27 // To see a description of the changes please see the Changelog file that
  28 // came with your tarball or type make ChangeLog if you are building from git
  29 //
  30 //========================================================================
  31
  32 #include <config.h>
  33
  34 #ifdef USE_GCC_PRAGMAS
  35 #pragma implementation
  36 #endif
  37
  38 #include <stdio.h>
  39 #include <string.h>
  40 #include "goo/gmem.h"
  41 #include "goo/gfile.h"
  42 #include "goo/GooLikely.h"
  43 #include "goo/GooString.h"
  44 #include "Error.h"
  45 #include "GlobalParams.h"
  46 #include "PSTokenizer.h"
  47 #include "CharCodeToUnicode.h"
  48 #include "UTF.h"
  49
  50 //------------------------------------------------------------------------
  51
  52 struct CharCodeToUnicodeString {
  53   CharCode c;
  54   Unicode *u;
  55   int len;
  56 };
  57
  58 //------------------------------------------------------------------------
  59
  60 static int getCharFromString(void *data) {
  61   char *p;
  62   int c;
  63
  64   p = *(char **)data;
  65   if (*p) {
  66     c = *p++;
  67     *(char **)data = p;
  68   } else {
  69     c = EOF;
  70   }
  71   return c;
  72 }
  73
  74 static int getCharFromFile(void *data) {
  75   return fgetc((FILE *)data);
  76 }
  77
  78 //------------------------------------------------------------------------
  79
  80 static int hexCharVals[256] = {
  81   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
  82   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
  83   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
  84    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
  85   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
  86   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
  87   -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
  88   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
  89   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
  90   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
  91   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
  92   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
  93   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
  94   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
  95   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
  96   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // Fx
  97 };
  98
  99 // Parse a <len>-byte hex string <s> into *<val>.  Returns false on
 100 // error.
 101 static GBool parseHex(char *s, int len, Guint *val) {
 102   int i, x;
 103
 104   *val = 0;
 105   for (i = 0; i < len; ++i) {
 106     x = hexCharVals[s[i] & 0xff];
 107     if (x < 0) {
 108       return gFalse;
 109     }
 110     *val = (*val << 4) + x;
 111   }
 112   return gTrue;
 113 }
 114
 115 //------------------------------------------------------------------------
 116
 117 CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
 118   CharCodeToUnicode *ctu = new CharCodeToUnicode();
 119   ctu->isIdentity = gTrue;
 120   ctu->mapLen = 1;
 121   ctu->map = (Unicode *)gmallocn(ctu->mapLen, sizeof(Unicode));
 122   return ctu;
 123 }
 124
 125 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GooString *fileName,
 126                                                         GooString *collection) {
 127   FILE *f;
 128   Unicode *mapA;
 129   CharCode size, mapLenA;
 130   char buf[64];
 131   Unicode u;
 132   CharCodeToUnicode *ctu;
 133
 134   if (!(f = openFile(fileName->getCString(), "r"))) {
 135     error(errIO, -1, "Couldn't open cidToUnicode file '{0:t}'",
 136           fileName);
 137     return NULL;
 138   }
 139
 140   size = 32768;
 141   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
 142   mapLenA = 0;
 143
 144   while (getLine(buf, sizeof(buf), f)) {
 145     if (mapLenA == size) {
 146       size *= 2;
 147       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
 148     }
 149     if (sscanf(buf, "%x", &u) == 1) {
 150       mapA[mapLenA] = u;
 151     } else {
 152       error(errSyntaxWarning, -1, "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
 153             (int)(mapLenA + 1), fileName);
 154       mapA[mapLenA] = 0;
 155     }
 156     ++mapLenA;
 157   }
 158   fclose(f);
 159
 160   ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
 161                               NULL, 0, 0);
 162   gfree(mapA);
 163   return ctu;
 164 }
 165
 166 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
 167                                                     GooString *fileName) {
 168   FILE *f;
 169   Unicode *mapA;
 170   CharCodeToUnicodeString *sMapA;
 171   CharCode size, oldSize, len, sMapSizeA, sMapLenA;
 172   char buf[256];
 173   char *tok;
 174   Unicode u0;
 175   int uBufSize = 8;
 176   Unicode *uBuf = (Unicode *)gmallocn(uBufSize, sizeof(Unicode));
 177   CharCodeToUnicode *ctu;
 178   int line, n, i;
 179   char *tokptr;
 180
 181   if (!(f = openFile(fileName->getCString(), "r"))) {
 182     gfree(uBuf);
 183     error(errIO, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
 184           fileName);
 185     return NULL;
 186   }
 187
 188   size = 4096;
 189   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
 190   memset(mapA, 0, size * sizeof(Unicode));
 191   len = 0;
 192   sMapA = NULL;
 193   sMapSizeA = sMapLenA = 0;
 194
 195   line = 0;
 196   while (getLine(buf, sizeof(buf), f)) {
 197     ++line;
 198     if (!(tok = strtok_r(buf, " \t\r\n", &tokptr)) ||
 199         !parseHex(tok, strlen(tok), &u0)) {
 200       error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 201             line, fileName);
 202       continue;
 203     }
 204     n = 0;
 205     while ((tok = strtok_r(NULL, " \t\r\n", &tokptr))) {
 206       if (n >= uBufSize)
 207       {
 208         uBufSize += 8;
 209         uBuf = (Unicode *)greallocn(uBuf, uBufSize, sizeof(Unicode));
 210       }
 211       if (!parseHex(tok, strlen(tok), &uBuf[n])) {
 212         error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 213               line, fileName);
 214         break;
 215       }
 216       ++n;
 217     }
 218     if (n < 1) {
 219       error(errSyntaxWarning, -1, "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
 220             line, fileName);
 221       continue;
 222     }
 223     if (u0 >= size) {
 224       oldSize = size;
 225       while (u0 >= size) {
 226         size *= 2;
 227       }
 228       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
 229       memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
 230     }
 231     if (n == 1) {
 232       mapA[u0] = uBuf[0];
 233     } else {
 234       mapA[u0] = 0;
 235       if (sMapLenA == sMapSizeA) {
 236         sMapSizeA += 16;
 237         sMapA = (CharCodeToUnicodeString *)
 238                   greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
 239       }
 240       sMapA[sMapLenA].c = u0;
 241       sMapA[sMapLenA].u = (Unicode*)gmallocn(n, sizeof(Unicode));
 242       for (i = 0; i < n; ++i) {
 243         sMapA[sMapLenA].u[i] = uBuf[i];
 244       }
 245       sMapA[sMapLenA].len = n;
 246       ++sMapLenA;
 247     }
 248     if (u0 >= len) {
 249       len = u0 + 1;
 250     }
 251   }
 252   fclose(f);
 253
 254   ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
 255                               sMapA, sMapLenA, sMapSizeA);
 256   gfree(mapA);
 257   gfree(uBuf);
 258   return ctu;
 259 }
 260
 261 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
 262   return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
 263 }
 264
 265 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GooString *buf, int nBits) {
 266   CharCodeToUnicode *ctu;
 267   char *p;
 268
 269   ctu = new CharCodeToUnicode(NULL);
 270   p = buf->getCString();
 271   ctu->parseCMap1(&getCharFromString, &p, nBits);
 272   return ctu;
 273 }
 274
 275 CharCodeToUnicode *CharCodeToUnicode::parseCMapFromFile(GooString *fileName,
 276   int nBits) {
 277   CharCodeToUnicode *ctu;
 278   FILE *f;
 279
 280   ctu = new CharCodeToUnicode(NULL);
 281   if ((f = globalParams->findToUnicodeFile(fileName))) {
 282     ctu->parseCMap1(&getCharFromFile, f, nBits);
 283     fclose(f);
 284   } else {
 285     error(errSyntaxError, -1, "Couldn't find ToUnicode CMap file for '{0:t}'",
 286           fileName);
 287   }
 288   return ctu;
 289 }
 290
 291 void CharCodeToUnicode::mergeCMap(GooString *buf, int nBits) {
 292   char *p;
 293
 294   p = buf->getCString();
 295   parseCMap1(&getCharFromString, &p, nBits);
 296 }
 297
 298 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
 299                                    int nBits) {
 300   PSTokenizer *pst;
 301   char tok1[256], tok2[256], tok3[256];
 302   int n1, n2, n3;
 303   CharCode i;
 304   CharCode maxCode, code1, code2;
 305   GooString *name;
 306   FILE *f;
 307
 308   maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
 309   pst = new PSTokenizer(getCharFunc, data);
 310   pst->getToken(tok1, sizeof(tok1), &n1);
 311   while (pst->getToken(tok2, sizeof(tok2), &n2)) {
 312     if (!strcmp(tok2, "usecmap")) {
 313       if (tok1[0] == '/') {
 314         name = new GooString(tok1 + 1);
 315         if ((f = globalParams->findToUnicodeFile(name))) {
 316           parseCMap1(&getCharFromFile, f, nBits);
 317           fclose(f);
 318         } else {
 319           error(errSyntaxError, -1, "Couldn't find ToUnicode CMap file for '{0:t}'",
 320                 name);
 321         }
 322         delete name;
 323       }
 324       pst->getToken(tok1, sizeof(tok1), &n1);
 325     } else if (!strcmp(tok2, "beginbfchar")) {
 326       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
 327         if (!strcmp(tok1, "endbfchar")) {
 328           break;
 329         }
 330         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
 331             !strcmp(tok2, "endbfchar")) {
 332           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 333           break;
 334         }
 335         if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
 336               tok2[0] == '<' && tok2[n2 - 1] == '>')) {
 337           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 338           continue;
 339         }
 340         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
 341         if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
 342           error(errSyntaxWarning, -1, "Illegal entry in bfchar block in ToUnicode CMap");
 343           continue;
 344         }
 345         if (code1 > maxCode) {
 346           error(errSyntaxWarning, -1,
 347                 "Invalid entry in bfchar block in ToUnicode CMap");
 348         }
 349         addMapping(code1, tok2 + 1, n2 - 2, 0);
 350       }
 351       pst->getToken(tok1, sizeof(tok1), &n1);
 352     } else if (!strcmp(tok2, "beginbfrange")) {
 353       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
 354         if (!strcmp(tok1, "endbfrange")) {
 355           break;
 356         }
 357         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
 358             !strcmp(tok2, "endbfrange") ||
 359             !pst->getToken(tok3, sizeof(tok3), &n3) ||
 360             !strcmp(tok3, "endbfrange")) {
 361           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 362           break;
 363         }
 364         if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
 365               tok2[0] == '<' && tok2[n2 - 1] == '>')) {
 366           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 367           continue;
 368         }
 369         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
 370         if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
 371             !parseHex(tok2 + 1, n2 - 2, &code2)) {
 372           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 373           continue;
 374         }
 375         if (code1 > maxCode || code2 > maxCode) {
 376           error(errSyntaxWarning, -1,
 377                 "Invalid entry in bfrange block in ToUnicode CMap");
 378           if (code1 > maxCode) {
 379             code1 = maxCode;
 380           }
 381           if (code2 > maxCode) {
 382             code2 = maxCode;
 383           }
 384         }
 385         if (!strcmp(tok3, "[")) {
 386           i = 0;
 387           while (pst->getToken(tok1, sizeof(tok1), &n1) &&
 388                  code1 + i <= code2) {
 389             if (!strcmp(tok1, "]")) {
 390               break;
 391             }
 392             if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
 393               tok1[n1 - 1] = '\0';
 394               addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
 395             } else {
 396               error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 397             }
 398             ++i;
 399           }
 400         } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
 401           tok3[n3 - 1] = '\0';
 402           for (i = 0; code1 <= code2; ++code1, ++i) {
 403             addMapping(code1, tok3 + 1, n3 - 2, i);
 404           }
 405
 406         } else {
 407           error(errSyntaxWarning, -1, "Illegal entry in bfrange block in ToUnicode CMap");
 408         }
 409       }
 410       pst->getToken(tok1, sizeof(tok1), &n1);
 411     } else {
 412       strcpy(tok1, tok2);
 413     }
 414   }
 415   delete pst;
 416 }
 417
 418 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
 419                                    int offset) {
 420   CharCode oldLen, i;
 421   Unicode u;
 422   int j;
 423
 424   if (code > 0xffffff) {
 425     // This is an arbitrary limit to avoid integer overflow issues.
 426     // (I've seen CMaps with mappings for <ffffffff>.)
 427     return;
 428   }
 429   if (code >= mapLen) {
 430     oldLen = mapLen;
 431     mapLen = mapLen ? 2 * mapLen : 256;
 432     if (code >= mapLen) {
 433       mapLen = (code + 256) & ~255;
 434     }
 435     if (unlikely(code >= mapLen)) {
 436       error(errSyntaxWarning, -1, "Illegal code value in CharCodeToUnicode::addMapping");
 437       return;
 438     } else {
 439       map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
 440       for (i = oldLen; i < mapLen; ++i) {
 441         map[i] = 0;
 442       }
 443     }
 444   }
 445   if (n <= 4) {
 446     if (!parseHex(uStr, n, &u)) {
 447       error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
 448       return;
 449     }
 450     map[code] = u + offset;
 451     if (!UnicodeIsValid(map[code])) {
 452       map[code] = 0xfffd;
 453     }
 454   } else {
 455     if (sMapLen >= sMapSize) {
 456       sMapSize = sMapSize + 16;
 457       sMap = (CharCodeToUnicodeString *)
 458                greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
 459     }
 460     map[code] = 0;
 461     sMap[sMapLen].c = code;
 462     int utf16Len = n / 4;
 463     Unicode *utf16 = (Unicode*)gmallocn(utf16Len, sizeof(Unicode));
 464     for (j = 0; j < utf16Len; ++j) {
 465       if (!parseHex(uStr + j*4, 4, &utf16[j])) {
 466         gfree(utf16);
 467         error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
 468         return;
 469       }
 470     }
 471     utf16[utf16Len - 1] += offset;
 472     sMap[sMapLen].len = UTF16toUCS4(utf16, utf16Len, &sMap[sMapLen].u);
 473     gfree(utf16);
 474     ++sMapLen;
 475   }
 476 }
 477
 478 CharCodeToUnicode::CharCodeToUnicode() {
 479   tag = NULL;
 480   map = NULL;
 481   mapLen = 0;
 482   sMap = NULL;
 483   sMapLen = sMapSize = 0;
 484   refCnt = 1;
 485   isIdentity = gFalse;
 486 #if MULTITHREADED
 487   gInitMutex(&mutex);
 488 #endif
 489 }
 490
 491 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA) {
 492   CharCode i;
 493
 494   tag = tagA;
 495   mapLen = 256;
 496   map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
 497   for (i = 0; i < mapLen; ++i) {
 498     map[i] = 0;
 499   }
 500   sMap = NULL;
 501   sMapLen = sMapSize = 0;
 502   refCnt = 1;
 503   isIdentity = gFalse;
 504 #if MULTITHREADED
 505   gInitMutex(&mutex);
 506 #endif
 507 }
 508
 509 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA, Unicode *mapA,
 510                                      CharCode mapLenA, GBool copyMap,
 511                                      CharCodeToUnicodeString *sMapA,
 512                                      int sMapLenA, int sMapSizeA) {
 513   tag = tagA;
 514   mapLen = mapLenA;
 515   if (copyMap) {
 516     map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
 517     memcpy(map, mapA, mapLen * sizeof(Unicode));
 518   } else {
 519     map = mapA;
 520   }
 521   sMap = sMapA;
 522   sMapLen = sMapLenA;
 523   sMapSize = sMapSizeA;
 524   refCnt = 1;
 525   isIdentity = gFalse;
 526 #if MULTITHREADED
 527   gInitMutex(&mutex);
 528 #endif
 529 }
 530
 531 CharCodeToUnicode::~CharCodeToUnicode() {
 532   if (tag) {
 533     delete tag;
 534   }
 535   gfree(map);
 536   if (sMap) {
 537     for (int i = 0; i < sMapLen; ++i) gfree(sMap[i].u);
 538     gfree(sMap);
 539   }
 540 #if MULTITHREADED
 541   gDestroyMutex(&mutex);
 542 #endif
 543 }
 544
 545 void CharCodeToUnicode::incRefCnt() {
 546 #if MULTITHREADED
 547   gLockMutex(&mutex);
 548 #endif
 549   ++refCnt;
 550 #if MULTITHREADED
 551   gUnlockMutex(&mutex);
 552 #endif
 553 }
 554
 555 void CharCodeToUnicode::decRefCnt() {
 556   GBool done;
 557
 558 #if MULTITHREADED
 559   gLockMutex(&mutex);
 560 #endif
 561   done = --refCnt == 0;
 562 #if MULTITHREADED
 563   gUnlockMutex(&mutex);
 564 #endif
 565   if (done) {
 566     delete this;
 567   }
 568 }
 569
 570 GBool CharCodeToUnicode::match(GooString *tagA) {
 571   return tag && !tag->cmp(tagA);
 572 }
 573
 574 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
 575   int i, j;
 576
 577   if (!map || isIdentity) {
 578     return;
 579   }
 580   if (len == 1) {
 581     map[c] = u[0];
 582   } else {
 583     for (i = 0; i < sMapLen; ++i) {
 584       if (sMap[i].c == c) {
 585         gfree(sMap[i].u);
 586         break;
 587       }
 588     }
 589     if (i == sMapLen) {
 590       if (sMapLen == sMapSize) {
 591         sMapSize += 8;
 592         sMap = (CharCodeToUnicodeString *)
 593                  greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
 594       }
 595       ++sMapLen;
 596     }
 597     map[c] = 0;
 598     sMap[i].c = c;
 599     sMap[i].len = len;
 600     sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
 601     for (j = 0; j < len; ++j) {
 602       if (UnicodeIsValid(u[j])) {
 603         sMap[i].u[j] = u[j];
 604       } else {
 605         sMap[i].u[j] = 0xfffd;
 606       }
 607     }
 608   }
 609 }
 610
 611 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode **u) {
 612   int i;
 613
 614   if (isIdentity) {
 615     map[0] = (Unicode)c;
 616     *u = map;
 617     return 1;
 618   }
 619   if (c >= mapLen) {
 620     return 0;
 621   }
 622   if (map[c]) {
 623     *u = &map[c];
 624     return 1;
 625   }
 626   for (i = sMapLen - 1; i >= 0; --i) { // in reverse so CMap takes precedence
 627     if (sMap[i].c == c) {
 628       *u = sMap[i].u;
 629       return sMap[i].len;
 630     }
 631   }
 632   return 0;
 633 }
 634
 635 int CharCodeToUnicode::mapToCharCode(Unicode* u, CharCode *c, int usize) {
 636   //look for charcode in map
 637   if (usize == 1 || (usize > 1 && !(*u & ~0xff))) {
 638     if (isIdentity) {
 639       *c = (CharCode) *u;
 640       return 1;
 641     }
 642     for (CharCode i=0; i<mapLen; i++) {
 643       if (map[i] == *u) {
 644         *c = i;
 645         return 1;
 646       }
 647     }
 648     *c = 'x';
 649   } else {
 650     int i, j;
 651     //for each entry in the sMap
 652     for (i=0; i<sMapLen; i++) {
 653       //if the entry's unicode length isn't the same are usize, the strings
 654       // are obviously differents
 655       if (sMap[i].len != usize) continue;
 656       //compare the string char by char
 657       for (j=0; j<sMap[i].len; j++) {
 658         if (sMap[i].u[j] != u[j]) {
 659           continue;
 660         }
 661       }
 662
 663       //we have the same strings
 664       if (j==sMap[i].len) {
 665         *c = sMap[i].c;
 666         return 1;
 667       }
 668     }
 669   }
 670   return 0;
 671 }
 672
 673 //------------------------------------------------------------------------
 674
 675 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
 676   int i;
 677
 678   size = sizeA;
 679   cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
 680   for (i = 0; i < size; ++i) {
 681     cache[i] = NULL;
 682   }
 683 }
 684
 685 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
 686   int i;
 687
 688   for (i = 0; i < size; ++i) {
 689     if (cache[i]) {
 690       cache[i]->decRefCnt();
 691     }
 692   }
 693   gfree(cache);
 694 }
 695
 696 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GooString *tag) {
 697   CharCodeToUnicode *ctu;
 698   int i, j;
 699
 700   if (cache[0] && cache[0]->match(tag)) {
 701     cache[0]->incRefCnt();
 702     return cache[0];
 703   }
 704   for (i = 1; i < size; ++i) {
 705     if (cache[i] && cache[i]->match(tag)) {
 706       ctu = cache[i];
 707       for (j = i; j >= 1; --j) {
 708         cache[j] = cache[j - 1];
 709       }
 710       cache[0] = ctu;
 711       ctu->incRefCnt();
 712       return ctu;
 713     }
 714   }
 715   return NULL;
 716 }
 717
 718 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
 719   int i;
 720
 721   if (cache[size - 1]) {
 722     cache[size - 1]->decRefCnt();
 723   }
 724   for (i = size - 1; i >= 1; --i) {
 725     cache[i] = cache[i - 1];
 726   }
 727   cache[0] = ctu;
 728   ctu->incRefCnt();
 729 }