source/libs/poppler/poppler-0.37.0/poppler/PDFDoc.cc

   1 //========================================================================
   2 //
   3 // PDFDoc.cc
   4 //
   5 // Copyright 1996-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 //========================================================================
  10 //
  11 // Modified under the Poppler project - http://poppler.freedesktop.org
  12 //
  13 // All changes made under the Poppler project to this file are licensed
  14 // under GPL version 2 or later
  15 //
  16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
  17 // Copyright (C) 2005, 2007-2009, 2011-2014 Albert Astals Cid <aacid@kde.org>
  18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
  19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
  20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
  21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
  22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
  23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
  24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
  25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
  26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
  27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
  28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
  29 // Copyright (C) 2011-2015 Thomas Freitag <Thomas.Freitag@alfa.de>
  30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
  31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
  32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
  33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
  34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
  35 //
  36 // To see a description of the changes please see the Changelog file that
  37 // came with your tarball or type make ChangeLog if you are building from git
  38 //
  39 //========================================================================
  40
  41 #include <config.h>
  42
  43 #ifdef USE_GCC_PRAGMAS
  44 #pragma implementation
  45 #endif
  46
  47 #include <ctype.h>
  48 #include <locale.h>
  49 #include <stdio.h>
  50 #include <errno.h>
  51 #include <stdlib.h>
  52 #include <stddef.h>
  53 #include <string.h>
  54 #include <time.h>
  55 #include <sys/stat.h>
  56 #include "goo/gstrtod.h"
  57 #include "goo/GooString.h"
  58 #include "goo/gfile.h"
  59 #include "poppler-config.h"
  60 #include "GlobalParams.h"
  61 #include "Page.h"
  62 #include "Catalog.h"
  63 #include "Stream.h"
  64 #include "XRef.h"
  65 #include "Linearization.h"
  66 #include "Link.h"
  67 #include "OutputDev.h"
  68 #include "Error.h"
  69 #include "ErrorCodes.h"
  70 #include "Lexer.h"
  71 #include "Parser.h"
  72 #include "SecurityHandler.h"
  73 #include "Decrypt.h"
  74 #ifndef DISABLE_OUTLINE
  75 #include "Outline.h"
  76 #endif
  77 #include "PDFDoc.h"
  78 #include "Hints.h"
  79
// pdfdocLocker(): in MULTITHREADED builds this declares a local MutexLocker
// named `locker`, constructed from the address of this document's `mutex`
// member; in other builds it expands to nothing.
#if MULTITHREADED
#  define pdfdocLocker()   MutexLocker locker(&mutex)
#else
#  define pdfdocLocker()
#endif
  85
  86 //------------------------------------------------------------------------
  87
// Sizes of the windows scanned when probing a file, and the length of a
// hex-encoded document ID.

#define headerSearchSize 1024   // read this many bytes at beginning of
                                //   file to look for '%PDF'
#define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length,
                         //   counted in hexadecimal characters (see get_id)

#define linearizationSearchSize 1024    // read this many bytes at beginning of
                                        // file to look for linearization
                                        // dictionary

#define xrefSearchSize 1024     // read this many bytes at end of file
                                //   to look for 'startxref'
  98
  99 //------------------------------------------------------------------------
 100 // PDFDoc
 101 //------------------------------------------------------------------------
 102
 103 void PDFDoc::init()
 104 {
 105 #if MULTITHREADED
 106   gInitMutex(&mutex);
 107 #endif
 108   ok = gFalse;
 109   errCode = errNone;
 110   fileName = NULL;
 111   file = NULL;
 112   str = NULL;
 113   xref = NULL;
 114   linearization = NULL;
 115   catalog = NULL;
 116   hints = NULL;
 117 #ifndef DISABLE_OUTLINE
 118   outline = NULL;
 119 #endif
 120   startXRefPos = -1;
 121   secHdlr = NULL;
 122   pageCache = NULL;
 123 }
 124
 125 PDFDoc::PDFDoc()
 126 {
 127   init();
 128 }
 129
 130 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 131                GooString *userPassword, void *guiDataA) {
 132   Object obj;
 133 #ifdef _WIN32
 134   int n, i;
 135 #endif
 136
 137   init();
 138
 139   fileName = fileNameA;
 140   guiData = guiDataA;
 141 #ifdef _WIN32
 142   n = fileName->getLength();
 143   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 144   for (i = 0; i < n; ++i) {
 145     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 146   }
 147   fileNameU[n] = L'\0';
 148 #endif
 149
 150   // try to open file
 151   file = GooFile::open(fileName);
 152   if (file == NULL) {
 153     // fopen() has failed.
 154     // Keep a copy of the errno returned by fopen so that it can be
 155     // referred to later.
 156     fopenErrno = errno;
 157     error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
 158     errCode = errOpenFile;
 159     return;
 160   }
 161
 162   // create stream
 163   obj.initNull();
 164   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 165
 166   ok = setup(ownerPassword, userPassword);
 167 }
 168
 169 #ifdef _WIN32
 170 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 171                GooString *userPassword, void *guiDataA) {
 172   OSVERSIONINFO version;
 173   Object obj;
 174   int i;
 175
 176   init();
 177
 178   guiData = guiDataA;
 179
 180   // save both Unicode and 8-bit copies of the file name
 181   fileName = new GooString();
 182   fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
 183   for (i = 0; i < fileNameLen; ++i) {
 184     fileName->append((char)fileNameA[i]);
 185     fileNameU[i] = fileNameA[i];
 186   }
 187   fileNameU[fileNameLen] = L'\0';
 188
 189   // try to open file
 190   // NB: _wfopen is only available in NT
 191   version.dwOSVersionInfoSize = sizeof(version);
 192   GetVersionEx(&version);
 193   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
 194     file = GooFile::open(fileNameU);
 195   } else {
 196     file = GooFile::open(fileName);
 197   }
 198   if (!file) {
 199     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
 200     errCode = errOpenFile;
 201     return;
 202   }
 203
 204   // create stream
 205   obj.initNull();
 206   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 207
 208   ok = setup(ownerPassword, userPassword);
 209 }
 210 #endif
 211
 212 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
 213                GooString *userPassword, void *guiDataA) {
 214 #ifdef _WIN32
 215   int n, i;
 216 #endif
 217
 218   init();
 219   guiData = guiDataA;
 220   if (strA->getFileName()) {
 221     fileName = strA->getFileName()->copy();
 222 #ifdef _WIN32
 223     n = fileName->getLength();
 224     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 225     for (i = 0; i < n; ++i) {
 226       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 227     }
 228     fileNameU[n] = L'\0';
 229 #endif
 230   } else {
 231     fileName = NULL;
 232 #ifdef _WIN32
 233     fileNameU = NULL;
 234 #endif
 235   }
 236   str = strA;
 237   ok = setup(ownerPassword, userPassword);
 238 }
 239
 240 GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 241   pdfdocLocker();
 242   str->setPos(0, -1);
 243   if (str->getPos() < 0)
 244   {
 245     error(errSyntaxError, -1, "Document base stream is not seekable");
 246     return gFalse;
 247   }
 248
 249   str->reset();
 250
 251   // check footer
 252   // Adobe does not seem to enforce %%EOF, so we do the same
 253 //  if (!checkFooter()) return gFalse;
 254
 255   // check header
 256   checkHeader();
 257
 258   GBool wasReconstructed = false;
 259
 260   // read xref table
 261   xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
 262   if (!xref->isOk()) {
 263     if (wasReconstructed) {
 264       delete xref;
 265       startXRefPos = -1;
 266       xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
 267     }
 268     if (!xref->isOk()) {
 269       error(errSyntaxError, -1, "Couldn't read xref table");
 270       errCode = xref->getErrorCode();
 271       return gFalse;
 272     }
 273   }
 274
 275   // check for encryption
 276   if (!checkEncryption(ownerPassword, userPassword)) {
 277     errCode = errEncrypted;
 278     return gFalse;
 279   }
 280
 281   // read catalog
 282   catalog = new Catalog(this);
 283   if (catalog && !catalog->isOk()) {
 284     if (!wasReconstructed)
 285     {
 286       // try one more time to contruct the Catalog, maybe the problem is damaged XRef
 287       delete catalog;
 288       delete xref;
 289       xref = new XRef(str, 0, 0, NULL, true);
 290       catalog = new Catalog(this);
 291     }
 292
 293     if (catalog && !catalog->isOk()) {
 294       error(errSyntaxError, -1, "Couldn't read page catalog");
 295       errCode = errBadCatalog;
 296       return gFalse;
 297     }
 298   }
 299
 300   // done
 301   return gTrue;
 302 }
 303
 304 PDFDoc::~PDFDoc() {
 305   if (pageCache) {
 306     for (int i = 0; i < getNumPages(); i++) {
 307       if (pageCache[i]) {
 308         delete pageCache[i];
 309       }
 310     }
 311     gfree(pageCache);
 312   }
 313   delete secHdlr;
 314 #ifndef DISABLE_OUTLINE
 315   if (outline) {
 316     delete outline;
 317   }
 318 #endif
 319   if (catalog) {
 320     delete catalog;
 321   }
 322   if (xref) {
 323     delete xref;
 324   }
 325   if (hints) {
 326     delete hints;
 327   }
 328   if (linearization) {
 329     delete linearization;
 330   }
 331   if (str) {
 332     delete str;
 333   }
 334   if (file) {
 335     delete file;
 336   }
 337   if (fileName) {
 338     delete fileName;
 339   }
 340 #ifdef _WIN32
 341   if (fileNameU) {
 342     gfree(fileNameU);
 343   }
 344 #endif
 345 #if MULTITHREADED
 346   gDestroyMutex(&mutex);
 347 #endif
 348 }
 349
 350
 351 // Check for a %%EOF at the end of this stream
 352 GBool PDFDoc::checkFooter() {
 353   // we look in the last 1024 chars because Adobe does the same
 354   char *eof = new char[1025];
 355   Goffset pos = str->getPos();
 356   str->setPos(1024, -1);
 357   int i, ch;
 358   for (i = 0; i < 1024; i++)
 359   {
 360     ch = str->getChar();
 361     if (ch == EOF)
 362       break;
 363     eof[i] = ch;
 364   }
 365   eof[i] = '\0';
 366
 367   bool found = false;
 368   for (i = i - 5; i >= 0; i--) {
 369     if (strncmp (&eof[i], "%%EOF", 5) == 0) {
 370       found = true;
 371       break;
 372     }
 373   }
 374   if (!found)
 375   {
 376     error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
 377     errCode = errDamaged;
 378     delete[] eof;
 379     return gFalse;
 380   }
 381   delete[] eof;
 382   str->setPos(pos);
 383   return gTrue;
 384 }
 385
 386 // Check for a PDF header on this stream.  Skip past some garbage
 387 // if necessary.
 388 void PDFDoc::checkHeader() {
 389   char hdrBuf[headerSearchSize+1];
 390   char *p;
 391   char *tokptr;
 392   int i;
 393
 394   pdfMajorVersion = 0;
 395   pdfMinorVersion = 0;
 396   for (i = 0; i < headerSearchSize; ++i) {
 397     hdrBuf[i] = str->getChar();
 398   }
 399   hdrBuf[headerSearchSize] = '\0';
 400   for (i = 0; i < headerSearchSize - 5; ++i) {
 401     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
 402       break;
 403     }
 404   }
 405   if (i >= headerSearchSize - 5) {
 406     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 407     return;
 408   }
 409   str->moveStart(i);
 410   if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
 411     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 412     return;
 413   }
 414   sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
 415   // We don't do the version check. Don't add it back in.
 416 }
 417
 418 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
 419   Object encrypt;
 420   GBool encrypted;
 421   GBool ret;
 422
 423   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
 424   if ((encrypted = encrypt.isDict())) {
 425     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
 426       if (secHdlr->isUnencrypted()) {
 427         // no encryption
 428         ret = gTrue;
 429       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
 430         // authorization succeeded
 431         xref->setEncryption(secHdlr->getPermissionFlags(),
 432                             secHdlr->getOwnerPasswordOk(),
 433                             secHdlr->getFileKey(),
 434                             secHdlr->getFileKeyLength(),
 435                             secHdlr->getEncVersion(),
 436                             secHdlr->getEncRevision(),
 437                             secHdlr->getEncAlgorithm());
 438         ret = gTrue;
 439       } else {
 440         // authorization failed
 441         ret = gFalse;
 442       }
 443     } else {
 444       // couldn't find the matching security handler
 445       ret = gFalse;
 446     }
 447   } else {
 448     // document is not encrypted
 449     ret = gTrue;
 450   }
 451   encrypt.free();
 452   return ret;
 453 }
 454
 455 void PDFDoc::displayPage(OutputDev *out, int page,
 456                          double hDPI, double vDPI, int rotate,
 457                          GBool useMediaBox, GBool crop, GBool printing,
 458                          GBool (*abortCheckCbk)(void *data),
 459                          void *abortCheckCbkData,
 460                          GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 461                          void *annotDisplayDecideCbkData, GBool copyXRef) {
 462   if (globalParams->getPrintCommands()) {
 463     printf("***** page %d *****\n", page);
 464   }
 465
 466   if (getPage(page))
 467     getPage(page)->display(out, hDPI, vDPI,
 468                                     rotate, useMediaBox, crop, printing,
 469                                     abortCheckCbk, abortCheckCbkData,
 470                                     annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 471
 472 }
 473
 474 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
 475                           double hDPI, double vDPI, int rotate,
 476                           GBool useMediaBox, GBool crop, GBool printing,
 477                           GBool (*abortCheckCbk)(void *data),
 478                           void *abortCheckCbkData,
 479                           GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 480                           void *annotDisplayDecideCbkData) {
 481   int page;
 482
 483   for (page = firstPage; page <= lastPage; ++page) {
 484     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
 485                 abortCheckCbk, abortCheckCbkData,
 486                 annotDisplayDecideCbk, annotDisplayDecideCbkData);
 487   }
 488 }
 489
 490 void PDFDoc::displayPageSlice(OutputDev *out, int page,
 491                               double hDPI, double vDPI, int rotate,
 492                               GBool useMediaBox, GBool crop, GBool printing,
 493                               int sliceX, int sliceY, int sliceW, int sliceH,
 494                               GBool (*abortCheckCbk)(void *data),
 495                               void *abortCheckCbkData,
 496                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 497                               void *annotDisplayDecideCbkData, GBool copyXRef) {
 498   if (getPage(page))
 499     getPage(page)->displaySlice(out, hDPI, vDPI,
 500                                          rotate, useMediaBox, crop,
 501                                          sliceX, sliceY, sliceW, sliceH,
 502                                          printing,
 503                                          abortCheckCbk, abortCheckCbkData,
 504                                          annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 505 }
 506
 507 Links *PDFDoc::getLinks(int page) {
 508   Page *p = getPage(page);
 509   if (!p) {
 510     return new Links (NULL);
 511   }
 512   return p->getLinks();
 513 }
 514
 515 void PDFDoc::processLinks(OutputDev *out, int page) {
 516   if (getPage(page))
 517     getPage(page)->processLinks(out);
 518 }
 519
 520 Linearization *PDFDoc::getLinearization()
 521 {
 522   if (!linearization) {
 523     linearization = new Linearization(str);
 524   }
 525   return linearization;
 526 }
 527
 528 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
 529   if ((str->getLength()) &&
 530       (getLinearization()->getLength() == str->getLength()))
 531     return gTrue;
 532   else {
 533     if (tryingToReconstruct)
 534       return getLinearization()->getLength() > 0;
 535     else
 536       return gFalse;
 537   }
 538 }
 539
 540 static GBool
 541 get_id (GooString *encodedidstring, GooString *id) {
 542   const char *encodedid = encodedidstring->getCString();
 543   char pdfid[pdfIdLength + 1];
 544   int n;
 545
 546   if (encodedidstring->getLength() != pdfIdLength / 2)
 547     return gFalse;
 548
 549   n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
 550               encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
 551               encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
 552               encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
 553               encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
 554   if (n != pdfIdLength)
 555     return gFalse;
 556
 557   id->Set(pdfid, pdfIdLength);
 558   return gTrue;
 559 }
 560
 561 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
 562   Object obj;
 563   xref->getTrailerDict()->dictLookup ("ID", &obj);
 564
 565   if (obj.isArray() && obj.arrayGetLength() == 2) {
 566     Object obj2;
 567
 568     if (permanent_id) {
 569       if (obj.arrayGet(0, &obj2)->isString()) {
 570         if (!get_id (obj2.getString(), permanent_id)) {
 571           obj2.free();
 572           return gFalse;
 573         }
 574       } else {
 575         error(errSyntaxError, -1, "Invalid permanent ID");
 576         obj2.free();
 577         return gFalse;
 578       }
 579       obj2.free();
 580     }
 581
 582     if (update_id) {
 583       if (obj.arrayGet(1, &obj2)->isString()) {
 584         if (!get_id (obj2.getString(), update_id)) {
 585           obj2.free();
 586           return gFalse;
 587         }
 588       } else {
 589         error(errSyntaxError, -1, "Invalid update ID");
 590         obj2.free();
 591         return gFalse;
 592       }
 593       obj2.free();
 594     }
 595
 596     obj.free();
 597
 598     return gTrue;
 599   }
 600   obj.free();
 601
 602   return gFalse;
 603 }
 604
 605 Hints *PDFDoc::getHints()
 606 {
 607   if (!hints && isLinearized()) {
 608     hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
 609   }
 610
 611   return hints;
 612 }
 613
 614 int PDFDoc::savePageAs(GooString *name, int pageNo)
 615 {
 616   FILE *f;
 617   OutStream *outStr;
 618   XRef *yRef, *countRef;
 619   int rootNum = getXRef()->getNumObjects() + 1;
 620
 621   // Make sure that special flags are set, because we are going to read
 622   // all objects, including Unencrypted ones.
 623   xref->scanSpecialFlags();
 624
 625   Guchar *fileKey;
 626   CryptAlgorithm encAlgorithm;
 627   int keyLength;
 628   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 629
 630   if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
 631     error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
 632     return errOpenFile;
 633   }
 634   PDFRectangle *cropBox = NULL;
 635   if (getCatalog()->getPage(pageNo)->isCropped()) {
 636     cropBox = getCatalog()->getPage(pageNo)->getCropBox();
 637   }
 638   replacePageDict(pageNo,
 639     getCatalog()->getPage(pageNo)->getRotate(),
 640     getCatalog()->getPage(pageNo)->getMediaBox(),
 641     cropBox);
 642   Ref *refPage = getCatalog()->getPageRef(pageNo);
 643   Object page;
 644   getXRef()->fetch(refPage->num, refPage->gen, &page);
 645
 646   if (!(f = fopen(name->getCString(), "wb"))) {
 647     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 648     return errOpenFile;
 649   }
 650   outStr = new FileOutStream(f,0);
 651
 652   yRef = new XRef(getXRef()->getTrailerDict());
 653
 654   if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
 655     yRef->setEncryption(secHdlr->getPermissionFlags(),
 656       secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
 657   }
 658   countRef = new XRef();
 659   Object *trailerObj = getXRef()->getTrailerDict();
 660   if (trailerObj->isDict()) {
 661     markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 662   }
 663   yRef->add(0, 65535, 0, gFalse);
 664   writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
 665
 666   // get and mark info dict
 667   Object infoObj;
 668   getXRef()->getDocInfo(&infoObj);
 669   if (infoObj.isDict()) {
 670     Dict *infoDict = infoObj.getDict();
 671     markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 672     if (trailerObj->isDict()) {
 673       Dict *trailerDict = trailerObj->getDict();
 674       Object ref;
 675       trailerDict->lookupNF("Info", &ref);
 676       if (ref.isRef()) {
 677         yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
 678         if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
 679           yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
 680         }
 681       }
 682       ref.free();
 683     }
 684   }
 685   infoObj.free();
 686
 687   // get and mark output intents etc.
 688   Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
 689   getXRef()->getCatalog(&catObj);
 690   Dict *catDict = catObj.getDict();
 691   catDict->lookup("Pages", &pagesObj);
 692   catDict->lookupNF("AcroForm", &afObj);
 693   if (!afObj.isNull()) {
 694     markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 695     afObj.free();
 696   }
 697   Dict *pagesDict = pagesObj.getDict();
 698   pagesDict->lookup("Resources", &resourcesObj);
 699   if (resourcesObj.isDict())
 700     markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 701   markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 702
 703   Dict *pageDict = page.getDict();
 704   markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 705   pageDict->lookupNF("Annots", &annotsObj);
 706   if (!annotsObj.isNull()) {
 707     markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 708     annotsObj.free();
 709   }
 710   yRef->markUnencrypted();
 711   writePageObjects(outStr, yRef, 0);
 712
 713   yRef->add(rootNum,0,outStr->getPos(),gTrue);
 714   outStr->printf("%d 0 obj\n", rootNum);
 715   outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
 716   for (int j = 0; j < catDict->getLength(); j++) {
 717     const char *key = catDict->getKey(j);
 718     if (strcmp(key, "Type") != 0 &&
 719       strcmp(key, "Catalog") != 0 &&
 720       strcmp(key, "Pages") != 0)
 721     {
 722       if (j > 0) outStr->printf(" ");
 723       Object value; catDict->getValNF(j, &value);
 724       outStr->printf("/%s ", key);
 725       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 726       value.free();
 727     }
 728   }
 729   catObj.free();
 730   pagesObj.free();
 731   outStr->printf(">>\nendobj\n");
 732
 733   yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
 734   outStr->printf("%d 0 obj\n", rootNum + 1);
 735   outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
 736   if (resourcesObj.isDict()) {
 737     outStr->printf("/Resources ");
 738     writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 739     resourcesObj.free();
 740   }
 741   outStr->printf(">>\n");
 742   outStr->printf("endobj\n");
 743
 744   yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
 745   outStr->printf("%d 0 obj\n", rootNum + 2);
 746   outStr->printf("<< ");
 747   for (int n = 0; n < pageDict->getLength(); n++) {
 748     if (n > 0) outStr->printf(" ");
 749     const char *key = pageDict->getKey(n);
 750     Object value; pageDict->getValNF(n, &value);
 751     if (strcmp(key, "Parent") == 0) {
 752       outStr->printf("/Parent %d 0 R", rootNum + 1);
 753     } else {
 754       outStr->printf("/%s ", key);
 755       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 756     }
 757     value.free();
 758   }
 759   outStr->printf(" >>\nendobj\n");
 760   page.free();
 761
 762   Goffset uxrefOffset = outStr->getPos();
 763   Ref ref;
 764   ref.num = rootNum;
 765   ref.gen = 0;
 766   Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
 767                                         name->getCString(), uxrefOffset);
 768   writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
 769                         uxrefOffset, outStr, getXRef());
 770   delete trailerDict;
 771
 772   outStr->close();
 773   fclose(f);
 774   delete yRef;
 775   delete countRef;
 776   delete outStr;
 777
 778   return errNone;
 779 }
 780
 781 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
 782   FILE *f;
 783   OutStream *outStr;
 784   int res;
 785
 786   if (!(f = fopen(name->getCString(), "wb"))) {
 787     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 788     return errOpenFile;
 789   }
 790   outStr = new FileOutStream(f,0);
 791   res = saveAs(outStr, mode);
 792   delete outStr;
 793   fclose(f);
 794   return res;
 795 }
 796
 797 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
 798
 799   // find if we have updated objects
 800   GBool updated = gFalse;
 801   for(int i=0; i<xref->getNumObjects(); i++) {
 802     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
 803       updated = gTrue;
 804       break;
 805     }
 806   }
 807
 808   if (!updated && mode == writeStandard) {
 809     // simply copy the original file
 810     saveWithoutChangesAs (outStr);
 811   } else if (mode == writeForceRewrite) {
 812     saveCompleteRewrite(outStr);
 813   } else {
 814     saveIncrementalUpdate(outStr);
 815   }
 816
 817   return errNone;
 818 }
 819
 820 int PDFDoc::saveWithoutChangesAs(GooString *name) {
 821   FILE *f;
 822   OutStream *outStr;
 823   int res;
 824
 825   if (!(f = fopen(name->getCString(), "wb"))) {
 826     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 827     return errOpenFile;
 828   }
 829
 830   outStr = new FileOutStream(f,0);
 831   res = saveWithoutChangesAs(outStr);
 832   delete outStr;
 833
 834   fclose(f);
 835
 836   return res;
 837 }
 838
 839 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
 840   int c;
 841
 842   BaseStream *copyStr = str->copy();
 843   copyStr->reset();
 844   while ((c = copyStr->getChar()) != EOF) {
 845     outStr->put(c);
 846   }
 847   copyStr->close();
 848   delete copyStr;
 849
 850   return errNone;
 851 }
 852
 853 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
 854 {
 855   XRef *uxref;
 856   int c;
 857   //copy the original file
 858   BaseStream *copyStr = str->copy();
 859   copyStr->reset();
 860   while ((c = copyStr->getChar()) != EOF) {
 861     outStr->put(c);
 862   }
 863   copyStr->close();
 864   delete copyStr;
 865
 866   Guchar *fileKey;
 867   CryptAlgorithm encAlgorithm;
 868   int keyLength;
 869   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 870
 871   uxref = new XRef();
 872   uxref->add(0, 65535, 0, gFalse);
 873   xref->lock();
 874   for(int i=0; i<xref->getNumObjects(); i++) {
 875     if ((xref->getEntry(i)->type == xrefEntryFree) &&
 876         (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
 877       continue;
 878
 879     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
 880       Ref ref;
 881       ref.num = i;
 882       ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
 883       if (xref->getEntry(i)->type != xrefEntryFree) {
 884         Object obj1;
 885         xref->fetch(ref.num, ref.gen, &obj1, 1);
 886         Goffset offset = writeObjectHeader(&ref, outStr);
 887         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 888         writeObjectFooter(outStr);
 889         uxref->add(ref.num, ref.gen, offset, gTrue);
 890         obj1.free();
 891       } else {
 892         uxref->add(ref.num, ref.gen, 0, gFalse);
 893       }
 894     }
 895   }
 896   xref->unlock();
 897   if (uxref->getNumObjects() == 0) { //we have nothing to update
 898     delete uxref;
 899     return;
 900   }
 901
 902   Goffset uxrefOffset = outStr->getPos();
 903   int numobjects = xref->getNumObjects();
 904   const char *fileNameA = fileName ? fileName->getCString() : NULL;
 905   Ref rootRef, uxrefStreamRef;
 906   rootRef.num = getXRef()->getRootNum();
 907   rootRef.gen = getXRef()->getRootGen();
 908
 909   // Output a xref stream if there is a xref stream already
 910   GBool xRefStream = xref->isXRefStream();
 911
 912   if (xRefStream) {
 913     // Append an entry for the xref stream itself
 914     uxrefStreamRef.num = numobjects++;
 915     uxrefStreamRef.gen = 0;
 916     uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
 917   }
 918
 919   Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
 920   if (xRefStream) {
 921     writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
 922   } else {
 923     writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
 924   }
 925
 926   delete trailerDict;
 927   delete uxref;
 928 }
 929
 930 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 931 {
 932   // Make sure that special flags are set, because we are going to read
 933   // all objects, including Unencrypted ones.
 934   xref->scanSpecialFlags();
 935
 936   Guchar *fileKey;
 937   CryptAlgorithm encAlgorithm;
 938   int keyLength;
 939   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 940
 941   outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
 942   XRef *uxref = new XRef();
 943   uxref->add(0, 65535, 0, gFalse);
 944   xref->lock();
 945   for(int i=0; i<xref->getNumObjects(); i++) {
 946     Object obj1;
 947     Ref ref;
 948     XRefEntryType type = xref->getEntry(i)->type;
 949     if (type == xrefEntryFree) {
 950       ref.num = i;
 951       ref.gen = xref->getEntry(i)->gen;
 952       /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
 953           and we don't want the one with num=0 because it has already been added (gen = 65535)*/
 954       if (ref.gen > 0 && ref.num > 0)
 955         uxref->add(ref.num, ref.gen, 0, gFalse);
 956     } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
 957       // This entry must not be written, put a free entry instead (with incremented gen)
 958       ref.num = i;
 959       ref.gen = xref->getEntry(i)->gen + 1;
 960       uxref->add(ref.num, ref.gen, 0, gFalse);
 961     } else if (type == xrefEntryUncompressed){
 962       ref.num = i;
 963       ref.gen = xref->getEntry(i)->gen;
 964       xref->fetch(ref.num, ref.gen, &obj1, 1);
 965       Goffset offset = writeObjectHeader(&ref, outStr);
 966       // Write unencrypted objects in unencrypted form
 967       if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
 968         writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
 969       } else {
 970         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 971       }
 972       writeObjectFooter(outStr);
 973       uxref->add(ref.num, ref.gen, offset, gTrue);
 974       obj1.free();
 975     } else if (type == xrefEntryCompressed) {
 976       ref.num = i;
 977       ref.gen = 0; //compressed entries have gen == 0
 978       xref->fetch(ref.num, ref.gen, &obj1, 1);
 979       Goffset offset = writeObjectHeader(&ref, outStr);
 980       writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 981       writeObjectFooter(outStr);
 982       uxref->add(ref.num, ref.gen, offset, gTrue);
 983       obj1.free();
 984     }
 985   }
 986   xref->unlock();
 987   Goffset uxrefOffset = outStr->getPos();
 988   writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
 989                         uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
 990   delete uxref;
 991 }
 992
 993 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
 994                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
 995 {
 996   Object obj1;
 997   outStr->printf("<<");
 998   for (int i=0; i<dict->getLength(); i++) {
 999     GooString keyName(dict->getKey(i));
1000     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1001     outStr->printf("/%s ", keyNameToPrint->getCString());
1002     delete keyNameToPrint;
1003     writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1004     obj1.free();
1005   }
1006   outStr->printf(">> ");
1007 }
1008
1009 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1010 {
1011   outStr->printf("stream\r\n");
1012   str->reset();
1013   for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1014     outStr->printf("%c", c);
1015   }
1016   outStr->printf("\r\nendstream\r\n");
1017 }
1018
1019 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1020 {
1021   Object obj1;
1022   str->getDict()->lookup("Length", &obj1);
1023   if (!obj1.isInt() && !obj1.isInt64()) {
1024     error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1025     return;
1026   }
1027
1028   Goffset length;
1029   if (obj1.isInt())
1030     length = obj1.getInt();
1031   else
1032     length = obj1.getInt64();
1033   obj1.free();
1034
1035   outStr->printf("stream\r\n");
1036   str->unfilteredReset();
1037   for (Goffset i = 0; i < length; i++) {
1038     int c = str->getUnfilteredChar();
1039     if (unlikely(c == EOF)) {
1040       error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1041       break;
1042     }
1043     outStr->printf("%c", c);
1044   }
1045   str->reset();
1046   outStr->printf("\r\nendstream\r\n");
1047 }
1048
1049 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1050                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1051 {
1052   // Encrypt string if encryption is enabled
1053   GooString *sEnc = NULL;
1054   if (fileKey) {
1055     Object obj;
1056     EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1057                                            fileKey, encAlgorithm, keyLength, objNum, objGen);
1058     sEnc = new GooString();
1059     int c;
1060     enc->reset();
1061     while ((c = enc->getChar()) != EOF) {
1062       sEnc->append((char)c);
1063     }
1064
1065     delete enc;
1066     s = sEnc;
1067   }
1068
1069   // Write data
1070   if (s->hasUnicodeMarker()) {
1071     //unicode string don't necessary end with \0
1072     const char* c = s->getCString();
1073     outStr->printf("(");
1074     for(int i=0; i<s->getLength(); i++) {
1075       char unescaped = *(c+i)&0x000000ff;
1076       //escape if needed
1077       if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1078         outStr->printf("%c", '\\');
1079       outStr->printf("%c", unescaped);
1080     }
1081     outStr->printf(") ");
1082   } else {
1083     const char* c = s->getCString();
1084     outStr->printf("(");
1085     for(int i=0; i<s->getLength(); i++) {
1086       char unescaped = *(c+i)&0x000000ff;
1087       //escape if needed
1088       if (unescaped == '\r')
1089         outStr->printf("\\r");
1090       else if (unescaped == '\n')
1091         outStr->printf("\\n");
1092       else {
1093         if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1094           outStr->printf("%c", '\\');
1095         }
1096         outStr->printf("%c", unescaped);
1097       }
1098     }
1099     outStr->printf(") ");
1100   }
1101
1102   delete sEnc;
1103 }
1104
1105 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1106 {
1107   Goffset offset = outStr->getPos();
1108   outStr->printf("%i %i obj ", ref->num, ref->gen);
1109   return offset;
1110 }
1111
// Serializes obj to outStr in PDF syntax, dispatching on the object type.
//
//  - obj:        object to write; nested array/dict values are written
//                unresolved (getNF / getValNF), so references stay references
//  - xRef:       used to find the end of a stream when copying it raw
//  - numOffset:  added to the object number of every reference written
//  - fileKey, encAlgorithm, keyLength, objNum, objGen:
//                encryption parameters forwarded to writeString and used to
//                wrap streams in an EncryptStream; fileKey == NULL disables
//                encryption
//
// Streams are handled in two ways: streams of kind strWeird or strCrypt are
// written decoded (and re-encrypted when a key is given) with a recomputed
// Length; every other stream is copied byte for byte by writeRawStream.
void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
                          CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
{
  Array *array;
  Object obj1;
  Goffset tmp;

  switch (obj->getType()) {
    case objBool:
      outStr->printf("%s ", obj->getBool()?"true":"false");
      break;
    case objInt:
      outStr->printf("%i ", obj->getInt());
      break;
    case objInt64:
      outStr->printf("%lli ", obj->getInt64());
      break;
    case objReal:
    {
      // format through GooString ("{0:.10g}") rather than printf
      GooString s;
      s.appendf("{0:.10g}", obj->getReal());
      outStr->printf("%s ", s.getCString());
      break;
    }
    case objString:
      writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
      break;
    case objName:
    {
      // names are sanitized before being written
      GooString name(obj->getName());
      GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
      outStr->printf("/%s ", nameToPrint->getCString());
      delete nameToPrint;
      break;
    }
    case objNull:
      outStr->printf( "null ");
      break;
    case objArray:
      // write every element unresolved, recursively
      array = obj->getArray();
      outStr->printf("[");
      for (int i=0; i<array->getLength(); i++) {
        writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
        obj1.free();
      }
      outStr->printf("] ");
      break;
    case objDict:
      writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
      break;
    case objStream:
      {
        //We can't modify stream with the current implementation (no write functions in Stream API)
        // => the only type of streams which that have been modified are internal streams (=strWeird)
        Stream *stream = obj->getStream();
        if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
          //we write the stream unencoded => TODO: write stream encoder

          // Encrypt stream
          // encStream, when created, wraps the original stream without owning
          // it (setAutoDelete(gFalse)); it is deleted after writing.
          // removeFilter says whether the Filter entry is dropped from the
          // dictionary; it is kept only when a Crypt filter is present.
          EncryptStream *encStream = NULL;
          GBool removeFilter = gTrue;
          if (stream->getKind() == strWeird && fileKey) {
            Object filter;
            stream->getDict()->lookup("Filter", &filter);
            if (!filter.isName("Crypt")) {
              if (filter.isArray()) {
                // look for a Crypt entry in the filter array
                for (int i = 0; i < filter.arrayGetLength(); i++) {
                  Object filterEle;
                  filter.arrayGet(i, &filterEle);
                  if (filterEle.isName("Crypt")) {
                    filterEle.free();
                    removeFilter = gFalse;
                    break;
                  }
                  filterEle.free();
                }
                // no Crypt filter in the array => encrypt with the file key
                if (removeFilter) {
                  encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
                  encStream->setAutoDelete(gFalse);
                  stream = encStream;
                }
              } else {
                // Filter is neither /Crypt nor an array => encrypt with the file key
                encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
                encStream->setAutoDelete(gFalse);
                stream = encStream;
              }
            } else {
              // single /Crypt filter: keep it and do not re-encrypt
              removeFilter = gFalse;
            }
            filter.free();
          } else if (fileKey != NULL) { // Encrypt stream
            encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
            encStream->setAutoDelete(gFalse);
            stream = encStream;
          }

          stream->reset();
          //recalculate stream length
          // (done by reading the whole stream once and counting the bytes)
          tmp = 0;
          for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
            tmp++;
          }
          obj1.initInt64(tmp);
          stream->getDict()->set("Length", &obj1);

          //Remove Stream encoding
          if (removeFilter) {
            stream->getDict()->remove("Filter");
          }
          stream->getDict()->remove("DecodeParms");

          writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
          writeStream (stream,outStr);
          delete encStream;
          obj1.free();
        } else {
          //raw stream copy
          // When the stream end is known to xRef, Length is overwritten with
          // (streamEnd - start of the base stream) before the copy.
          FilterStream *fs = dynamic_cast<FilterStream*>(stream);
          if (fs) {
            BaseStream *bs = fs->getBaseStream();
            if (bs) {
              Goffset streamEnd;
                if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
                  Object val;
                  val.initInt64(streamEnd - bs->getStart());
                  stream->getDict()->set("Length", &val);
                }
              }
          }
          writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
          writeRawStream (stream, outStr);
        }
        break;
      }
    case objRef:
      // object numbers are shifted by numOffset, generations are kept
      outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
      break;
    case objCmd:
      outStr->printf("%s\n", obj->getCmd());
      break;
    case objError:
      outStr->printf("error\r\n");
      break;
    case objEOF:
      outStr->printf("eof\r\n");
      break;
    case objNone:
      outStr->printf("none\r\n");
      break;
    default:
      error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
      break;
  }
}
1266
1267 void PDFDoc::writeObjectFooter (OutStream* outStr)
1268 {
1269   outStr->printf("endobj\r\n");
1270 }
1271
1272 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1273                                 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1274 {
1275   Dict *trailerDict = new Dict(xRef);
1276   Object obj1;
1277   obj1.initInt(uxrefSize);
1278   trailerDict->set("Size", &obj1);
1279   obj1.free();
1280
1281   //build a new ID, as recommended in the reference, uses:
1282   // - current time
1283   // - file name
1284   // - file size
1285   // - values of entry in information dictionnary
1286   GooString message;
1287   char buffer[256];
1288   sprintf(buffer, "%i", (int)time(NULL));
1289   message.append(buffer);
1290
1291   if (fileName)
1292     message.append(fileName);
1293
1294   sprintf(buffer, "%lli", (long long)fileSize);
1295   message.append(buffer);
1296
1297   //info dict -- only use text string
1298   if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1299     for(int i=0; i<obj1.getDict()->getLength(); i++) {
1300       Object obj2;
1301       obj1.getDict()->getVal(i, &obj2);
1302       if (obj2.isString()) {
1303         message.append(obj2.getString());
1304       }
1305       obj2.free();
1306     }
1307   }
1308   obj1.free();
1309
1310   GBool hasEncrypt = gFalse;
1311   if (!xRef->getTrailerDict()->isNone()) {
1312     Object obj2;
1313     xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1314     if (!obj2.isNull()) {
1315       trailerDict->set("Encrypt", &obj2);
1316       hasEncrypt = gTrue;
1317       obj2.free();
1318     }
1319   }
1320
1321   //calculate md5 digest
1322   Guchar digest[16];
1323   md5((Guchar*)message.getCString(), message.getLength(), digest);
1324   obj1.initString(new GooString((const char*)digest, 16));
1325
1326   //create ID array
1327   Object obj2,obj3,obj5;
1328   obj2.initArray(xRef);
1329
1330   // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1331   if (incrUpdate || hasEncrypt) {
1332     Object obj4;
1333     //only update the second part of the array
1334     xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1335     if (!obj4.isArray()) {
1336       error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1337     } else {
1338       //Get the first part of the ID
1339       obj4.arrayGet(0,&obj3);
1340
1341       obj2.arrayAdd(&obj3);
1342       obj2.arrayAdd(&obj1);
1343       trailerDict->set("ID", &obj2);
1344     }
1345     obj4.free();
1346   } else {
1347     //new file => same values for the two identifiers
1348     obj2.arrayAdd(&obj1);
1349     obj1.initString(new GooString((const char*)digest, 16));
1350     obj2.arrayAdd(&obj1);
1351     trailerDict->set("ID", &obj2);
1352   }
1353
1354   obj1.initRef(root->num, root->gen);
1355   trailerDict->set("Root", &obj1);
1356
1357   if (incrUpdate) {
1358     obj1.initInt64(startxRef);
1359     trailerDict->set("Prev", &obj1);
1360   }
1361
1362   if (!xRef->getTrailerDict()->isNone()) {
1363     xRef->getDocInfoNF(&obj5);
1364     if (!obj5.isNull()) {
1365       trailerDict->set("Info", &obj5);
1366     }
1367   }
1368
1369   return trailerDict;
1370 }
1371
1372 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1373 {
1374   uxref->writeTableToFile( outStr, writeAllEntries );
1375   outStr->printf( "trailer\r\n");
1376   writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1377   outStr->printf( "\r\nstartxref\r\n");
1378   outStr->printf( "%lli\r\n", uxrefOffset);
1379   outStr->printf( "%%%%EOF\r\n");
1380 }
1381
1382 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1383 {
1384   GooString stmData;
1385
1386   // Fill stmData and some trailerDict fields
1387   uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1388
1389   // Create XRef stream object and write it
1390   Object obj1;
1391   MemStream *mStream = new MemStream( stmData.getCString(), 0,
1392                                       stmData.getLength(), obj1.initDict(trailerDict) );
1393   writeObjectHeader(uxrefStreamRef, outStr);
1394   writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1395   writeObjectFooter(outStr);
1396   obj1.free();
1397
1398   outStr->printf( "startxref\r\n");
1399   outStr->printf( "%lli\r\n", uxrefOffset);
1400   outStr->printf( "%%%%EOF\r\n");
1401 }
1402
1403 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1404                                    int uxrefSize, OutStream* outStr, GBool incrUpdate)
1405 {
1406   const char *fileNameA = fileName ? fileName->getCString() : NULL;
1407   // file size (doesn't include the trailer)
1408   unsigned int fileSize = 0;
1409   int c;
1410   str->reset();
1411   while ((c = str->getChar()) != EOF) {
1412     fileSize++;
1413   }
1414   str->close();
1415   Ref ref;
1416   ref.num = getXRef()->getRootNum();
1417   ref.gen = getXRef()->getRootGen();
1418   Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1419                                          getXRef(), fileNameA, fileSize);
1420   writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1421   delete trailerDict;
1422 }
1423
1424 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1425 {
1426    outStr->printf("%%PDF-%d.%d\n", major, minor);
1427    outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1428 }
1429
1430 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1431 {
1432   Object obj1;
1433   for (int i=0; i<dict->getLength(); i++) {
1434     const char *key = dict->getKey(i);
1435     if (strcmp(key, "Annots") != 0) {
1436       markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1437     } else {
1438       Object annotsObj;
1439       dict->getValNF(i, &annotsObj);
1440       if (!annotsObj.isNull()) {
1441         markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1442         annotsObj.free();
1443       }
1444     }
1445     obj1.free();
1446   }
1447 }
1448
1449 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1450 {
1451   Array *array;
1452   Object obj1;
1453
1454   switch (obj->getType()) {
1455     case objArray:
1456       array = obj->getArray();
1457       for (int i=0; i<array->getLength(); i++) {
1458         markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1459         obj1.free();
1460       }
1461       break;
1462     case objDict:
1463       markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1464       break;
1465     case objStream:
1466       {
1467         Stream *stream = obj->getStream();
1468         markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1469       }
1470       break;
1471     case objRef:
1472       {
1473         if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1474           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1475             return;  // already marked as free => should be replaced
1476           }
1477           xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1478           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1479             xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1480           }
1481         }
1482         if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1483             countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1484         {
1485           countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1486         } else {
1487           XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1488           entry->gen++;
1489           if (entry->gen > 9)
1490             break;
1491         }
1492         Object obj1;
1493         getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1494         markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1495         obj1.free();
1496       }
1497       break;
1498     default:
1499       break;
1500   }
1501 }
1502
1503 void PDFDoc::replacePageDict(int pageNo, int rotate,
1504                              PDFRectangle *mediaBox,
1505                              PDFRectangle *cropBox)
1506 {
1507   Ref *refPage = getCatalog()->getPageRef(pageNo);
1508   Object page;
1509   getXRef()->fetch(refPage->num, refPage->gen, &page);
1510   Dict *pageDict = page.getDict();
1511   pageDict->remove("MediaBoxssdf");
1512   pageDict->remove("MediaBox");
1513   pageDict->remove("CropBox");
1514   pageDict->remove("ArtBox");
1515   pageDict->remove("BleedBox");
1516   pageDict->remove("TrimBox");
1517   pageDict->remove("Rotate");
1518   Object mediaBoxObj;
1519   mediaBoxObj.initArray(getXRef());
1520   Object murx;
1521   murx.initReal(mediaBox->x1);
1522   Object mury;
1523   mury.initReal(mediaBox->y1);
1524   Object mllx;
1525   mllx.initReal(mediaBox->x2);
1526   Object mlly;
1527   mlly.initReal(mediaBox->y2);
1528   mediaBoxObj.arrayAdd(&murx);
1529   mediaBoxObj.arrayAdd(&mury);
1530   mediaBoxObj.arrayAdd(&mllx);
1531   mediaBoxObj.arrayAdd(&mlly);
1532   pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1533   if (cropBox != NULL) {
1534     Object cropBoxObj;
1535     cropBoxObj.initArray(getXRef());
1536     Object curx;
1537     curx.initReal(cropBox->x1);
1538     Object cury;
1539     cury.initReal(cropBox->y1);
1540     Object cllx;
1541     cllx.initReal(cropBox->x2);
1542     Object clly;
1543     clly.initReal(cropBox->y2);
1544     cropBoxObj.arrayAdd(&curx);
1545     cropBoxObj.arrayAdd(&cury);
1546     cropBoxObj.arrayAdd(&cllx);
1547     cropBoxObj.arrayAdd(&clly);
1548     pageDict->add(copyString("CropBox"), &cropBoxObj);
1549     cropBoxObj.getArray()->incRef();
1550     pageDict->add(copyString("TrimBox"), &cropBoxObj);
1551   } else {
1552     mediaBoxObj.getArray()->incRef();
1553     pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1554   }
1555   Object rotateObj;
1556   rotateObj.initInt(rotate);
1557   pageDict->add(copyString("Rotate"), &rotateObj);
1558   getXRef()->setModifiedObject(&page, *refPage);
1559   page.free();
1560 }
1561
1562 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1563 {
1564   pageDict->remove("OpenAction");
1565   pageDict->remove("Outlines");
1566   pageDict->remove("StructTreeRoot");
1567
1568   for (int n = 0; n < pageDict->getLength(); n++) {
1569     const char *key = pageDict->getKey(n);
1570     Object value; pageDict->getValNF(n, &value);
1571     if (strcmp(key, "Parent") != 0 &&
1572               strcmp(key, "Pages") != 0 &&
1573               strcmp(key, "AcroForm") != 0 &&
1574               strcmp(key, "Annots") != 0 &&
1575               strcmp(key, "P") != 0 &&
1576         strcmp(key, "Root") != 0) {
1577       markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1578     }
1579     value.free();
1580   }
1581 }
1582
1583 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1584   Object annots;
1585   GBool modified = gFalse;
1586   annotsObj->fetch(getXRef(), &annots);
1587   if (annots.isArray()) {
1588       Array *array = annots.getArray();
1589       for (int i=array->getLength() - 1; i >= 0; i--) {
1590         Object obj1;
1591         if (array->get(i, &obj1)->isDict()) {
1592           Object type;
1593           Dict *dict = obj1.getDict();
1594           dict->lookup("Type", &type);
1595           if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1596             Object obj2;
1597             if (dict->lookupNF("P", &obj2)->isRef()) {
1598               if (obj2.getRef().num == oldPageNum) {
1599                 Object obj3;
1600                 array->getNF(i, &obj3);
1601                 if (obj3.isRef()) {
1602                   Object *newRef = new Object();
1603                   newRef->initRef(newPageNum, 0);
1604                   dict->set("P", newRef);
1605                   getXRef()->setModifiedObject(&obj1, obj3.getRef());
1606                 }
1607                 obj3.free();
1608               } else if (obj2.getRef().num == newPageNum) {
1609                 obj1.free();
1610                 obj2.free();
1611                 type.free();
1612                 continue;
1613               } else {
1614                 Object page;
1615                 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1616                 if (page.isDict()) {
1617                   Object pagetype;
1618                   Dict *dict = page.getDict();
1619                   dict->lookup("Type", &pagetype);
1620                   if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1621                     obj1.free();
1622                     obj2.free();
1623                     type.free();
1624                     page.free();
1625                     pagetype.free();
1626                     continue;
1627                   }
1628                   pagetype.free();
1629                 }
1630                 page.free();
1631                 obj1.free();
1632                 obj2.free();
1633                 type.free();
1634                 array->remove(i);
1635                 modified = gTrue;
1636                 continue;
1637               }
1638             }
1639             obj2.free();
1640           }
1641           type.free();
1642           markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1643         }
1644         obj1.free();
1645         array->getNF(i, &obj1);
1646         if (obj1.isRef()) {
1647           if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1648             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1649               continue;  // already marked as free => should be replaced
1650             }
1651             xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1652             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1653               xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1654             }
1655           }
1656           if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1657               countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1658           {
1659             countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1660           } else {
1661             XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1662             entry->gen++;
1663           }
1664         }
1665         obj1.free();
1666       }
1667   }
1668   if (annotsObj->isRef()) {
1669     if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1670       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1671         return modified;  // already marked as free => should be replaced
1672       }
1673       xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1674       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1675         xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1676       }
1677     }
1678     if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1679         countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1680     {
1681       countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1682     } else {
1683       XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1684       entry->gen++;
1685     }
1686     getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1687   }
1688   annots.free();
1689   return modified;
1690 }
1691
1692 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1693   Object acroform;
1694   GBool modified = gFalse;
1695   afObj->fetch(getXRef(), &acroform);
1696   if (acroform.isDict()) {
1697       Dict *dict = acroform.getDict();
1698       for (int i=0; i < dict->getLength(); i++) {
1699         if (strcmp(dict->getKey(i), "Fields") == 0) {
1700           Object fields;
1701           modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1702           fields.free();
1703         } else {
1704           Object obj;
1705           markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1706           obj.free();
1707         }
1708       }
1709   }
1710   if (afObj->isRef()) {
1711     if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1712       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1713         return;  // already marked as free => should be replaced
1714       }
1715       xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1716       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1717         xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1718       }
1719     }
1720     if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1721         countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1722     {
1723       countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1724     } else {
1725       XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1726       entry->gen++;
1727     }
1728     if (modified){
1729       getXRef()->setModifiedObject(&acroform, afObj->getRef());
1730     }
1731   }
1732   acroform.free();
1733   return;
1734 }
1735
1736 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1737 {
1738   Guint objectsCount = 0; //count the number of objects in the XRef(s)
1739   Guchar *fileKey;
1740   CryptAlgorithm encAlgorithm;
1741   int keyLength;
1742   xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1743
1744   for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1745     if (xRef->getEntry(n)->type != xrefEntryFree) {
1746       Object obj;
1747       Ref ref;
1748       ref.num = n;
1749       ref.gen = xRef->getEntry(n)->gen;
1750       objectsCount++;
1751       getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1752       Goffset offset = writeObjectHeader(&ref, outStr);
1753       if (combine) {
1754         writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1755       } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1756         writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1757       } else {
1758         writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1759       }
1760       writeObjectFooter(outStr);
1761       xRef->add(ref.num, ref.gen, offset, gTrue);
1762       obj.free();
1763     }
1764   }
1765   return objectsCount;
1766 }
1767
1768 #ifndef DISABLE_OUTLINE
1769 Outline *PDFDoc::getOutline()
1770 {
1771   if (!outline) {
1772     pdfdocLocker();
1773     // read outline
1774     outline = new Outline(catalog->getOutline(), xref);
1775   }
1776
1777   return outline;
1778 }
1779 #endif
1780
1781 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1782 {
1783   PDFDoc *doc = new PDFDoc();
1784   doc->errCode = errorCode;
1785   doc->fileName = fileNameA;
1786
1787   return doc;
1788 }
1789
1790 long long PDFDoc::strToLongLong(char *s) {
1791   long long x, d;
1792   char *p;
1793
1794   x = 0;
1795   for (p = s; *p && isdigit(*p & 0xff); ++p) {
1796     d = *p - '0';
1797     if (x > (LLONG_MAX - d) / 10) {
1798       break;
1799     }
1800     x = 10 * x + d;
1801   }
1802   return x;
1803 }
1804
1805 // Read the 'startxref' position.
1806 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1807 {
1808   if (startXRefPos == -1) {
1809
1810     if (isLinearized(tryingToReconstruct)) {
1811       char buf[linearizationSearchSize+1];
1812       int c, n, i;
1813
1814       str->setPos(0);
1815       for (n = 0; n < linearizationSearchSize; ++n) {
1816         if ((c = str->getChar()) == EOF) {
1817           break;
1818         }
1819         buf[n] = c;
1820       }
1821       buf[n] = '\0';
1822
1823       // find end of first obj (linearization dictionary)
1824       startXRefPos = 0;
1825       for (i = 0; i < n; i++) {
1826         if (!strncmp("endobj", &buf[i], 6)) {
1827           i += 6;
1828           //skip whitespace
1829           while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1830           startXRefPos = i;
1831           break;
1832         }
1833       }
1834     } else {
1835       char buf[xrefSearchSize+1];
1836       char *p;
1837       int c, n, i;
1838
1839       // read last xrefSearchSize bytes
1840       int segnum = 0;
1841       int maxXRefSearch = 24576;
1842       if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1843       for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1844         str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1845         for (n = 0; n < xrefSearchSize; ++n) {
1846           if ((c = str->getChar()) == EOF) {
1847             break;
1848           }
1849           buf[n] = c;
1850         }
1851         buf[n] = '\0';
1852
1853         // find startxref
1854         for (i = n - 9; i >= 0; --i) {
1855           if (!strncmp(&buf[i], "startxref", 9)) {
1856             break;
1857           }
1858         }
1859         if (i < 0) {
1860           startXRefPos = 0;
1861         } else {
1862           for (p = &buf[i + 9]; isspace(*p); ++p);
1863           startXRefPos = strToLongLong(p);
1864           break;
1865         }
1866       }
1867     }
1868
1869   }
1870
1871   return startXRefPos;
1872 }
1873
1874 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
1875 {
1876   Guint mainXRefEntriesOffset = 0;
1877
1878   if (isLinearized(tryingToReconstruct)) {
1879     mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1880   }
1881
1882   return mainXRefEntriesOffset;
1883 }
1884
1885 int PDFDoc::getNumPages()
1886 {
1887   if (isLinearized()) {
1888     int n;
1889     if ((n = getLinearization()->getNumPages())) {
1890       return n;
1891     }
1892   }
1893
1894   return catalog->getNumPages();
1895 }
1896
1897 Page *PDFDoc::parsePage(int page)
1898 {
1899   Page *p = NULL;
1900   Object obj;
1901   Ref pageRef;
1902   Dict *pageDict;
1903
1904   pageRef.num = getHints()->getPageObjectNum(page);
1905   if (!pageRef.num) {
1906     error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
1907     return NULL;
1908   }
1909
1910   // check for bogus ref - this can happen in corrupted PDF files
1911   if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1912     error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
1913     return NULL;
1914   }
1915
1916   pageRef.gen = xref->getEntry(pageRef.num)->gen;
1917   xref->fetch(pageRef.num, pageRef.gen, &obj);
1918   if (!obj.isDict("Page")) {
1919     obj.free();
1920     error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
1921     return NULL;
1922   }
1923   pageDict = obj.getDict();
1924
1925   p = new Page(this, page, pageDict, pageRef,
1926                new PageAttrs(NULL, pageDict), catalog->getForm());
1927   obj.free();
1928
1929   return p;
1930 }
1931
1932 Page *PDFDoc::getPage(int page)
1933 {
1934   if ((page < 1) || page > getNumPages()) return NULL;
1935
1936   if (isLinearized()) {
1937     pdfdocLocker();
1938     if (!pageCache) {
1939       pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
1940       for (int i = 0; i < getNumPages(); i++) {
1941         pageCache[i] = NULL;
1942       }
1943     }
1944     if (!pageCache[page-1]) {
1945       pageCache[page-1] = parsePage(page);
1946     }
1947     if (pageCache[page-1]) {
1948        return pageCache[page-1];
1949     } else {
1950        error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
1951     }
1952   }
1953
1954   return catalog->getPage(page);
1955 }