source/libs/poppler/poppler-src/poppler/PDFDoc.cc

   1 //========================================================================
   2 //
   3 // PDFDoc.cc
   4 //
   5 // Copyright 1996-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 //========================================================================
  10 //
  11 // Modified under the Poppler project - http://poppler.freedesktop.org
  12 //
  13 // All changes made under the Poppler project to this file are licensed
  14 // under GPL version 2 or later
  15 //
  16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
  17 // Copyright (C) 2005, 2007-2009, 2011-2014 Albert Astals Cid <aacid@kde.org>
  18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
  19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
  20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
  21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
  22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
  23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
  24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
  25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
  26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
  27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
  28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
  29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
  30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
  31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
  32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
  33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
  34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
  35 //
  36 // To see a description of the changes please see the Changelog file that
  37 // came with your tarball or type make ChangeLog if you are building from git
  38 //
  39 //========================================================================
  40
  41 #include <config.h>
  42
  43 #ifdef USE_GCC_PRAGMAS
  44 #pragma implementation
  45 #endif
  46
  47 #include <ctype.h>
  48 #include <locale.h>
  49 #include <stdio.h>
  50 #include <errno.h>
  51 #include <stdlib.h>
  52 #include <stddef.h>
  53 #include <string.h>
  54 #include <time.h>
  55 #include <sys/stat.h>
  56 #include "goo/gstrtod.h"
  57 #include "goo/GooString.h"
  58 #include "goo/gfile.h"
  59 #include "poppler-config.h"
  60 #include "GlobalParams.h"
  61 #include "Page.h"
  62 #include "Catalog.h"
  63 #include "Stream.h"
  64 #include "XRef.h"
  65 #include "Linearization.h"
  66 #include "Link.h"
  67 #include "OutputDev.h"
  68 #include "Error.h"
  69 #include "ErrorCodes.h"
  70 #include "Lexer.h"
  71 #include "Parser.h"
  72 #include "SecurityHandler.h"
  73 #include "Decrypt.h"
  74 #ifndef DISABLE_OUTLINE
  75 #include "Outline.h"
  76 #endif
  77 #include "PDFDoc.h"
  78 #include "Hints.h"
  79
// pdfdocLocker(): in MULTITHREADED builds this declares a MutexLocker named
// 'locker' on the PDFDoc member 'mutex'; in other builds it expands to nothing.
#if MULTITHREADED
#  define pdfdocLocker()   MutexLocker locker(&mutex)
#else
#  define pdfdocLocker()
#endif
  85
  86 //------------------------------------------------------------------------
  87
#define headerSearchSize 1024   // read this many bytes at beginning of
                                //   file to look for '%PDF'
#define pdfIdLength 32   // PDF Document IDs (PermanentId, UpdateId) length,
                         //   counted in hex digits of the textual form

#define linearizationSearchSize 1024    // read this many bytes at beginning of
                                        // file to look for linearization
                                        // dictionary

#define xrefSearchSize 1024     // read this many bytes at end of file
                                //   to look for 'startxref'
  97                                 //   to look for 'startxref'
  98
  99 //------------------------------------------------------------------------
 100 // PDFDoc
 101 //------------------------------------------------------------------------
 102
 103 void PDFDoc::init()
 104 {
 105 #if MULTITHREADED
 106   gInitMutex(&mutex);
 107 #endif
 108   ok = gFalse;
 109   errCode = errNone;
 110   fileName = NULL;
 111   file = NULL;
 112   str = NULL;
 113   xref = NULL;
 114   linearization = NULL;
 115   catalog = NULL;
 116   hints = NULL;
 117 #ifndef DISABLE_OUTLINE
 118   outline = NULL;
 119 #endif
 120   startXRefPos = -1;
 121   secHdlr = NULL;
 122   pageCache = NULL;
 123 }
 124
 125 PDFDoc::PDFDoc()
 126 {
 127   init();
 128 }
 129
 130 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 131                GooString *userPassword, void *guiDataA) {
 132   Object obj;
 133 #ifdef _WIN32
 134   int n, i;
 135 #endif
 136
 137   init();
 138
 139   fileName = fileNameA;
 140   guiData = guiDataA;
 141 #ifdef _WIN32
 142   n = fileName->getLength();
 143   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 144   for (i = 0; i < n; ++i) {
 145     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 146   }
 147   fileNameU[n] = L'\0';
 148 #endif
 149
 150   // try to open file
 151   file = GooFile::open(fileName);
 152   if (file == NULL) {
 153     // fopen() has failed.
 154     // Keep a copy of the errno returned by fopen so that it can be
 155     // referred to later.
 156     fopenErrno = errno;
 157     error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
 158     errCode = errOpenFile;
 159     return;
 160   }
 161
 162   // create stream
 163   obj.initNull();
 164   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 165
 166   ok = setup(ownerPassword, userPassword);
 167 }
 168
 169 #ifdef _WIN32
 170 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 171                GooString *userPassword, void *guiDataA) {
 172   OSVERSIONINFO version;
 173   Object obj;
 174   int i;
 175
 176   init();
 177
 178   guiData = guiDataA;
 179
 180   // save both Unicode and 8-bit copies of the file name
 181   fileName = new GooString();
 182   fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
 183   for (i = 0; i < fileNameLen; ++i) {
 184     fileName->append((char)fileNameA[i]);
 185     fileNameU[i] = fileNameA[i];
 186   }
 187   fileNameU[fileNameLen] = L'\0';
 188
 189   // try to open file
 190   // NB: _wfopen is only available in NT
 191   version.dwOSVersionInfoSize = sizeof(version);
 192   GetVersionEx(&version);
 193   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
 194     file = GooFile::open(fileNameU);
 195   } else {
 196     file = GooFile::open(fileName);
 197   }
 198   if (!file) {
 199     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
 200     errCode = errOpenFile;
 201     return;
 202   }
 203
 204   // create stream
 205   obj.initNull();
 206   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 207
 208   ok = setup(ownerPassword, userPassword);
 209 }
 210 #endif
 211
 212 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
 213                GooString *userPassword, void *guiDataA) {
 214 #ifdef _WIN32
 215   int n, i;
 216 #endif
 217
 218   init();
 219   guiData = guiDataA;
 220   if (strA->getFileName()) {
 221     fileName = strA->getFileName()->copy();
 222 #ifdef _WIN32
 223     n = fileName->getLength();
 224     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 225     for (i = 0; i < n; ++i) {
 226       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 227     }
 228     fileNameU[n] = L'\0';
 229 #endif
 230   } else {
 231     fileName = NULL;
 232 #ifdef _WIN32
 233     fileNameU = NULL;
 234 #endif
 235   }
 236   str = strA;
 237   ok = setup(ownerPassword, userPassword);
 238 }
 239
 240 GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 241   pdfdocLocker();
 242   str->setPos(0, -1);
 243   if (str->getPos() < 0)
 244   {
 245     error(errSyntaxError, -1, "Document base stream is not seekable");
 246     return gFalse;
 247   }
 248
 249   str->reset();
 250
 251   // check footer
 252   // Adobe does not seem to enforce %%EOF, so we do the same
 253 //  if (!checkFooter()) return gFalse;
 254
 255   // check header
 256   checkHeader();
 257
 258   GBool wasReconstructed = false;
 259
 260   // read xref table
 261   xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
 262   if (!xref->isOk()) {
 263     if (wasReconstructed) {
 264       delete xref;
 265       startXRefPos = -1;
 266       xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
 267     }
 268     if (!xref->isOk()) {
 269       error(errSyntaxError, -1, "Couldn't read xref table");
 270       errCode = xref->getErrorCode();
 271       return gFalse;
 272     }
 273   }
 274
 275   // check for encryption
 276   if (!checkEncryption(ownerPassword, userPassword)) {
 277     errCode = errEncrypted;
 278     return gFalse;
 279   }
 280
 281   // read catalog
 282   catalog = new Catalog(this);
 283   if (catalog && !catalog->isOk()) {
 284     if (!wasReconstructed)
 285     {
 286       // try one more time to contruct the Catalog, maybe the problem is damaged XRef
 287       delete catalog;
 288       delete xref;
 289       xref = new XRef(str, 0, 0, NULL, true);
 290       catalog = new Catalog(this);
 291     }
 292
 293     if (catalog && !catalog->isOk()) {
 294       error(errSyntaxError, -1, "Couldn't read page catalog");
 295       errCode = errBadCatalog;
 296       return gFalse;
 297     }
 298   }
 299
 300   // done
 301   return gTrue;
 302 }
 303
 304 PDFDoc::~PDFDoc() {
 305   if (pageCache) {
 306     for (int i = 0; i < getNumPages(); i++) {
 307       if (pageCache[i]) {
 308         delete pageCache[i];
 309       }
 310     }
 311     gfree(pageCache);
 312   }
 313   delete secHdlr;
 314 #ifndef DISABLE_OUTLINE
 315   if (outline) {
 316     delete outline;
 317   }
 318 #endif
 319   if (catalog) {
 320     delete catalog;
 321   }
 322   if (xref) {
 323     delete xref;
 324   }
 325   if (hints) {
 326     delete hints;
 327   }
 328   if (linearization) {
 329     delete linearization;
 330   }
 331   if (str) {
 332     delete str;
 333   }
 334   if (file) {
 335     delete file;
 336   }
 337   if (fileName) {
 338     delete fileName;
 339   }
 340 #ifdef _WIN32
 341   if (fileNameU) {
 342     gfree(fileNameU);
 343   }
 344 #endif
 345 #if MULTITHREADED
 346   gDestroyMutex(&mutex);
 347 #endif
 348 }
 349
 350
 351 // Check for a %%EOF at the end of this stream
 352 GBool PDFDoc::checkFooter() {
 353   // we look in the last 1024 chars because Adobe does the same
 354   char *eof = new char[1025];
 355   Goffset pos = str->getPos();
 356   str->setPos(1024, -1);
 357   int i, ch;
 358   for (i = 0; i < 1024; i++)
 359   {
 360     ch = str->getChar();
 361     if (ch == EOF)
 362       break;
 363     eof[i] = ch;
 364   }
 365   eof[i] = '\0';
 366
 367   bool found = false;
 368   for (i = i - 5; i >= 0; i--) {
 369     if (strncmp (&eof[i], "%%EOF", 5) == 0) {
 370       found = true;
 371       break;
 372     }
 373   }
 374   if (!found)
 375   {
 376     error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
 377     errCode = errDamaged;
 378     delete[] eof;
 379     return gFalse;
 380   }
 381   delete[] eof;
 382   str->setPos(pos);
 383   return gTrue;
 384 }
 385
 386 // Check for a PDF header on this stream.  Skip past some garbage
 387 // if necessary.
 388 void PDFDoc::checkHeader() {
 389   char hdrBuf[headerSearchSize+1];
 390   char *p;
 391   char *tokptr;
 392   int i;
 393
 394   pdfMajorVersion = 0;
 395   pdfMinorVersion = 0;
 396   for (i = 0; i < headerSearchSize; ++i) {
 397     hdrBuf[i] = str->getChar();
 398   }
 399   hdrBuf[headerSearchSize] = '\0';
 400   for (i = 0; i < headerSearchSize - 5; ++i) {
 401     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
 402       break;
 403     }
 404   }
 405   if (i >= headerSearchSize - 5) {
 406     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 407     return;
 408   }
 409   str->moveStart(i);
 410   if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
 411     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 412     return;
 413   }
 414   sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
 415   // We don't do the version check. Don't add it back in.
 416 }
 417
 418 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
 419   Object encrypt;
 420   GBool encrypted;
 421   GBool ret;
 422
 423   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
 424   if ((encrypted = encrypt.isDict())) {
 425     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
 426       if (secHdlr->isUnencrypted()) {
 427         // no encryption
 428         ret = gTrue;
 429       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
 430         // authorization succeeded
 431         xref->setEncryption(secHdlr->getPermissionFlags(),
 432                             secHdlr->getOwnerPasswordOk(),
 433                             secHdlr->getFileKey(),
 434                             secHdlr->getFileKeyLength(),
 435                             secHdlr->getEncVersion(),
 436                             secHdlr->getEncRevision(),
 437                             secHdlr->getEncAlgorithm());
 438         ret = gTrue;
 439       } else {
 440         // authorization failed
 441         ret = gFalse;
 442       }
 443     } else {
 444       // couldn't find the matching security handler
 445       ret = gFalse;
 446     }
 447   } else {
 448     // document is not encrypted
 449     ret = gTrue;
 450   }
 451   encrypt.free();
 452   return ret;
 453 }
 454
 455 void PDFDoc::displayPage(OutputDev *out, int page,
 456                          double hDPI, double vDPI, int rotate,
 457                          GBool useMediaBox, GBool crop, GBool printing,
 458                          GBool (*abortCheckCbk)(void *data),
 459                          void *abortCheckCbkData,
 460                          GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 461                          void *annotDisplayDecideCbkData, GBool copyXRef) {
 462   if (globalParams->getPrintCommands()) {
 463     printf("***** page %d *****\n", page);
 464   }
 465
 466   if (getPage(page))
 467     getPage(page)->display(out, hDPI, vDPI,
 468                                     rotate, useMediaBox, crop, printing,
 469                                     abortCheckCbk, abortCheckCbkData,
 470                                     annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 471
 472 }
 473
 474 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
 475                           double hDPI, double vDPI, int rotate,
 476                           GBool useMediaBox, GBool crop, GBool printing,
 477                           GBool (*abortCheckCbk)(void *data),
 478                           void *abortCheckCbkData,
 479                           GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 480                           void *annotDisplayDecideCbkData) {
 481   int page;
 482
 483   for (page = firstPage; page <= lastPage; ++page) {
 484     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
 485                 abortCheckCbk, abortCheckCbkData,
 486                 annotDisplayDecideCbk, annotDisplayDecideCbkData);
 487   }
 488 }
 489
 490 void PDFDoc::displayPageSlice(OutputDev *out, int page,
 491                               double hDPI, double vDPI, int rotate,
 492                               GBool useMediaBox, GBool crop, GBool printing,
 493                               int sliceX, int sliceY, int sliceW, int sliceH,
 494                               GBool (*abortCheckCbk)(void *data),
 495                               void *abortCheckCbkData,
 496                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 497                               void *annotDisplayDecideCbkData, GBool copyXRef) {
 498   if (getPage(page))
 499     getPage(page)->displaySlice(out, hDPI, vDPI,
 500                                          rotate, useMediaBox, crop,
 501                                          sliceX, sliceY, sliceW, sliceH,
 502                                          printing,
 503                                          abortCheckCbk, abortCheckCbkData,
 504                                          annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 505 }
 506
 507 Links *PDFDoc::getLinks(int page) {
 508   Page *p = getPage(page);
 509   if (!p) {
 510     return new Links (NULL);
 511   }
 512   return p->getLinks();
 513 }
 514
 515 void PDFDoc::processLinks(OutputDev *out, int page) {
 516   if (getPage(page))
 517     getPage(page)->processLinks(out);
 518 }
 519
 520 Linearization *PDFDoc::getLinearization()
 521 {
 522   if (!linearization) {
 523     linearization = new Linearization(str);
 524   }
 525   return linearization;
 526 }
 527
 528 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
 529   if ((str->getLength()) &&
 530       (getLinearization()->getLength() == str->getLength()))
 531     return gTrue;
 532   else {
 533     if (tryingToReconstruct)
 534       return getLinearization()->getLength() > 0;
 535     else
 536       return gFalse;
 537   }
 538 }
 539
 540 static GBool
 541 get_id (GooString *encodedidstring, GooString *id) {
 542   const char *encodedid = encodedidstring->getCString();
 543   char pdfid[pdfIdLength + 1];
 544   int n;
 545
 546   if (encodedidstring->getLength() != pdfIdLength / 2)
 547     return gFalse;
 548
 549   n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
 550               encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
 551               encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
 552               encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
 553               encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
 554   if (n != pdfIdLength)
 555     return gFalse;
 556
 557   id->Set(pdfid, pdfIdLength);
 558   return gTrue;
 559 }
 560
 561 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
 562   Object obj;
 563   xref->getTrailerDict()->dictLookup ("ID", &obj);
 564
 565   if (obj.isArray() && obj.arrayGetLength() == 2) {
 566     Object obj2;
 567
 568     if (permanent_id) {
 569       if (obj.arrayGet(0, &obj2)->isString()) {
 570         if (!get_id (obj2.getString(), permanent_id)) {
 571           obj2.free();
 572           return gFalse;
 573         }
 574       } else {
 575         error(errSyntaxError, -1, "Invalid permanent ID");
 576         obj2.free();
 577         return gFalse;
 578       }
 579       obj2.free();
 580     }
 581
 582     if (update_id) {
 583       if (obj.arrayGet(1, &obj2)->isString()) {
 584         if (!get_id (obj2.getString(), update_id)) {
 585           obj2.free();
 586           return gFalse;
 587         }
 588       } else {
 589         error(errSyntaxError, -1, "Invalid update ID");
 590         obj2.free();
 591         return gFalse;
 592       }
 593       obj2.free();
 594     }
 595
 596     obj.free();
 597
 598     return gTrue;
 599   }
 600   obj.free();
 601
 602   return gFalse;
 603 }
 604
 605 Hints *PDFDoc::getHints()
 606 {
 607   if (!hints && isLinearized()) {
 608     hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
 609   }
 610
 611   return hints;
 612 }
 613
 614 int PDFDoc::savePageAs(GooString *name, int pageNo)
 615 {
 616   FILE *f;
 617   OutStream *outStr;
 618   XRef *yRef, *countRef;
 619   int rootNum = getXRef()->getNumObjects() + 1;
 620
 621   // Make sure that special flags are set, because we are going to read
 622   // all objects, including Unencrypted ones.
 623   xref->scanSpecialFlags();
 624
 625   Guchar *fileKey;
 626   CryptAlgorithm encAlgorithm;
 627   int keyLength;
 628   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 629
 630   if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
 631     error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
 632     return errOpenFile;
 633   }
 634   PDFRectangle *cropBox = NULL;
 635   if (getCatalog()->getPage(pageNo)->isCropped()) {
 636     cropBox = getCatalog()->getPage(pageNo)->getCropBox();
 637   }
 638   replacePageDict(pageNo,
 639     getCatalog()->getPage(pageNo)->getRotate(),
 640     getCatalog()->getPage(pageNo)->getMediaBox(),
 641     cropBox);
 642   Ref *refPage = getCatalog()->getPageRef(pageNo);
 643   Object page;
 644   getXRef()->fetch(refPage->num, refPage->gen, &page);
 645
 646   if (!(f = fopen(name->getCString(), "wb"))) {
 647     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 648     return errOpenFile;
 649   }
 650   outStr = new FileOutStream(f,0);
 651
 652   yRef = new XRef(getXRef()->getTrailerDict());
 653
 654   if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
 655     yRef->setEncryption(secHdlr->getPermissionFlags(),
 656       secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
 657   }
 658   countRef = new XRef();
 659   Object *trailerObj = getXRef()->getTrailerDict();
 660   if (trailerObj->isDict()) {
 661     markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 662   }
 663   yRef->add(0, 65535, 0, gFalse);
 664   writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
 665
 666   // get and mark info dict
 667   Object infoObj;
 668   getXRef()->getDocInfo(&infoObj);
 669   if (infoObj.isDict()) {
 670     Dict *infoDict = infoObj.getDict();
 671     markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 672     if (trailerObj->isDict()) {
 673       Dict *trailerDict = trailerObj->getDict();
 674       Object ref;
 675       trailerDict->lookupNF("Info", &ref);
 676       if (ref.isRef()) {
 677         yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
 678         if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
 679           yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
 680         }
 681       }
 682       ref.free();
 683     }
 684   }
 685   infoObj.free();
 686
 687   // get and mark output intents etc.
 688   Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
 689   getXRef()->getCatalog(&catObj);
 690   Dict *catDict = catObj.getDict();
 691   catDict->lookup("Pages", &pagesObj);
 692   catDict->lookupNF("AcroForm", &afObj);
 693   if (!afObj.isNull()) {
 694     markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 695     afObj.free();
 696   }
 697   Dict *pagesDict = pagesObj.getDict();
 698   pagesDict->lookup("Resources", &resourcesObj);
 699   if (resourcesObj.isDict())
 700     markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 701   markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 702
 703   Dict *pageDict = page.getDict();
 704   if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
 705     Dict *resourceDict = getCatalog()->getPage(pageNo)->getResourceDict();
 706     if (resourceDict != NULL) {
 707       resourcesObj.initDict(resourceDict);
 708       markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 709     }
 710   }
 711   markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 712   pageDict->lookupNF("Annots", &annotsObj);
 713   if (!annotsObj.isNull()) {
 714     markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 715     annotsObj.free();
 716   }
 717   yRef->markUnencrypted();
 718   writePageObjects(outStr, yRef, 0);
 719
 720   yRef->add(rootNum,0,outStr->getPos(),gTrue);
 721   outStr->printf("%d 0 obj\n", rootNum);
 722   outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
 723   for (int j = 0; j < catDict->getLength(); j++) {
 724     const char *key = catDict->getKey(j);
 725     if (strcmp(key, "Type") != 0 &&
 726       strcmp(key, "Catalog") != 0 &&
 727       strcmp(key, "Pages") != 0)
 728     {
 729       if (j > 0) outStr->printf(" ");
 730       Object value; catDict->getValNF(j, &value);
 731       outStr->printf("/%s ", key);
 732       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 733       value.free();
 734     }
 735   }
 736   catObj.free();
 737   pagesObj.free();
 738   outStr->printf(">>\nendobj\n");
 739
 740   yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
 741   outStr->printf("%d 0 obj\n", rootNum + 1);
 742   outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
 743   if (resourcesObj.isDict()) {
 744     outStr->printf("/Resources ");
 745     writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 746     resourcesObj.free();
 747   }
 748   outStr->printf(">>\n");
 749   outStr->printf("endobj\n");
 750
 751   yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
 752   outStr->printf("%d 0 obj\n", rootNum + 2);
 753   outStr->printf("<< ");
 754   for (int n = 0; n < pageDict->getLength(); n++) {
 755     if (n > 0) outStr->printf(" ");
 756     const char *key = pageDict->getKey(n);
 757     Object value; pageDict->getValNF(n, &value);
 758     if (strcmp(key, "Parent") == 0) {
 759       outStr->printf("/Parent %d 0 R", rootNum + 1);
 760     } else {
 761       outStr->printf("/%s ", key);
 762       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 763     }
 764     value.free();
 765   }
 766   outStr->printf(" >>\nendobj\n");
 767   page.free();
 768
 769   Goffset uxrefOffset = outStr->getPos();
 770   Ref ref;
 771   ref.num = rootNum;
 772   ref.gen = 0;
 773   Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
 774                                         name->getCString(), uxrefOffset);
 775   writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
 776                         uxrefOffset, outStr, getXRef());
 777   delete trailerDict;
 778
 779   outStr->close();
 780   fclose(f);
 781   delete yRef;
 782   delete countRef;
 783   delete outStr;
 784
 785   return errNone;
 786 }
 787
 788 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
 789   FILE *f;
 790   OutStream *outStr;
 791   int res;
 792
 793   if (!(f = fopen(name->getCString(), "wb"))) {
 794     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 795     return errOpenFile;
 796   }
 797   outStr = new FileOutStream(f,0);
 798   res = saveAs(outStr, mode);
 799   delete outStr;
 800   fclose(f);
 801   return res;
 802 }
 803
 804 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
 805
 806   // find if we have updated objects
 807   GBool updated = gFalse;
 808   for(int i=0; i<xref->getNumObjects(); i++) {
 809     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
 810       updated = gTrue;
 811       break;
 812     }
 813   }
 814
 815   if (!updated && mode == writeStandard) {
 816     // simply copy the original file
 817     saveWithoutChangesAs (outStr);
 818   } else if (mode == writeForceRewrite) {
 819     saveCompleteRewrite(outStr);
 820   } else {
 821     saveIncrementalUpdate(outStr);
 822   }
 823
 824   return errNone;
 825 }
 826
 827 int PDFDoc::saveWithoutChangesAs(GooString *name) {
 828   FILE *f;
 829   OutStream *outStr;
 830   int res;
 831
 832   if (!(f = fopen(name->getCString(), "wb"))) {
 833     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 834     return errOpenFile;
 835   }
 836
 837   outStr = new FileOutStream(f,0);
 838   res = saveWithoutChangesAs(outStr);
 839   delete outStr;
 840
 841   fclose(f);
 842
 843   return res;
 844 }
 845
 846 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
 847   int c;
 848
 849   BaseStream *copyStr = str->copy();
 850   copyStr->reset();
 851   while ((c = copyStr->getChar()) != EOF) {
 852     outStr->put(c);
 853   }
 854   copyStr->close();
 855   delete copyStr;
 856
 857   return errNone;
 858 }
 859
 860 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
 861 {
 862   XRef *uxref;
 863   int c;
 864   //copy the original file
 865   BaseStream *copyStr = str->copy();
 866   copyStr->reset();
 867   while ((c = copyStr->getChar()) != EOF) {
 868     outStr->put(c);
 869   }
 870   copyStr->close();
 871   delete copyStr;
 872
 873   Guchar *fileKey;
 874   CryptAlgorithm encAlgorithm;
 875   int keyLength;
 876   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 877
 878   uxref = new XRef();
 879   uxref->add(0, 65535, 0, gFalse);
 880   xref->lock();
 881   for(int i=0; i<xref->getNumObjects(); i++) {
 882     if ((xref->getEntry(i)->type == xrefEntryFree) &&
 883         (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
 884       continue;
 885
 886     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
 887       Ref ref;
 888       ref.num = i;
 889       ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
 890       if (xref->getEntry(i)->type != xrefEntryFree) {
 891         Object obj1;
 892         xref->fetch(ref.num, ref.gen, &obj1, 1);
 893         Goffset offset = writeObjectHeader(&ref, outStr);
 894         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 895         writeObjectFooter(outStr);
 896         uxref->add(ref.num, ref.gen, offset, gTrue);
 897         obj1.free();
 898       } else {
 899         uxref->add(ref.num, ref.gen, 0, gFalse);
 900       }
 901     }
 902   }
 903   xref->unlock();
 904   if (uxref->getNumObjects() == 0) { //we have nothing to update
 905     delete uxref;
 906     return;
 907   }
 908
 909   Goffset uxrefOffset = outStr->getPos();
 910   int numobjects = xref->getNumObjects();
 911   const char *fileNameA = fileName ? fileName->getCString() : NULL;
 912   Ref rootRef, uxrefStreamRef;
 913   rootRef.num = getXRef()->getRootNum();
 914   rootRef.gen = getXRef()->getRootGen();
 915
 916   // Output a xref stream if there is a xref stream already
 917   GBool xRefStream = xref->isXRefStream();
 918
 919   if (xRefStream) {
 920     // Append an entry for the xref stream itself
 921     uxrefStreamRef.num = numobjects++;
 922     uxrefStreamRef.gen = 0;
 923     uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
 924   }
 925
 926   Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
 927   if (xRefStream) {
 928     writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
 929   } else {
 930     writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
 931   }
 932
 933   delete trailerDict;
 934   delete uxref;
 935 }
 936
 937 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 938 {
 939   // Make sure that special flags are set, because we are going to read
 940   // all objects, including Unencrypted ones.
 941   xref->scanSpecialFlags();
 942
 943   Guchar *fileKey;
 944   CryptAlgorithm encAlgorithm;
 945   int keyLength;
 946   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 947
 948   outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
 949   XRef *uxref = new XRef();
 950   uxref->add(0, 65535, 0, gFalse);
 951   xref->lock();
 952   for(int i=0; i<xref->getNumObjects(); i++) {
 953     Object obj1;
 954     Ref ref;
 955     XRefEntryType type = xref->getEntry(i)->type;
 956     if (type == xrefEntryFree) {
 957       ref.num = i;
 958       ref.gen = xref->getEntry(i)->gen;
 959       /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
 960           and we don't want the one with num=0 because it has already been added (gen = 65535)*/
 961       if (ref.gen > 0 && ref.num > 0)
 962         uxref->add(ref.num, ref.gen, 0, gFalse);
 963     } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
 964       // This entry must not be written, put a free entry instead (with incremented gen)
 965       ref.num = i;
 966       ref.gen = xref->getEntry(i)->gen + 1;
 967       uxref->add(ref.num, ref.gen, 0, gFalse);
 968     } else if (type == xrefEntryUncompressed){
 969       ref.num = i;
 970       ref.gen = xref->getEntry(i)->gen;
 971       xref->fetch(ref.num, ref.gen, &obj1, 1);
 972       Goffset offset = writeObjectHeader(&ref, outStr);
 973       // Write unencrypted objects in unencrypted form
 974       if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
 975         writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
 976       } else {
 977         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 978       }
 979       writeObjectFooter(outStr);
 980       uxref->add(ref.num, ref.gen, offset, gTrue);
 981       obj1.free();
 982     } else if (type == xrefEntryCompressed) {
 983       ref.num = i;
 984       ref.gen = 0; //compressed entries have gen == 0
 985       xref->fetch(ref.num, ref.gen, &obj1, 1);
 986       Goffset offset = writeObjectHeader(&ref, outStr);
 987       writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
 988       writeObjectFooter(outStr);
 989       uxref->add(ref.num, ref.gen, offset, gTrue);
 990       obj1.free();
 991     }
 992   }
 993   xref->unlock();
 994   Goffset uxrefOffset = outStr->getPos();
 995   writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
 996                         uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
 997   delete uxref;
 998 }
 999
1000 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1001                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1002 {
1003   Object obj1;
1004   outStr->printf("<<");
1005   for (int i=0; i<dict->getLength(); i++) {
1006     GooString keyName(dict->getKey(i));
1007     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1008     outStr->printf("/%s ", keyNameToPrint->getCString());
1009     delete keyNameToPrint;
1010     writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1011     obj1.free();
1012   }
1013   outStr->printf(">> ");
1014 }
1015
1016 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1017 {
1018   outStr->printf("stream\r\n");
1019   str->reset();
1020   for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1021     outStr->printf("%c", c);
1022   }
1023   outStr->printf("\r\nendstream\r\n");
1024 }
1025
1026 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1027 {
1028   Object obj1;
1029   str->getDict()->lookup("Length", &obj1);
1030   if (!obj1.isInt() && !obj1.isInt64()) {
1031     error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1032     return;
1033   }
1034
1035   Goffset length;
1036   if (obj1.isInt())
1037     length = obj1.getInt();
1038   else
1039     length = obj1.getInt64();
1040   obj1.free();
1041
1042   outStr->printf("stream\r\n");
1043   str->unfilteredReset();
1044   for (Goffset i = 0; i < length; i++) {
1045     int c = str->getUnfilteredChar();
1046     if (unlikely(c == EOF)) {
1047       error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1048       break;
1049     }
1050     outStr->printf("%c", c);
1051   }
1052   str->reset();
1053   outStr->printf("\r\nendstream\r\n");
1054 }
1055
1056 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1057                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1058 {
1059   // Encrypt string if encryption is enabled
1060   GooString *sEnc = NULL;
1061   if (fileKey) {
1062     Object obj;
1063     EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1064                                            fileKey, encAlgorithm, keyLength, objNum, objGen);
1065     sEnc = new GooString();
1066     int c;
1067     enc->reset();
1068     while ((c = enc->getChar()) != EOF) {
1069       sEnc->append((char)c);
1070     }
1071
1072     delete enc;
1073     s = sEnc;
1074   }
1075
1076   // Write data
1077   if (s->hasUnicodeMarker()) {
1078     //unicode string don't necessary end with \0
1079     const char* c = s->getCString();
1080     outStr->printf("(");
1081     for(int i=0; i<s->getLength(); i++) {
1082       char unescaped = *(c+i)&0x000000ff;
1083       //escape if needed
1084       if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1085         outStr->printf("%c", '\\');
1086       outStr->printf("%c", unescaped);
1087     }
1088     outStr->printf(") ");
1089   } else {
1090     const char* c = s->getCString();
1091     outStr->printf("(");
1092     for(int i=0; i<s->getLength(); i++) {
1093       char unescaped = *(c+i)&0x000000ff;
1094       //escape if needed
1095       if (unescaped == '\r')
1096         outStr->printf("\\r");
1097       else if (unescaped == '\n')
1098         outStr->printf("\\n");
1099       else {
1100         if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1101           outStr->printf("%c", '\\');
1102         }
1103         outStr->printf("%c", unescaped);
1104       }
1105     }
1106     outStr->printf(") ");
1107   }
1108
1109   delete sEnc;
1110 }
1111
1112 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1113 {
1114   Goffset offset = outStr->getPos();
1115   outStr->printf("%i %i obj ", ref->num, ref->gen);
1116   return offset;
1117 }
1118
1119 void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1120                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1121 {
1122   Array *array;
1123   Object obj1;
1124   Goffset tmp;
1125
1126   switch (obj->getType()) {
1127     case objBool:
1128       outStr->printf("%s ", obj->getBool()?"true":"false");
1129       break;
1130     case objInt:
1131       outStr->printf("%i ", obj->getInt());
1132       break;
1133     case objInt64:
1134       outStr->printf("%lli ", obj->getInt64());
1135       break;
1136     case objReal:
1137     {
1138       GooString s;
1139       s.appendf("{0:.10g}", obj->getReal());
1140       outStr->printf("%s ", s.getCString());
1141       break;
1142     }
1143     case objString:
1144       writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
1145       break;
1146     case objName:
1147     {
1148       GooString name(obj->getName());
1149       GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
1150       outStr->printf("/%s ", nameToPrint->getCString());
1151       delete nameToPrint;
1152       break;
1153     }
1154     case objNull:
1155       outStr->printf( "null ");
1156       break;
1157     case objArray:
1158       array = obj->getArray();
1159       outStr->printf("[");
1160       for (int i=0; i<array->getLength(); i++) {
1161         writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1162         obj1.free();
1163       }
1164       outStr->printf("] ");
1165       break;
1166     case objDict:
1167       writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1168       break;
1169     case objStream:
1170       {
1171         //We can't modify stream with the current implementation (no write functions in Stream API)
1172         // => the only type of streams which that have been modified are internal streams (=strWeird)
1173         Stream *stream = obj->getStream();
1174         if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1175           //we write the stream unencoded => TODO: write stream encoder
1176
1177           // Encrypt stream
1178           EncryptStream *encStream = NULL;
1179           GBool removeFilter = gTrue;
1180           if (stream->getKind() == strWeird && fileKey) {
1181             Object filter;
1182             stream->getDict()->lookup("Filter", &filter);
1183             if (!filter.isName("Crypt")) {
1184               if (filter.isArray()) {
1185                 for (int i = 0; i < filter.arrayGetLength(); i++) {
1186                   Object filterEle;
1187                   filter.arrayGet(i, &filterEle);
1188                   if (filterEle.isName("Crypt")) {
1189                     filterEle.free();
1190                     removeFilter = gFalse;
1191                     break;
1192                   }
1193                   filterEle.free();
1194                 }
1195                 if (removeFilter) {
1196                   encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1197                   encStream->setAutoDelete(gFalse);
1198                   stream = encStream;
1199                 }
1200               } else {
1201                 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1202                 encStream->setAutoDelete(gFalse);
1203                 stream = encStream;
1204               }
1205             } else {
1206               removeFilter = gFalse;
1207             }
1208             filter.free();
1209           } else if (fileKey != NULL) { // Encrypt stream
1210             encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1211             encStream->setAutoDelete(gFalse);
1212             stream = encStream;
1213           }
1214
1215           stream->reset();
1216           //recalculate stream length
1217           tmp = 0;
1218           for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
1219             tmp++;
1220           }
1221           obj1.initInt64(tmp);
1222           stream->getDict()->set("Length", &obj1);
1223
1224           //Remove Stream encoding
1225           if (removeFilter) {
1226             stream->getDict()->remove("Filter");
1227           }
1228           stream->getDict()->remove("DecodeParms");
1229
1230           writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1231           writeStream (stream,outStr);
1232           delete encStream;
1233           obj1.free();
1234         } else {
1235           //raw stream copy
1236           FilterStream *fs = dynamic_cast<FilterStream*>(stream);
1237           if (fs) {
1238             BaseStream *bs = fs->getBaseStream();
1239             if (bs) {
1240               Goffset streamEnd;
1241                 if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1242                   Object val;
1243                   val.initInt64(streamEnd - bs->getStart());
1244                   stream->getDict()->set("Length", &val);
1245                 }
1246               }
1247           }
1248           writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1249           writeRawStream (stream, outStr);
1250         }
1251         break;
1252       }
1253     case objRef:
1254       outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1255       break;
1256     case objCmd:
1257       outStr->printf("%s\n", obj->getCmd());
1258       break;
1259     case objError:
1260       outStr->printf("error\r\n");
1261       break;
1262     case objEOF:
1263       outStr->printf("eof\r\n");
1264       break;
1265     case objNone:
1266       outStr->printf("none\r\n");
1267       break;
1268     default:
1269       error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1270       break;
1271   }
1272 }
1273
1274 void PDFDoc::writeObjectFooter (OutStream* outStr)
1275 {
1276   outStr->printf("endobj\r\n");
1277 }
1278
1279 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1280                                 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1281 {
1282   Dict *trailerDict = new Dict(xRef);
1283   Object obj1;
1284   obj1.initInt(uxrefSize);
1285   trailerDict->set("Size", &obj1);
1286   obj1.free();
1287
1288   //build a new ID, as recommended in the reference, uses:
1289   // - current time
1290   // - file name
1291   // - file size
1292   // - values of entry in information dictionnary
1293   GooString message;
1294   char buffer[256];
1295   sprintf(buffer, "%i", (int)time(NULL));
1296   message.append(buffer);
1297
1298   if (fileName)
1299     message.append(fileName);
1300
1301   sprintf(buffer, "%lli", (long long)fileSize);
1302   message.append(buffer);
1303
1304   //info dict -- only use text string
1305   if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1306     for(int i=0; i<obj1.getDict()->getLength(); i++) {
1307       Object obj2;
1308       obj1.getDict()->getVal(i, &obj2);
1309       if (obj2.isString()) {
1310         message.append(obj2.getString());
1311       }
1312       obj2.free();
1313     }
1314   }
1315   obj1.free();
1316
1317   GBool hasEncrypt = gFalse;
1318   if (!xRef->getTrailerDict()->isNone()) {
1319     Object obj2;
1320     xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1321     if (!obj2.isNull()) {
1322       trailerDict->set("Encrypt", &obj2);
1323       hasEncrypt = gTrue;
1324       obj2.free();
1325     }
1326   }
1327
1328   //calculate md5 digest
1329   Guchar digest[16];
1330   md5((Guchar*)message.getCString(), message.getLength(), digest);
1331   obj1.initString(new GooString((const char*)digest, 16));
1332
1333   //create ID array
1334   Object obj2,obj3,obj5;
1335   obj2.initArray(xRef);
1336
1337   // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1338   if (incrUpdate || hasEncrypt) {
1339     Object obj4;
1340     //only update the second part of the array
1341     xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1342     if (!obj4.isArray()) {
1343       error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1344     } else {
1345       //Get the first part of the ID
1346       obj4.arrayGet(0,&obj3);
1347
1348       obj2.arrayAdd(&obj3);
1349       obj2.arrayAdd(&obj1);
1350       trailerDict->set("ID", &obj2);
1351     }
1352     obj4.free();
1353   } else {
1354     //new file => same values for the two identifiers
1355     obj2.arrayAdd(&obj1);
1356     obj1.initString(new GooString((const char*)digest, 16));
1357     obj2.arrayAdd(&obj1);
1358     trailerDict->set("ID", &obj2);
1359   }
1360
1361   obj1.initRef(root->num, root->gen);
1362   trailerDict->set("Root", &obj1);
1363
1364   if (incrUpdate) {
1365     obj1.initInt64(startxRef);
1366     trailerDict->set("Prev", &obj1);
1367   }
1368
1369   if (!xRef->getTrailerDict()->isNone()) {
1370     xRef->getDocInfoNF(&obj5);
1371     if (!obj5.isNull()) {
1372       trailerDict->set("Info", &obj5);
1373     }
1374   }
1375
1376   return trailerDict;
1377 }
1378
1379 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1380 {
1381   uxref->writeTableToFile( outStr, writeAllEntries );
1382   outStr->printf( "trailer\r\n");
1383   writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1384   outStr->printf( "\r\nstartxref\r\n");
1385   outStr->printf( "%lli\r\n", uxrefOffset);
1386   outStr->printf( "%%%%EOF\r\n");
1387 }
1388
1389 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1390 {
1391   GooString stmData;
1392
1393   // Fill stmData and some trailerDict fields
1394   uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1395
1396   // Create XRef stream object and write it
1397   Object obj1;
1398   MemStream *mStream = new MemStream( stmData.getCString(), 0,
1399                                       stmData.getLength(), obj1.initDict(trailerDict) );
1400   writeObjectHeader(uxrefStreamRef, outStr);
1401   writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1402   writeObjectFooter(outStr);
1403   obj1.free();
1404
1405   outStr->printf( "startxref\r\n");
1406   outStr->printf( "%lli\r\n", uxrefOffset);
1407   outStr->printf( "%%%%EOF\r\n");
1408 }
1409
1410 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1411                                    int uxrefSize, OutStream* outStr, GBool incrUpdate)
1412 {
1413   const char *fileNameA = fileName ? fileName->getCString() : NULL;
1414   // file size (doesn't include the trailer)
1415   unsigned int fileSize = 0;
1416   int c;
1417   str->reset();
1418   while ((c = str->getChar()) != EOF) {
1419     fileSize++;
1420   }
1421   str->close();
1422   Ref ref;
1423   ref.num = getXRef()->getRootNum();
1424   ref.gen = getXRef()->getRootGen();
1425   Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1426                                          getXRef(), fileNameA, fileSize);
1427   writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1428   delete trailerDict;
1429 }
1430
1431 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1432 {
1433    outStr->printf("%%PDF-%d.%d\n", major, minor);
1434    outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1435 }
1436
1437 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1438 {
1439   Object obj1;
1440   for (int i=0; i<dict->getLength(); i++) {
1441     const char *key = dict->getKey(i);
1442     if (strcmp(key, "Annots") != 0) {
1443       markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1444     } else {
1445       Object annotsObj;
1446       dict->getValNF(i, &annotsObj);
1447       if (!annotsObj.isNull()) {
1448         markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1449         annotsObj.free();
1450       }
1451     }
1452     obj1.free();
1453   }
1454 }
1455
1456 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1457 {
1458   Array *array;
1459   Object obj1;
1460
1461   switch (obj->getType()) {
1462     case objArray:
1463       array = obj->getArray();
1464       for (int i=0; i<array->getLength(); i++) {
1465         markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1466         obj1.free();
1467       }
1468       break;
1469     case objDict:
1470       markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1471       break;
1472     case objStream:
1473       {
1474         Stream *stream = obj->getStream();
1475         markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1476       }
1477       break;
1478     case objRef:
1479       {
1480         if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1481           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1482             return;  // already marked as free => should be replaced
1483           }
1484           xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1485           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1486             xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1487           }
1488         }
1489         if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1490             countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1491         {
1492           countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1493         } else {
1494           XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1495           entry->gen++;
1496           if (entry->gen > 9)
1497             break;
1498         }
1499         Object obj1;
1500         getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1501         markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1502         obj1.free();
1503       }
1504       break;
1505     default:
1506       break;
1507   }
1508 }
1509
1510 void PDFDoc::replacePageDict(int pageNo, int rotate,
1511                              PDFRectangle *mediaBox,
1512                              PDFRectangle *cropBox)
1513 {
1514   Ref *refPage = getCatalog()->getPageRef(pageNo);
1515   Object page;
1516   getXRef()->fetch(refPage->num, refPage->gen, &page);
1517   Dict *pageDict = page.getDict();
1518   pageDict->remove("MediaBoxssdf");
1519   pageDict->remove("MediaBox");
1520   pageDict->remove("CropBox");
1521   pageDict->remove("ArtBox");
1522   pageDict->remove("BleedBox");
1523   pageDict->remove("TrimBox");
1524   pageDict->remove("Rotate");
1525   Object mediaBoxObj;
1526   mediaBoxObj.initArray(getXRef());
1527   Object murx;
1528   murx.initReal(mediaBox->x1);
1529   Object mury;
1530   mury.initReal(mediaBox->y1);
1531   Object mllx;
1532   mllx.initReal(mediaBox->x2);
1533   Object mlly;
1534   mlly.initReal(mediaBox->y2);
1535   mediaBoxObj.arrayAdd(&murx);
1536   mediaBoxObj.arrayAdd(&mury);
1537   mediaBoxObj.arrayAdd(&mllx);
1538   mediaBoxObj.arrayAdd(&mlly);
1539   pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1540   if (cropBox != NULL) {
1541     Object cropBoxObj;
1542     cropBoxObj.initArray(getXRef());
1543     Object curx;
1544     curx.initReal(cropBox->x1);
1545     Object cury;
1546     cury.initReal(cropBox->y1);
1547     Object cllx;
1548     cllx.initReal(cropBox->x2);
1549     Object clly;
1550     clly.initReal(cropBox->y2);
1551     cropBoxObj.arrayAdd(&curx);
1552     cropBoxObj.arrayAdd(&cury);
1553     cropBoxObj.arrayAdd(&cllx);
1554     cropBoxObj.arrayAdd(&clly);
1555     pageDict->add(copyString("CropBox"), &cropBoxObj);
1556     cropBoxObj.getArray()->incRef();
1557     pageDict->add(copyString("TrimBox"), &cropBoxObj);
1558   } else {
1559     mediaBoxObj.getArray()->incRef();
1560     pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1561   }
1562   Object rotateObj;
1563   rotateObj.initInt(rotate);
1564   pageDict->add(copyString("Rotate"), &rotateObj);
1565   getXRef()->setModifiedObject(&page, *refPage);
1566   page.free();
1567 }
1568
1569 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1570 {
1571   pageDict->remove("OpenAction");
1572   pageDict->remove("Outlines");
1573   pageDict->remove("StructTreeRoot");
1574
1575   for (int n = 0; n < pageDict->getLength(); n++) {
1576     const char *key = pageDict->getKey(n);
1577     Object value; pageDict->getValNF(n, &value);
1578     if (strcmp(key, "Parent") != 0 &&
1579               strcmp(key, "Pages") != 0 &&
1580               strcmp(key, "AcroForm") != 0 &&
1581               strcmp(key, "Annots") != 0 &&
1582               strcmp(key, "P") != 0 &&
1583         strcmp(key, "Root") != 0) {
1584       markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1585     }
1586     value.free();
1587   }
1588 }
1589
1590 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1591   Object annots;
1592   GBool modified = gFalse;
1593   annotsObj->fetch(getXRef(), &annots);
1594   if (annots.isArray()) {
1595       Array *array = annots.getArray();
1596       for (int i=array->getLength() - 1; i >= 0; i--) {
1597         Object obj1;
1598         if (array->get(i, &obj1)->isDict()) {
1599           Object type;
1600           Dict *dict = obj1.getDict();
1601           dict->lookup("Type", &type);
1602           if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1603             Object obj2;
1604             if (dict->lookupNF("P", &obj2)->isRef()) {
1605               if (obj2.getRef().num == oldPageNum) {
1606                 Object obj3;
1607                 array->getNF(i, &obj3);
1608                 if (obj3.isRef()) {
1609                   Object *newRef = new Object();
1610                   newRef->initRef(newPageNum, 0);
1611                   dict->set("P", newRef);
1612                   getXRef()->setModifiedObject(&obj1, obj3.getRef());
1613                 }
1614                 obj3.free();
1615               } else if (obj2.getRef().num == newPageNum) {
1616                 obj1.free();
1617                 obj2.free();
1618                 type.free();
1619                 continue;
1620               } else {
1621                 Object page;
1622                 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1623                 if (page.isDict()) {
1624                   Object pagetype;
1625                   Dict *dict = page.getDict();
1626                   dict->lookup("Type", &pagetype);
1627                   if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1628                     obj1.free();
1629                     obj2.free();
1630                     type.free();
1631                     page.free();
1632                     pagetype.free();
1633                     continue;
1634                   }
1635                   pagetype.free();
1636                 }
1637                 page.free();
1638                 obj1.free();
1639                 obj2.free();
1640                 type.free();
1641                 array->remove(i);
1642                 modified = gTrue;
1643                 continue;
1644               }
1645             }
1646             obj2.free();
1647           }
1648           type.free();
1649           markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1650         }
1651         obj1.free();
1652         array->getNF(i, &obj1);
1653         if (obj1.isRef()) {
1654           if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1655             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1656               continue;  // already marked as free => should be replaced
1657             }
1658             xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1659             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1660               xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1661             }
1662           }
1663           if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1664               countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1665           {
1666             countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1667           } else {
1668             XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1669             entry->gen++;
1670           }
1671         }
1672         obj1.free();
1673       }
1674   }
1675   if (annotsObj->isRef()) {
1676     if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1677       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1678         return modified;  // already marked as free => should be replaced
1679       }
1680       xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1681       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1682         xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1683       }
1684     }
1685     if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1686         countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1687     {
1688       countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1689     } else {
1690       XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1691       entry->gen++;
1692     }
1693     getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1694   }
1695   annots.free();
1696   return modified;
1697 }
1698
1699 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1700   Object acroform;
1701   GBool modified = gFalse;
1702   afObj->fetch(getXRef(), &acroform);
1703   if (acroform.isDict()) {
1704       Dict *dict = acroform.getDict();
1705       for (int i=0; i < dict->getLength(); i++) {
1706         if (strcmp(dict->getKey(i), "Fields") == 0) {
1707           Object fields;
1708           modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1709           fields.free();
1710         } else {
1711           Object obj;
1712           markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1713           obj.free();
1714         }
1715       }
1716   }
1717   if (afObj->isRef()) {
1718     if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1719       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1720         return;  // already marked as free => should be replaced
1721       }
1722       xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1723       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1724         xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1725       }
1726     }
1727     if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1728         countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1729     {
1730       countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1731     } else {
1732       XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1733       entry->gen++;
1734     }
1735     if (modified){
1736       getXRef()->setModifiedObject(&acroform, afObj->getRef());
1737     }
1738   }
1739   acroform.free();
1740   return;
1741 }
1742
1743 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1744 {
1745   Guint objectsCount = 0; //count the number of objects in the XRef(s)
1746   Guchar *fileKey;
1747   CryptAlgorithm encAlgorithm;
1748   int keyLength;
1749   xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1750
1751   for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1752     if (xRef->getEntry(n)->type != xrefEntryFree) {
1753       Object obj;
1754       Ref ref;
1755       ref.num = n;
1756       ref.gen = xRef->getEntry(n)->gen;
1757       objectsCount++;
1758       getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1759       Goffset offset = writeObjectHeader(&ref, outStr);
1760       if (combine) {
1761         writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1762       } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1763         writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1764       } else {
1765         writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1766       }
1767       writeObjectFooter(outStr);
1768       xRef->add(ref.num, ref.gen, offset, gTrue);
1769       obj.free();
1770     }
1771   }
1772   return objectsCount;
1773 }
1774
1775 #ifndef DISABLE_OUTLINE
1776 Outline *PDFDoc::getOutline()
1777 {
1778   if (!outline) {
1779     pdfdocLocker();
1780     // read outline
1781     outline = new Outline(catalog->getOutline(), xref);
1782   }
1783
1784   return outline;
1785 }
1786 #endif
1787
1788 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1789 {
1790   PDFDoc *doc = new PDFDoc();
1791   doc->errCode = errorCode;
1792   doc->fileName = fileNameA;
1793
1794   return doc;
1795 }
1796
1797 long long PDFDoc::strToLongLong(char *s) {
1798   long long x, d;
1799   char *p;
1800
1801   x = 0;
1802   for (p = s; *p && isdigit(*p & 0xff); ++p) {
1803     d = *p - '0';
1804     if (x > (LLONG_MAX - d) / 10) {
1805       break;
1806     }
1807     x = 10 * x + d;
1808   }
1809   return x;
1810 }
1811
1812 // Read the 'startxref' position.
1813 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1814 {
1815   if (startXRefPos == -1) {
1816
1817     if (isLinearized(tryingToReconstruct)) {
1818       char buf[linearizationSearchSize+1];
1819       int c, n, i;
1820
1821       str->setPos(0);
1822       for (n = 0; n < linearizationSearchSize; ++n) {
1823         if ((c = str->getChar()) == EOF) {
1824           break;
1825         }
1826         buf[n] = c;
1827       }
1828       buf[n] = '\0';
1829
1830       // find end of first obj (linearization dictionary)
1831       startXRefPos = 0;
1832       for (i = 0; i < n; i++) {
1833         if (!strncmp("endobj", &buf[i], 6)) {
1834           i += 6;
1835           //skip whitespace
1836           while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1837           startXRefPos = i;
1838           break;
1839         }
1840       }
1841     } else {
1842       char buf[xrefSearchSize+1];
1843       char *p;
1844       int c, n, i;
1845
1846       // read last xrefSearchSize bytes
1847       int segnum = 0;
1848       int maxXRefSearch = 24576;
1849       if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1850       for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1851         str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1852         for (n = 0; n < xrefSearchSize; ++n) {
1853           if ((c = str->getChar()) == EOF) {
1854             break;
1855           }
1856           buf[n] = c;
1857         }
1858         buf[n] = '\0';
1859
1860         // find startxref
1861         for (i = n - 9; i >= 0; --i) {
1862           if (!strncmp(&buf[i], "startxref", 9)) {
1863             break;
1864           }
1865         }
1866         if (i < 0) {
1867           startXRefPos = 0;
1868         } else {
1869           for (p = &buf[i + 9]; isspace(*p); ++p);
1870           startXRefPos = strToLongLong(p);
1871           break;
1872         }
1873       }
1874     }
1875
1876   }
1877
1878   return startXRefPos;
1879 }
1880
1881 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
1882 {
1883   Guint mainXRefEntriesOffset = 0;
1884
1885   if (isLinearized(tryingToReconstruct)) {
1886     mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
1887   }
1888
1889   return mainXRefEntriesOffset;
1890 }
1891
1892 int PDFDoc::getNumPages()
1893 {
1894   if (isLinearized()) {
1895     int n;
1896     if ((n = getLinearization()->getNumPages())) {
1897       return n;
1898     }
1899   }
1900
1901   return catalog->getNumPages();
1902 }
1903
1904 Page *PDFDoc::parsePage(int page)
1905 {
1906   Page *p = NULL;
1907   Object obj;
1908   Ref pageRef;
1909   Dict *pageDict;
1910
1911   pageRef.num = getHints()->getPageObjectNum(page);
1912   if (!pageRef.num) {
1913     error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
1914     return NULL;
1915   }
1916
1917   // check for bogus ref - this can happen in corrupted PDF files
1918   if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
1919     error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
1920     return NULL;
1921   }
1922
1923   pageRef.gen = xref->getEntry(pageRef.num)->gen;
1924   xref->fetch(pageRef.num, pageRef.gen, &obj);
1925   if (!obj.isDict("Page")) {
1926     obj.free();
1927     error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
1928     return NULL;
1929   }
1930   pageDict = obj.getDict();
1931
1932   p = new Page(this, page, pageDict, pageRef,
1933                new PageAttrs(NULL, pageDict), catalog->getForm());
1934   obj.free();
1935
1936   return p;
1937 }
1938
1939 Page *PDFDoc::getPage(int page)
1940 {
1941   if ((page < 1) || page > getNumPages()) return NULL;
1942
1943   if (isLinearized()) {
1944     pdfdocLocker();
1945     if (!pageCache) {
1946       pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
1947       for (int i = 0; i < getNumPages(); i++) {
1948         pageCache[i] = NULL;
1949       }
1950     }
1951     if (!pageCache[page-1]) {
1952       pageCache[page-1] = parsePage(page);
1953     }
1954     if (pageCache[page-1]) {
1955        return pageCache[page-1];
1956     } else {
1957        error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
1958     }
1959   }
1960
1961   return catalog->getPage(page);
1962 }