source/libs/poppler/poppler-src/poppler/PDFDoc.cc

   1 //========================================================================
   2 //
   3 // PDFDoc.cc
   4 //
   5 // Copyright 1996-2003 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 //========================================================================
  10 //
  11 // Modified under the Poppler project - http://poppler.freedesktop.org
  12 //
  13 // All changes made under the Poppler project to this file are licensed
  14 // under GPL version 2 or later
  15 //
  16 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
  17 // Copyright (C) 2005, 2007-2009, 2011-2016 Albert Astals Cid <aacid@kde.org>
  18 // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
  19 // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org>
  20 // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
  21 // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
  22 // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
  23 // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de>
  24 // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl>
  25 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
  26 // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
  27 // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
  28 // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com>
  29 // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de>
  30 // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
  31 // Copyright (C) 2013, 2014 Adrian Johnson <ajohnson@redneon.com>
  32 // Copyright (C) 2013 Adam Reichold <adamreichold@myopera.com>
  33 // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com>
  34 // Copyright (C) 2015 Li Junling <lijunling@sina.com>
  35 // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
  36 // Copyright (C) 2015 André Esser <bepandre@hotmail.com>
  37 // Copyright (C) 2016 Jakub Kucharski <jakubkucharski97@gmail.com>
  38 //
  39 // To see a description of the changes please see the Changelog file that
  40 // came with your tarball or type make ChangeLog if you are building from git
  41 //
  42 //========================================================================
  43
  44 #include <config.h>
  45
  46 #ifdef USE_GCC_PRAGMAS
  47 #pragma implementation
  48 #endif
  49
  50 #include <ctype.h>
  51 #include <locale.h>
  52 #include <stdio.h>
  53 #include <errno.h>
  54 #include <stdlib.h>
  55 #include <stddef.h>
  56 #include <string.h>
  57 #include <time.h>
  58 #include <sys/stat.h>
  59 #include "goo/gstrtod.h"
  60 #include "goo/GooString.h"
  61 #include "goo/gfile.h"
  62 #include "poppler-config.h"
  63 #include "GlobalParams.h"
  64 #include "Page.h"
  65 #include "Catalog.h"
  66 #include "Stream.h"
  67 #include "XRef.h"
  68 #include "Linearization.h"
  69 #include "Link.h"
  70 #include "OutputDev.h"
  71 #include "Error.h"
  72 #include "ErrorCodes.h"
  73 #include "Lexer.h"
  74 #include "Parser.h"
  75 #include "SecurityHandler.h"
  76 #include "Decrypt.h"
  77 #ifndef DISABLE_OUTLINE
  78 #include "Outline.h"
  79 #endif
  80 #include "PDFDoc.h"
  81 #include "Hints.h"
  82
// pdfdocLocker(): when MULTITHREADED is enabled this declares a local
// MutexLocker named 'locker' constructed from the address of the PDFDoc
// member 'mutex'; otherwise it expands to nothing.
#if MULTITHREADED
#  define pdfdocLocker()   MutexLocker locker(&mutex)
#else
#  define pdfdocLocker()
#endif
  88
  89 //------------------------------------------------------------------------
  90
// Number of bytes read at the beginning of the file when looking for
// the '%PDF' header marker.
#define headerSearchSize 1024
// Length, in characters, of the textual form of the PDF Document IDs
// (PermanentId, UpdateId).
#define pdfIdLength 32

// Number of bytes read at the beginning of the file when looking for
// the linearization dictionary.
#define linearizationSearchSize 1024

// Number of bytes read at the end of the file when looking for
// 'startxref'.
#define xrefSearchSize 1024
 101
 102 //------------------------------------------------------------------------
 103 // PDFDoc
 104 //------------------------------------------------------------------------
 105
 106 void PDFDoc::init()
 107 {
 108 #if MULTITHREADED
 109   gInitMutex(&mutex);
 110 #endif
 111   ok = gFalse;
 112   errCode = errNone;
 113   fileName = NULL;
 114   file = NULL;
 115   str = NULL;
 116   xref = NULL;
 117   linearization = NULL;
 118   catalog = NULL;
 119   hints = NULL;
 120 #ifndef DISABLE_OUTLINE
 121   outline = NULL;
 122 #endif
 123   startXRefPos = -1;
 124   secHdlr = NULL;
 125   pageCache = NULL;
 126 }
 127
 128 PDFDoc::PDFDoc()
 129 {
 130   init();
 131 }
 132
 133 PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
 134                GooString *userPassword, void *guiDataA) {
 135   Object obj;
 136 #ifdef _WIN32
 137   int n, i;
 138 #endif
 139
 140   init();
 141
 142   fileName = fileNameA;
 143   guiData = guiDataA;
 144 #ifdef _WIN32
 145   n = fileName->getLength();
 146   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 147   for (i = 0; i < n; ++i) {
 148     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 149   }
 150   fileNameU[n] = L'\0';
 151 #endif
 152
 153   // try to open file
 154   file = GooFile::open(fileName);
 155   if (file == NULL) {
 156     // fopen() has failed.
 157     // Keep a copy of the errno returned by fopen so that it can be
 158     // referred to later.
 159     fopenErrno = errno;
 160     error(errIO, -1, "Couldn't open file '{0:t}': {1:s}.", fileName, strerror(errno));
 161     errCode = errOpenFile;
 162     return;
 163   }
 164
 165   // create stream
 166   obj.initNull();
 167   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 168
 169   ok = setup(ownerPassword, userPassword);
 170 }
 171
 172 #ifdef _WIN32
 173 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GooString *ownerPassword,
 174                GooString *userPassword, void *guiDataA) {
 175   OSVERSIONINFO version;
 176   Object obj;
 177   int i;
 178
 179   init();
 180
 181   guiData = guiDataA;
 182
 183   // save both Unicode and 8-bit copies of the file name
 184   fileName = new GooString();
 185   fileNameU = (wchar_t *)gmallocn(fileNameLen + 1, sizeof(wchar_t));
 186   for (i = 0; i < fileNameLen; ++i) {
 187     fileName->append((char)fileNameA[i]);
 188     fileNameU[i] = fileNameA[i];
 189   }
 190   fileNameU[fileNameLen] = L'\0';
 191
 192   // try to open file
 193   // NB: _wfopen is only available in NT
 194   version.dwOSVersionInfoSize = sizeof(version);
 195   GetVersionEx(&version);
 196   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
 197     file = GooFile::open(fileNameU);
 198   } else {
 199     file = GooFile::open(fileName);
 200   }
 201   if (!file) {
 202     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
 203     errCode = errOpenFile;
 204     return;
 205   }
 206
 207   // create stream
 208   obj.initNull();
 209   str = new FileStream(file, 0, gFalse, file->size(), &obj);
 210
 211   ok = setup(ownerPassword, userPassword);
 212 }
 213 #endif
 214
 215 PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword,
 216                GooString *userPassword, void *guiDataA) {
 217 #ifdef _WIN32
 218   int n, i;
 219 #endif
 220
 221   init();
 222   guiData = guiDataA;
 223   if (strA->getFileName()) {
 224     fileName = strA->getFileName()->copy();
 225 #ifdef _WIN32
 226     n = fileName->getLength();
 227     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
 228     for (i = 0; i < n; ++i) {
 229       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
 230     }
 231     fileNameU[n] = L'\0';
 232 #endif
 233   } else {
 234     fileName = NULL;
 235 #ifdef _WIN32
 236     fileNameU = NULL;
 237 #endif
 238   }
 239   str = strA;
 240   ok = setup(ownerPassword, userPassword);
 241 }
 242
// Common second stage of construction: validate the stream, read the
// header, the xref table, the encryption settings and the catalog.
//
// Returns gTrue on success.  On failure returns gFalse; errCode is set
// for xref, encryption and catalog failures (the "not seekable" path
// below returns without touching errCode).
GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
  pdfdocLocker();
  // Probe seekability: position relative to the end, then read the
  // position back; a negative position is treated as "not seekable".
  str->setPos(0, -1);
  if (str->getPos() < 0)
  {
    error(errSyntaxError, -1, "Document base stream is not seekable");
    return gFalse;
  }

  str->reset();

  // check footer
  // Adobe does not seem to enforce %%EOF, so we do the same
//  if (!checkFooter()) return gFalse;

  // check header
  checkHeader();

  // Set by the XRef constructor through the pointer passed below.
  GBool wasReconstructed = false;

  // read xref table
  xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed);
  if (!xref->isOk()) {
    if (wasReconstructed) {
      // Second attempt: forget the cached startxref position and ask the
      // helpers again with their "trying to reconstruct" flag set.
      delete xref;
      startXRefPos = -1;
      xref = new XRef(str, getStartXRef(gTrue), getMainXRefEntriesOffset(gTrue), &wasReconstructed);
    }
    if (!xref->isOk()) {
      error(errSyntaxError, -1, "Couldn't read xref table");
      errCode = xref->getErrorCode();
      return gFalse;
    }
  }

  // check for encryption
  if (!checkEncryption(ownerPassword, userPassword)) {
    errCode = errEncrypted;
    return gFalse;
  }

  // read catalog
  catalog = new Catalog(this);
  if (catalog && !catalog->isOk()) {
    if (!wasReconstructed)
    {
      // try one more time to construct the Catalog, maybe the problem is damaged XRef
      // NOTE(review): the replacement XRef is created after
      // checkEncryption() ran on the old one - confirm whether the
      // encryption settings need to be re-applied here.
      delete catalog;
      delete xref;
      xref = new XRef(str, 0, 0, NULL, true);
      catalog = new Catalog(this);
    }

    if (catalog && !catalog->isOk()) {
      error(errSyntaxError, -1, "Couldn't read page catalog");
      errCode = errBadCatalog;
      return gFalse;
    }
  }

  // done
  return gTrue;
}
 306
 307 PDFDoc::~PDFDoc() {
 308   if (pageCache) {
 309     for (int i = 0; i < getNumPages(); i++) {
 310       if (pageCache[i]) {
 311         delete pageCache[i];
 312       }
 313     }
 314     gfree(pageCache);
 315   }
 316   delete secHdlr;
 317 #ifndef DISABLE_OUTLINE
 318   if (outline) {
 319     delete outline;
 320   }
 321 #endif
 322   if (catalog) {
 323     delete catalog;
 324   }
 325   if (xref) {
 326     delete xref;
 327   }
 328   if (hints) {
 329     delete hints;
 330   }
 331   if (linearization) {
 332     delete linearization;
 333   }
 334   if (str) {
 335     delete str;
 336   }
 337   if (file) {
 338     delete file;
 339   }
 340   if (fileName) {
 341     delete fileName;
 342   }
 343 #ifdef _WIN32
 344   if (fileNameU) {
 345     gfree(fileNameU);
 346   }
 347 #endif
 348 #if MULTITHREADED
 349   gDestroyMutex(&mutex);
 350 #endif
 351 }
 352
 353
 354 // Check for a %%EOF at the end of this stream
 355 GBool PDFDoc::checkFooter() {
 356   // we look in the last 1024 chars because Adobe does the same
 357   char *eof = new char[1025];
 358   Goffset pos = str->getPos();
 359   str->setPos(1024, -1);
 360   int i, ch;
 361   for (i = 0; i < 1024; i++)
 362   {
 363     ch = str->getChar();
 364     if (ch == EOF)
 365       break;
 366     eof[i] = ch;
 367   }
 368   eof[i] = '\0';
 369
 370   bool found = false;
 371   for (i = i - 5; i >= 0; i--) {
 372     if (strncmp (&eof[i], "%%EOF", 5) == 0) {
 373       found = true;
 374       break;
 375     }
 376   }
 377   if (!found)
 378   {
 379     error(errSyntaxError, -1, "Document has not the mandatory ending %%EOF");
 380     errCode = errDamaged;
 381     delete[] eof;
 382     return gFalse;
 383   }
 384   delete[] eof;
 385   str->setPos(pos);
 386   return gTrue;
 387 }
 388
 389 // Check for a PDF header on this stream.  Skip past some garbage
 390 // if necessary.
 391 void PDFDoc::checkHeader() {
 392   char hdrBuf[headerSearchSize+1];
 393   char *p;
 394   char *tokptr;
 395   int i;
 396
 397   pdfMajorVersion = 0;
 398   pdfMinorVersion = 0;
 399   for (i = 0; i < headerSearchSize; ++i) {
 400     hdrBuf[i] = str->getChar();
 401   }
 402   hdrBuf[headerSearchSize] = '\0';
 403   for (i = 0; i < headerSearchSize - 5; ++i) {
 404     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
 405       break;
 406     }
 407   }
 408   if (i >= headerSearchSize - 5) {
 409     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 410     return;
 411   }
 412   str->moveStart(i);
 413   if (!(p = strtok_r(&hdrBuf[i+5], " \t\n\r", &tokptr))) {
 414     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
 415     return;
 416   }
 417   sscanf(p, "%d.%d", &pdfMajorVersion, &pdfMinorVersion);
 418   // We don't do the version check. Don't add it back in.
 419 }
 420
 421 GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword) {
 422   Object encrypt;
 423   GBool encrypted;
 424   GBool ret;
 425
 426   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
 427   if ((encrypted = encrypt.isDict())) {
 428     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
 429       if (secHdlr->isUnencrypted()) {
 430         // no encryption
 431         ret = gTrue;
 432       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
 433         // authorization succeeded
 434         xref->setEncryption(secHdlr->getPermissionFlags(),
 435                             secHdlr->getOwnerPasswordOk(),
 436                             secHdlr->getFileKey(),
 437                             secHdlr->getFileKeyLength(),
 438                             secHdlr->getEncVersion(),
 439                             secHdlr->getEncRevision(),
 440                             secHdlr->getEncAlgorithm());
 441         ret = gTrue;
 442       } else {
 443         // authorization failed
 444         ret = gFalse;
 445       }
 446     } else {
 447       // couldn't find the matching security handler
 448       ret = gFalse;
 449     }
 450   } else {
 451     // document is not encrypted
 452     ret = gTrue;
 453   }
 454   encrypt.free();
 455   return ret;
 456 }
 457
 458 std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
 459 {
 460   int num_pages = getNumPages();
 461   FormPageWidgets *page_widgets = NULL;
 462   std::vector<FormWidgetSignature*> widget_vector;
 463
 464   for (int i = 1; i <= num_pages; i++) {
 465     Page *p = getCatalog()->getPage(i);
 466     if (p) {
 467       page_widgets = p->getFormWidgets();
 468       for (int j = 0; page_widgets != NULL && j < page_widgets->getNumWidgets(); j++) {
 469         if (page_widgets->getWidget(j)->getType() == formSignature) {
 470             widget_vector.push_back(static_cast<FormWidgetSignature*>(page_widgets->getWidget(j)));
 471         }
 472       }
 473       delete page_widgets;
 474     }
 475   }
 476   return widget_vector;
 477 }
 478
 479 void PDFDoc::displayPage(OutputDev *out, int page,
 480                          double hDPI, double vDPI, int rotate,
 481                          GBool useMediaBox, GBool crop, GBool printing,
 482                          GBool (*abortCheckCbk)(void *data),
 483                          void *abortCheckCbkData,
 484                          GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 485                          void *annotDisplayDecideCbkData, GBool copyXRef) {
 486   if (globalParams->getPrintCommands()) {
 487     printf("***** page %d *****\n", page);
 488   }
 489
 490   if (getPage(page))
 491     getPage(page)->display(out, hDPI, vDPI,
 492                                     rotate, useMediaBox, crop, printing,
 493                                     abortCheckCbk, abortCheckCbkData,
 494                                     annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 495
 496 }
 497
 498 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
 499                           double hDPI, double vDPI, int rotate,
 500                           GBool useMediaBox, GBool crop, GBool printing,
 501                           GBool (*abortCheckCbk)(void *data),
 502                           void *abortCheckCbkData,
 503                           GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 504                           void *annotDisplayDecideCbkData) {
 505   int page;
 506
 507   for (page = firstPage; page <= lastPage; ++page) {
 508     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
 509                 abortCheckCbk, abortCheckCbkData,
 510                 annotDisplayDecideCbk, annotDisplayDecideCbkData);
 511   }
 512 }
 513
 514 void PDFDoc::displayPageSlice(OutputDev *out, int page,
 515                               double hDPI, double vDPI, int rotate,
 516                               GBool useMediaBox, GBool crop, GBool printing,
 517                               int sliceX, int sliceY, int sliceW, int sliceH,
 518                               GBool (*abortCheckCbk)(void *data),
 519                               void *abortCheckCbkData,
 520                               GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data),
 521                               void *annotDisplayDecideCbkData, GBool copyXRef) {
 522   if (getPage(page))
 523     getPage(page)->displaySlice(out, hDPI, vDPI,
 524                                          rotate, useMediaBox, crop,
 525                                          sliceX, sliceY, sliceW, sliceH,
 526                                          printing,
 527                                          abortCheckCbk, abortCheckCbkData,
 528                                          annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef);
 529 }
 530
 531 Links *PDFDoc::getLinks(int page) {
 532   Page *p = getPage(page);
 533   if (!p) {
 534     return new Links (NULL);
 535   }
 536   return p->getLinks();
 537 }
 538
 539 void PDFDoc::processLinks(OutputDev *out, int page) {
 540   if (getPage(page))
 541     getPage(page)->processLinks(out);
 542 }
 543
 544 Linearization *PDFDoc::getLinearization()
 545 {
 546   if (!linearization) {
 547     linearization = new Linearization(str);
 548     linearizationState = 0;
 549   }
 550   return linearization;
 551 }
 552
 553 GBool PDFDoc::checkLinearization() {
 554   if (linearization == NULL)
 555     return gFalse;
 556   if (linearizationState == 1)
 557     return gTrue;
 558   if (linearizationState == 2)
 559     return gFalse;
 560   if (!hints) {
 561     hints = new Hints(str, linearization, getXRef(), secHdlr);
 562   }
 563   for (int page = 1; page <= linearization->getNumPages(); page++) {
 564     Object obj;
 565     Ref pageRef;
 566
 567     pageRef.num = hints->getPageObjectNum(page);
 568     if (!pageRef.num) {
 569       linearizationState = 2;
 570       return gFalse;
 571     }
 572
 573     // check for bogus ref - this can happen in corrupted PDF files
 574     if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
 575       linearizationState = 2;
 576       return gFalse;
 577     }
 578
 579     pageRef.gen = xref->getEntry(pageRef.num)->gen;
 580     xref->fetch(pageRef.num, pageRef.gen, &obj);
 581     if (!obj.isDict("Page")) {
 582       obj.free();
 583       linearizationState = 2;
 584       return gFalse;
 585     }
 586     obj.free();
 587   }
 588   linearizationState = 1;
 589   return gTrue;
 590 }
 591
 592 GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
 593   if ((str->getLength()) &&
 594       (getLinearization()->getLength() == str->getLength()))
 595     return gTrue;
 596   else {
 597     if (tryingToReconstruct)
 598       return getLinearization()->getLength() > 0;
 599     else
 600       return gFalse;
 601   }
 602 }
 603
 604 void PDFDoc::setDocInfoModified(Object *infoObj)
 605 {
 606   Object infoObjRef;
 607   getDocInfoNF(&infoObjRef);
 608   xref->setModifiedObject(infoObj, infoObjRef.getRef());
 609   infoObjRef.free();
 610 }
 611
 612 void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value)
 613 {
 614   GBool removeEntry = !value || value->getLength() == 0;
 615   if (removeEntry) {
 616     delete value;
 617   }
 618
 619   Object infoObj;
 620   getDocInfo(&infoObj);
 621
 622   if (infoObj.isNull() && removeEntry) {
 623     // No info dictionary, so no entry to remove.
 624     return;
 625   }
 626
 627   createDocInfoIfNoneExists(&infoObj);
 628
 629   Object gooStrObj;
 630   if (removeEntry) {
 631     gooStrObj.initNull();
 632   } else {
 633     gooStrObj.initString(value);
 634   }
 635
 636   // gooStrObj is set to value or null by now. The latter will cause a removal.
 637   infoObj.dictSet(key, &gooStrObj);
 638
 639   if (infoObj.dictGetLength() == 0) {
 640     // Info dictionary is empty. Remove it altogether.
 641     removeDocInfo();
 642   } else {
 643     setDocInfoModified(&infoObj);
 644   }
 645
 646   infoObj.free();
 647 }
 648
 649 GooString *PDFDoc::getDocInfoStringEntry(const char *key) {
 650   Object infoObj;
 651   getDocInfo(&infoObj);
 652   if (!infoObj.isDict()) {
 653       return NULL;
 654   }
 655
 656   Object entryObj;
 657   infoObj.dictLookup(key, &entryObj);
 658
 659   GooString *result;
 660
 661   if (entryObj.isString()) {
 662     result = entryObj.takeString();
 663   } else {
 664     result = NULL;
 665   }
 666
 667   entryObj.free();
 668   infoObj.free();
 669
 670   return result;
 671 }
 672
 673 static GBool
 674 get_id (GooString *encodedidstring, GooString *id) {
 675   const char *encodedid = encodedidstring->getCString();
 676   char pdfid[pdfIdLength + 1];
 677   int n;
 678
 679   if (encodedidstring->getLength() != pdfIdLength / 2)
 680     return gFalse;
 681
 682   n = sprintf(pdfid, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
 683               encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff,
 684               encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, encodedid[7] & 0xff,
 685               encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff,
 686               encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff);
 687   if (n != pdfIdLength)
 688     return gFalse;
 689
 690   id->Set(pdfid, pdfIdLength);
 691   return gTrue;
 692 }
 693
 694 GBool PDFDoc::getID(GooString *permanent_id, GooString *update_id) {
 695   Object obj;
 696   xref->getTrailerDict()->dictLookup ("ID", &obj);
 697
 698   if (obj.isArray() && obj.arrayGetLength() == 2) {
 699     Object obj2;
 700
 701     if (permanent_id) {
 702       if (obj.arrayGet(0, &obj2)->isString()) {
 703         if (!get_id (obj2.getString(), permanent_id)) {
 704           obj2.free();
 705           return gFalse;
 706         }
 707       } else {
 708         error(errSyntaxError, -1, "Invalid permanent ID");
 709         obj2.free();
 710         return gFalse;
 711       }
 712       obj2.free();
 713     }
 714
 715     if (update_id) {
 716       if (obj.arrayGet(1, &obj2)->isString()) {
 717         if (!get_id (obj2.getString(), update_id)) {
 718           obj2.free();
 719           return gFalse;
 720         }
 721       } else {
 722         error(errSyntaxError, -1, "Invalid update ID");
 723         obj2.free();
 724         return gFalse;
 725       }
 726       obj2.free();
 727     }
 728
 729     obj.free();
 730
 731     return gTrue;
 732   }
 733   obj.free();
 734
 735   return gFalse;
 736 }
 737
 738 Hints *PDFDoc::getHints()
 739 {
 740   if (!hints && isLinearized()) {
 741     hints = new Hints(str, getLinearization(), getXRef(), secHdlr);
 742   }
 743
 744   return hints;
 745 }
 746
 747 int PDFDoc::savePageAs(GooString *name, int pageNo)
 748 {
 749   FILE *f;
 750   OutStream *outStr;
 751   XRef *yRef, *countRef;
 752   int rootNum = getXRef()->getNumObjects() + 1;
 753
 754   // Make sure that special flags are set, because we are going to read
 755   // all objects, including Unencrypted ones.
 756   xref->scanSpecialFlags();
 757
 758   Guchar *fileKey;
 759   CryptAlgorithm encAlgorithm;
 760   int keyLength;
 761   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
 762
 763   if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(pageNo)) {
 764     error(errInternal, -1, "Illegal pageNo: {0:d}({1:d})", pageNo, getNumPages() );
 765     return errOpenFile;
 766   }
 767   PDFRectangle *cropBox = NULL;
 768   if (getCatalog()->getPage(pageNo)->isCropped()) {
 769     cropBox = getCatalog()->getPage(pageNo)->getCropBox();
 770   }
 771   replacePageDict(pageNo,
 772     getCatalog()->getPage(pageNo)->getRotate(),
 773     getCatalog()->getPage(pageNo)->getMediaBox(),
 774     cropBox);
 775   Ref *refPage = getCatalog()->getPageRef(pageNo);
 776   Object page;
 777   getXRef()->fetch(refPage->num, refPage->gen, &page);
 778
 779   if (!(f = fopen(name->getCString(), "wb"))) {
 780     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 781     return errOpenFile;
 782   }
 783   outStr = new FileOutStream(f,0);
 784
 785   yRef = new XRef(getXRef()->getTrailerDict());
 786
 787   if (secHdlr != NULL && !secHdlr->isUnencrypted()) {
 788     yRef->setEncryption(secHdlr->getPermissionFlags(),
 789       secHdlr->getOwnerPasswordOk(), fileKey, keyLength, secHdlr->getEncVersion(), secHdlr->getEncRevision(), encAlgorithm);
 790   }
 791   countRef = new XRef();
 792   Object *trailerObj = getXRef()->getTrailerDict();
 793   if (trailerObj->isDict()) {
 794     markPageObjects(trailerObj->getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 795   }
 796   yRef->add(0, 65535, 0, gFalse);
 797   writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
 798
 799   // get and mark info dict
 800   Object infoObj;
 801   getXRef()->getDocInfo(&infoObj);
 802   if (infoObj.isDict()) {
 803     Dict *infoDict = infoObj.getDict();
 804     markPageObjects(infoDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 805     if (trailerObj->isDict()) {
 806       Dict *trailerDict = trailerObj->getDict();
 807       Object ref;
 808       trailerDict->lookupNF("Info", &ref);
 809       if (ref.isRef()) {
 810         yRef->add(ref.getRef().num, ref.getRef().gen, 0, gTrue);
 811         if (getXRef()->getEntry(ref.getRef().num)->type == xrefEntryCompressed) {
 812           yRef->getEntry(ref.getRef().num)->type = xrefEntryCompressed;
 813         }
 814       }
 815       ref.free();
 816     }
 817   }
 818   infoObj.free();
 819
 820   // get and mark output intents etc.
 821   Object catObj, pagesObj, resourcesObj, annotsObj, afObj;
 822   getXRef()->getCatalog(&catObj);
 823   Dict *catDict = catObj.getDict();
 824   catDict->lookup("Pages", &pagesObj);
 825   catDict->lookupNF("AcroForm", &afObj);
 826   if (!afObj.isNull()) {
 827     markAcroForm(&afObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 828     afObj.free();
 829   }
 830   Dict *pagesDict = pagesObj.getDict();
 831   pagesDict->lookup("Resources", &resourcesObj);
 832   if (resourcesObj.isDict())
 833     markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 834   markPageObjects(catDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 835
 836   Dict *pageDict = page.getDict();
 837   if (resourcesObj.isNull() && !pageDict->hasKey("Resources")) {
 838     Dict *resourceDict = getCatalog()->getPage(pageNo)->getResourceDict();
 839     if (resourceDict != NULL) {
 840       resourcesObj.initDict(resourceDict);
 841       markPageObjects(resourcesObj.getDict(), yRef, countRef, 0, refPage->num, rootNum + 2);
 842     }
 843   }
 844   markPageObjects(pageDict, yRef, countRef, 0, refPage->num, rootNum + 2);
 845   pageDict->lookupNF("Annots", &annotsObj);
 846   if (!annotsObj.isNull()) {
 847     markAnnotations(&annotsObj, yRef, countRef, 0, refPage->num, rootNum + 2);
 848     annotsObj.free();
 849   }
 850   yRef->markUnencrypted();
 851   writePageObjects(outStr, yRef, 0);
 852
 853   yRef->add(rootNum,0,outStr->getPos(),gTrue);
 854   outStr->printf("%d 0 obj\n", rootNum);
 855   outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
 856   for (int j = 0; j < catDict->getLength(); j++) {
 857     const char *key = catDict->getKey(j);
 858     if (strcmp(key, "Type") != 0 &&
 859       strcmp(key, "Catalog") != 0 &&
 860       strcmp(key, "Pages") != 0)
 861     {
 862       if (j > 0) outStr->printf(" ");
 863       Object value; catDict->getValNF(j, &value);
 864       outStr->printf("/%s ", key);
 865       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 866       value.free();
 867     }
 868   }
 869   catObj.free();
 870   pagesObj.free();
 871   outStr->printf(">>\nendobj\n");
 872
 873   yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
 874   outStr->printf("%d 0 obj\n", rootNum + 1);
 875   outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 ", rootNum + 2);
 876   if (resourcesObj.isDict()) {
 877     outStr->printf("/Resources ");
 878     writeObject(&resourcesObj, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 879     resourcesObj.free();
 880   }
 881   outStr->printf(">>\n");
 882   outStr->printf("endobj\n");
 883
 884   yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
 885   outStr->printf("%d 0 obj\n", rootNum + 2);
 886   outStr->printf("<< ");
 887   for (int n = 0; n < pageDict->getLength(); n++) {
 888     if (n > 0) outStr->printf(" ");
 889     const char *key = pageDict->getKey(n);
 890     Object value; pageDict->getValNF(n, &value);
 891     if (strcmp(key, "Parent") == 0) {
 892       outStr->printf("/Parent %d 0 R", rootNum + 1);
 893     } else {
 894       outStr->printf("/%s ", key);
 895       writeObject(&value, outStr, getXRef(), 0, NULL, cryptRC4, 0, 0, 0);
 896     }
 897     value.free();
 898   }
 899   outStr->printf(" >>\nendobj\n");
 900   page.free();
 901
 902   Goffset uxrefOffset = outStr->getPos();
 903   Ref ref;
 904   ref.num = rootNum;
 905   ref.gen = 0;
 906   Dict *trailerDict = createTrailerDict(rootNum + 3, gFalse, 0, &ref, getXRef(),
 907                                         name->getCString(), uxrefOffset);
 908   writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
 909                         uxrefOffset, outStr, getXRef());
 910   delete trailerDict;
 911
 912   outStr->close();
 913   fclose(f);
 914   delete yRef;
 915   delete countRef;
 916   delete outStr;
 917
 918   return errNone;
 919 }
 920
 921 int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
 922   FILE *f;
 923   OutStream *outStr;
 924   int res;
 925
 926   if (!(f = fopen(name->getCString(), "wb"))) {
 927     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 928     return errOpenFile;
 929   }
 930   outStr = new FileOutStream(f,0);
 931   res = saveAs(outStr, mode);
 932   delete outStr;
 933   fclose(f);
 934   return res;
 935 }
 936
 937 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
 938   if (!xref->isModified() && mode == writeStandard) {
 939     // simply copy the original file
 940     saveWithoutChangesAs (outStr);
 941   } else if (mode == writeForceRewrite) {
 942     saveCompleteRewrite(outStr);
 943   } else {
 944     saveIncrementalUpdate(outStr);
 945   }
 946
 947   return errNone;
 948 }
 949
 950 int PDFDoc::saveWithoutChangesAs(GooString *name) {
 951   FILE *f;
 952   OutStream *outStr;
 953   int res;
 954
 955   if (!(f = fopen(name->getCString(), "wb"))) {
 956     error(errIO, -1, "Couldn't open file '{0:t}'", name);
 957     return errOpenFile;
 958   }
 959
 960   outStr = new FileOutStream(f,0);
 961   res = saveWithoutChangesAs(outStr);
 962   delete outStr;
 963
 964   fclose(f);
 965
 966   return res;
 967 }
 968
 969 int PDFDoc::saveWithoutChangesAs(OutStream *outStr) {
 970   int c;
 971
 972   BaseStream *copyStr = str->copy();
 973   copyStr->reset();
 974   while ((c = copyStr->getChar()) != EOF) {
 975     outStr->put(c);
 976   }
 977   copyStr->close();
 978   delete copyStr;
 979
 980   return errNone;
 981 }
 982
 983 void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
 984 {
 985   XRef *uxref;
 986   int c;
 987   //copy the original file
 988   BaseStream *copyStr = str->copy();
 989   copyStr->reset();
 990   while ((c = copyStr->getChar()) != EOF) {
 991     outStr->put(c);
 992   }
 993   copyStr->close();
 994   delete copyStr;
 995
 996   Guchar *fileKey;
 997   CryptAlgorithm encAlgorithm;
 998   int keyLength;
 999   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1000
1001   uxref = new XRef();
1002   uxref->add(0, 65535, 0, gFalse);
1003   xref->lock();
1004   for(int i=0; i<xref->getNumObjects(); i++) {
1005     if ((xref->getEntry(i)->type == xrefEntryFree) &&
1006         (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects
1007       continue;
1008
1009     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object
1010       Ref ref;
1011       ref.num = i;
1012       ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen;
1013       if (xref->getEntry(i)->type != xrefEntryFree) {
1014         Object obj1;
1015         xref->fetch(ref.num, ref.gen, &obj1, 1);
1016         Goffset offset = writeObjectHeader(&ref, outStr);
1017         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1018         writeObjectFooter(outStr);
1019         uxref->add(ref.num, ref.gen, offset, gTrue);
1020         obj1.free();
1021       } else {
1022         uxref->add(ref.num, ref.gen, 0, gFalse);
1023       }
1024     }
1025   }
1026   xref->unlock();
1027   if (uxref->getNumObjects() == 0) { //we have nothing to update
1028     delete uxref;
1029     return;
1030   }
1031
1032   Goffset uxrefOffset = outStr->getPos();
1033   int numobjects = xref->getNumObjects();
1034   const char *fileNameA = fileName ? fileName->getCString() : NULL;
1035   Ref rootRef, uxrefStreamRef;
1036   rootRef.num = getXRef()->getRootNum();
1037   rootRef.gen = getXRef()->getRootGen();
1038
1039   // Output a xref stream if there is a xref stream already
1040   GBool xRefStream = xref->isXRefStream();
1041
1042   if (xRefStream) {
1043     // Append an entry for the xref stream itself
1044     uxrefStreamRef.num = numobjects++;
1045     uxrefStreamRef.gen = 0;
1046     uxref->add(uxrefStreamRef.num, uxrefStreamRef.gen, uxrefOffset, gTrue);
1047   }
1048
1049   Dict *trailerDict = createTrailerDict(numobjects, gTrue, getStartXRef(), &rootRef, getXRef(), fileNameA, uxrefOffset);
1050   if (xRefStream) {
1051     writeXRefStreamTrailer(trailerDict, uxref, &uxrefStreamRef, uxrefOffset, outStr, getXRef());
1052   } else {
1053     writeXRefTableTrailer(trailerDict, uxref, gFalse, uxrefOffset, outStr, getXRef());
1054   }
1055
1056   delete trailerDict;
1057   delete uxref;
1058 }
1059
1060 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
1061 {
1062   // Make sure that special flags are set, because we are going to read
1063   // all objects, including Unencrypted ones.
1064   xref->scanSpecialFlags();
1065
1066   Guchar *fileKey;
1067   CryptAlgorithm encAlgorithm;
1068   int keyLength;
1069   xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1070
1071   outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
1072   XRef *uxref = new XRef();
1073   uxref->add(0, 65535, 0, gFalse);
1074   xref->lock();
1075   for(int i=0; i<xref->getNumObjects(); i++) {
1076     Object obj1;
1077     Ref ref;
1078     XRefEntryType type = xref->getEntry(i)->type;
1079     if (type == xrefEntryFree) {
1080       ref.num = i;
1081       ref.gen = xref->getEntry(i)->gen;
1082       /* the XRef class adds a lot of irrelevant free entries, we only want the significant one
1083           and we don't want the one with num=0 because it has already been added (gen = 65535)*/
1084       if (ref.gen > 0 && ref.num > 0)
1085         uxref->add(ref.num, ref.gen, 0, gFalse);
1086     } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) {
1087       // This entry must not be written, put a free entry instead (with incremented gen)
1088       ref.num = i;
1089       ref.gen = xref->getEntry(i)->gen + 1;
1090       uxref->add(ref.num, ref.gen, 0, gFalse);
1091     } else if (type == xrefEntryUncompressed){
1092       ref.num = i;
1093       ref.gen = xref->getEntry(i)->gen;
1094       xref->fetch(ref.num, ref.gen, &obj1, 1);
1095       Goffset offset = writeObjectHeader(&ref, outStr);
1096       // Write unencrypted objects in unencrypted form
1097       if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) {
1098         writeObject(&obj1, outStr, NULL, cryptRC4, 0, 0, 0);
1099       } else {
1100         writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1101       }
1102       writeObjectFooter(outStr);
1103       uxref->add(ref.num, ref.gen, offset, gTrue);
1104       obj1.free();
1105     } else if (type == xrefEntryCompressed) {
1106       ref.num = i;
1107       ref.gen = 0; //compressed entries have gen == 0
1108       xref->fetch(ref.num, ref.gen, &obj1, 1);
1109       Goffset offset = writeObjectHeader(&ref, outStr);
1110       writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1111       writeObjectFooter(outStr);
1112       uxref->add(ref.num, ref.gen, offset, gTrue);
1113       obj1.free();
1114     }
1115   }
1116   xref->unlock();
1117   Goffset uxrefOffset = outStr->getPos();
1118   writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */,
1119                         uxref->getNumObjects(), outStr, gFalse /* complete rewrite */);
1120   delete uxref;
1121 }
1122
1123 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1124                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1125 {
1126   Object obj1;
1127   outStr->printf("<<");
1128   for (int i=0; i<dict->getLength(); i++) {
1129     GooString keyName(dict->getKey(i));
1130     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
1131     outStr->printf("/%s ", keyNameToPrint->getCString());
1132     delete keyNameToPrint;
1133     writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1134     obj1.free();
1135   }
1136   outStr->printf(">> ");
1137 }
1138
1139 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
1140 {
1141   outStr->printf("stream\r\n");
1142   str->reset();
1143   for (int c=str->getChar(); c!= EOF; c=str->getChar()) {
1144     outStr->printf("%c", c);
1145   }
1146   outStr->printf("\r\nendstream\r\n");
1147 }
1148
1149 void PDFDoc::writeRawStream (Stream* str, OutStream* outStr)
1150 {
1151   Object obj1;
1152   str->getDict()->lookup("Length", &obj1);
1153   if (!obj1.isInt() && !obj1.isInt64()) {
1154     error (errSyntaxError, -1, "PDFDoc::writeRawStream, no Length in stream dict");
1155     return;
1156   }
1157
1158   Goffset length;
1159   if (obj1.isInt())
1160     length = obj1.getInt();
1161   else
1162     length = obj1.getInt64();
1163   obj1.free();
1164
1165   outStr->printf("stream\r\n");
1166   str->unfilteredReset();
1167   for (Goffset i = 0; i < length; i++) {
1168     int c = str->getUnfilteredChar();
1169     if (unlikely(c == EOF)) {
1170       error (errSyntaxError, -1, "PDFDoc::writeRawStream: EOF reading stream");
1171       break;
1172     }
1173     outStr->printf("%c", c);
1174   }
1175   str->reset();
1176   outStr->printf("\r\nendstream\r\n");
1177 }
1178
1179 void PDFDoc::writeString (GooString* s, OutStream* outStr, Guchar *fileKey,
1180                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1181 {
1182   // Encrypt string if encryption is enabled
1183   GooString *sEnc = NULL;
1184   if (fileKey) {
1185     Object obj;
1186     EncryptStream *enc = new EncryptStream(new MemStream(s->getCString(), 0, s->getLength(), obj.initNull()),
1187                                            fileKey, encAlgorithm, keyLength, objNum, objGen);
1188     sEnc = new GooString();
1189     int c;
1190     enc->reset();
1191     while ((c = enc->getChar()) != EOF) {
1192       sEnc->append((char)c);
1193     }
1194
1195     delete enc;
1196     s = sEnc;
1197   }
1198
1199   // Write data
1200   if (s->hasUnicodeMarker()) {
1201     //unicode string don't necessary end with \0
1202     const char* c = s->getCString();
1203     outStr->printf("(");
1204     for(int i=0; i<s->getLength(); i++) {
1205       char unescaped = *(c+i)&0x000000ff;
1206       //escape if needed
1207       if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
1208         outStr->printf("%c", '\\');
1209       outStr->printf("%c", unescaped);
1210     }
1211     outStr->printf(") ");
1212   } else {
1213     const char* c = s->getCString();
1214     outStr->printf("(");
1215     for(int i=0; i<s->getLength(); i++) {
1216       char unescaped = *(c+i)&0x000000ff;
1217       //escape if needed
1218       if (unescaped == '\r')
1219         outStr->printf("\\r");
1220       else if (unescaped == '\n')
1221         outStr->printf("\\n");
1222       else {
1223         if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
1224           outStr->printf("%c", '\\');
1225         }
1226         outStr->printf("%c", unescaped);
1227       }
1228     }
1229     outStr->printf(") ");
1230   }
1231
1232   delete sEnc;
1233 }
1234
1235 Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
1236 {
1237   Goffset offset = outStr->getPos();
1238   outStr->printf("%i %i obj ", ref->num, ref->gen);
1239   return offset;
1240 }
1241
1242 void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
1243                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
1244 {
1245   Array *array;
1246   Object obj1;
1247   Goffset tmp;
1248
1249   switch (obj->getType()) {
1250     case objBool:
1251       outStr->printf("%s ", obj->getBool()?"true":"false");
1252       break;
1253     case objInt:
1254       outStr->printf("%i ", obj->getInt());
1255       break;
1256     case objInt64:
1257       outStr->printf("%lli ", obj->getInt64());
1258       break;
1259     case objReal:
1260     {
1261       GooString s;
1262       s.appendf("{0:.10g}", obj->getReal());
1263       outStr->printf("%s ", s.getCString());
1264       break;
1265     }
1266     case objString:
1267       writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen);
1268       break;
1269     case objName:
1270     {
1271       GooString name(obj->getName());
1272       GooString *nameToPrint = name.sanitizedName(gFalse /* non ps mode */);
1273       outStr->printf("/%s ", nameToPrint->getCString());
1274       delete nameToPrint;
1275       break;
1276     }
1277     case objNull:
1278       outStr->printf( "null ");
1279       break;
1280     case objArray:
1281       array = obj->getArray();
1282       outStr->printf("[");
1283       for (int i=0; i<array->getLength(); i++) {
1284         writeObject(array->getNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1285         obj1.free();
1286       }
1287       outStr->printf("] ");
1288       break;
1289     case objDict:
1290       writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1291       break;
1292     case objStream:
1293       {
1294         //We can't modify stream with the current implementation (no write functions in Stream API)
1295         // => the only type of streams which that have been modified are internal streams (=strWeird)
1296         Stream *stream = obj->getStream();
1297         if (stream->getKind() == strWeird || stream->getKind() == strCrypt) {
1298           //we write the stream unencoded => TODO: write stream encoder
1299
1300           // Encrypt stream
1301           EncryptStream *encStream = NULL;
1302           GBool removeFilter = gTrue;
1303           if (stream->getKind() == strWeird && fileKey) {
1304             Object filter;
1305             stream->getDict()->lookup("Filter", &filter);
1306             if (!filter.isName("Crypt")) {
1307               if (filter.isArray()) {
1308                 for (int i = 0; i < filter.arrayGetLength(); i++) {
1309                   Object filterEle;
1310                   filter.arrayGet(i, &filterEle);
1311                   if (filterEle.isName("Crypt")) {
1312                     filterEle.free();
1313                     removeFilter = gFalse;
1314                     break;
1315                   }
1316                   filterEle.free();
1317                 }
1318                 if (removeFilter) {
1319                   encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1320                   encStream->setAutoDelete(gFalse);
1321                   stream = encStream;
1322                 }
1323               } else {
1324                 encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1325                 encStream->setAutoDelete(gFalse);
1326                 stream = encStream;
1327               }
1328             } else {
1329               removeFilter = gFalse;
1330             }
1331             filter.free();
1332           } else if (fileKey != NULL) { // Encrypt stream
1333             encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
1334             encStream->setAutoDelete(gFalse);
1335             stream = encStream;
1336           }
1337
1338           stream->reset();
1339           //recalculate stream length
1340           tmp = 0;
1341           for (int c=stream->getChar(); c!=EOF; c=stream->getChar()) {
1342             tmp++;
1343           }
1344           obj1.initInt64(tmp);
1345           stream->getDict()->set("Length", &obj1);
1346
1347           //Remove Stream encoding
1348           if (removeFilter) {
1349             stream->getDict()->remove("Filter");
1350           }
1351           stream->getDict()->remove("DecodeParms");
1352
1353           writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1354           writeStream (stream,outStr);
1355           delete encStream;
1356           obj1.free();
1357         } else {
1358           //raw stream copy
1359           FilterStream *fs = dynamic_cast<FilterStream*>(stream);
1360           if (fs) {
1361             BaseStream *bs = fs->getBaseStream();
1362             if (bs) {
1363               Goffset streamEnd;
1364                 if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
1365                   Object val;
1366                   val.initInt64(streamEnd - bs->getStart());
1367                   stream->getDict()->set("Length", &val);
1368                 }
1369               }
1370           }
1371           writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
1372           writeRawStream (stream, outStr);
1373         }
1374         break;
1375       }
1376     case objRef:
1377       outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
1378       break;
1379     case objCmd:
1380       outStr->printf("%s\n", obj->getCmd());
1381       break;
1382     case objError:
1383       outStr->printf("error\r\n");
1384       break;
1385     case objEOF:
1386       outStr->printf("eof\r\n");
1387       break;
1388     case objNone:
1389       outStr->printf("none\r\n");
1390       break;
1391     default:
1392       error(errUnimplemented, -1,"Unhandled objType : {0:d}, please report a bug with a testcase\r\n", obj->getType());
1393       break;
1394   }
1395 }
1396
1397 void PDFDoc::writeObjectFooter (OutStream* outStr)
1398 {
1399   outStr->printf("endobj\r\n");
1400 }
1401
1402 Dict *PDFDoc::createTrailerDict(int uxrefSize, GBool incrUpdate, Goffset startxRef,
1403                                 Ref *root, XRef *xRef, const char *fileName, Goffset fileSize)
1404 {
1405   Dict *trailerDict = new Dict(xRef);
1406   Object obj1;
1407   obj1.initInt(uxrefSize);
1408   trailerDict->set("Size", &obj1);
1409   obj1.free();
1410
1411   //build a new ID, as recommended in the reference, uses:
1412   // - current time
1413   // - file name
1414   // - file size
1415   // - values of entry in information dictionnary
1416   GooString message;
1417   char buffer[256];
1418   sprintf(buffer, "%i", (int)time(NULL));
1419   message.append(buffer);
1420
1421   if (fileName)
1422     message.append(fileName);
1423
1424   sprintf(buffer, "%lli", (long long)fileSize);
1425   message.append(buffer);
1426
1427   //info dict -- only use text string
1428   if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
1429     for(int i=0; i<obj1.getDict()->getLength(); i++) {
1430       Object obj2;
1431       obj1.getDict()->getVal(i, &obj2);
1432       if (obj2.isString()) {
1433         message.append(obj2.getString());
1434       }
1435       obj2.free();
1436     }
1437   }
1438   obj1.free();
1439
1440   GBool hasEncrypt = gFalse;
1441   if (!xRef->getTrailerDict()->isNone()) {
1442     Object obj2;
1443     xRef->getTrailerDict()->dictLookupNF("Encrypt", &obj2);
1444     if (!obj2.isNull()) {
1445       trailerDict->set("Encrypt", &obj2);
1446       hasEncrypt = gTrue;
1447       obj2.free();
1448     }
1449   }
1450
1451   //calculate md5 digest
1452   Guchar digest[16];
1453   md5((Guchar*)message.getCString(), message.getLength(), digest);
1454   obj1.initString(new GooString((const char*)digest, 16));
1455
1456   //create ID array
1457   Object obj2,obj3,obj5;
1458   obj2.initArray(xRef);
1459
1460   // In case of encrypted files, the ID must not be changed because it's used to calculate the key
1461   if (incrUpdate || hasEncrypt) {
1462     Object obj4;
1463     //only update the second part of the array
1464     xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
1465     if (!obj4.isArray()) {
1466       error(errSyntaxWarning, -1, "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue");
1467     } else {
1468       //Get the first part of the ID
1469       obj4.arrayGet(0,&obj3);
1470
1471       obj2.arrayAdd(&obj3);
1472       obj2.arrayAdd(&obj1);
1473       trailerDict->set("ID", &obj2);
1474     }
1475     obj4.free();
1476   } else {
1477     //new file => same values for the two identifiers
1478     obj2.arrayAdd(&obj1);
1479     obj1.initString(new GooString((const char*)digest, 16));
1480     obj2.arrayAdd(&obj1);
1481     trailerDict->set("ID", &obj2);
1482   }
1483
1484   obj1.initRef(root->num, root->gen);
1485   trailerDict->set("Root", &obj1);
1486
1487   if (incrUpdate) {
1488     obj1.initInt64(startxRef);
1489     trailerDict->set("Prev", &obj1);
1490   }
1491
1492   if (!xRef->getTrailerDict()->isNone()) {
1493     xRef->getDocInfoNF(&obj5);
1494     if (!obj5.isNull()) {
1495       trailerDict->set("Info", &obj5);
1496     }
1497   }
1498
1499   return trailerDict;
1500 }
1501
1502 void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAllEntries, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1503 {
1504   uxref->writeTableToFile( outStr, writeAllEntries );
1505   outStr->printf( "trailer\r\n");
1506   writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1507   outStr->printf( "\r\nstartxref\r\n");
1508   outStr->printf( "%lli\r\n", uxrefOffset);
1509   outStr->printf( "%%%%EOF\r\n");
1510 }
1511
1512 void PDFDoc::writeXRefStreamTrailer (Dict *trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream* outStr, XRef *xRef)
1513 {
1514   GooString stmData;
1515
1516   // Fill stmData and some trailerDict fields
1517   uxref->writeStreamToBuffer(&stmData, trailerDict, xRef);
1518
1519   // Create XRef stream object and write it
1520   Object obj1;
1521   MemStream *mStream = new MemStream( stmData.getCString(), 0,
1522                                       stmData.getLength(), obj1.initDict(trailerDict) );
1523   writeObjectHeader(uxrefStreamRef, outStr);
1524   writeObject(obj1.initStream(mStream), outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
1525   writeObjectFooter(outStr);
1526   obj1.free();
1527
1528   outStr->printf( "startxref\r\n");
1529   outStr->printf( "%lli\r\n", uxrefOffset);
1530   outStr->printf( "%%%%EOF\r\n");
1531 }
1532
1533 void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,
1534                                    int uxrefSize, OutStream* outStr, GBool incrUpdate)
1535 {
1536   const char *fileNameA = fileName ? fileName->getCString() : NULL;
1537   // file size (doesn't include the trailer)
1538   unsigned int fileSize = 0;
1539   int c;
1540   str->reset();
1541   while ((c = str->getChar()) != EOF) {
1542     fileSize++;
1543   }
1544   str->close();
1545   Ref ref;
1546   ref.num = getXRef()->getRootNum();
1547   ref.gen = getXRef()->getRootGen();
1548   Dict * trailerDict = createTrailerDict(uxrefSize, incrUpdate, getStartXRef(), &ref,
1549                                          getXRef(), fileNameA, fileSize);
1550   writeXRefTableTrailer(trailerDict, uxref, writeAllEntries, uxrefOffset, outStr, getXRef());
1551   delete trailerDict;
1552 }
1553
1554 void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
1555 {
1556    outStr->printf("%%PDF-%d.%d\n", major, minor);
1557    outStr->printf("%%\xE2\xE3\xCF\xD3\n");
1558 }
1559
1560 void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1561 {
1562   Object obj1;
1563   for (int i=0; i<dict->getLength(); i++) {
1564     const char *key = dict->getKey(i);
1565     if (strcmp(key, "Annots") != 0) {
1566       markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1567     } else {
1568       Object annotsObj;
1569       dict->getValNF(i, &annotsObj);
1570       if (!annotsObj.isNull()) {
1571         markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
1572         annotsObj.free();
1573       }
1574     }
1575     obj1.free();
1576   }
1577 }
1578
1579 void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1580 {
1581   Array *array;
1582   Object obj1;
1583
1584   switch (obj->getType()) {
1585     case objArray:
1586       array = obj->getArray();
1587       for (int i=0; i<array->getLength(); i++) {
1588         markObject(array->getNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
1589         obj1.free();
1590       }
1591       break;
1592     case objDict:
1593       markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1594       break;
1595     case objStream:
1596       {
1597         Stream *stream = obj->getStream();
1598         markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
1599       }
1600       break;
1601     case objRef:
1602       {
1603         if (obj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree) {
1604           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryFree) {
1605             return;  // already marked as free => should be replaced
1606           }
1607           xRef->add(obj->getRef().num + numOffset, obj->getRef().gen, 0, gTrue);
1608           if (getXRef()->getEntry(obj->getRef().num)->type == xrefEntryCompressed) {
1609             xRef->getEntry(obj->getRef().num + numOffset)->type = xrefEntryCompressed;
1610           }
1611         }
1612         if (obj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1613             countRef->getEntry(obj->getRef().num + numOffset)->type == xrefEntryFree)
1614         {
1615           countRef->add(obj->getRef().num + numOffset, 1, 0, gTrue);
1616         } else {
1617           XRefEntry *entry = countRef->getEntry(obj->getRef().num + numOffset);
1618           entry->gen++;
1619           if (entry->gen > 9)
1620             break;
1621         }
1622         Object obj1;
1623         getXRef()->fetch(obj->getRef().num, obj->getRef().gen, &obj1);
1624         markObject(&obj1, xRef, countRef, numOffset, oldRefNum, newRefNum);
1625         obj1.free();
1626       }
1627       break;
1628     default:
1629       break;
1630   }
1631 }
1632
1633 void PDFDoc::replacePageDict(int pageNo, int rotate,
1634                              PDFRectangle *mediaBox,
1635                              PDFRectangle *cropBox)
1636 {
1637   Ref *refPage = getCatalog()->getPageRef(pageNo);
1638   Object page;
1639   getXRef()->fetch(refPage->num, refPage->gen, &page);
1640   Dict *pageDict = page.getDict();
1641   pageDict->remove("MediaBoxssdf");
1642   pageDict->remove("MediaBox");
1643   pageDict->remove("CropBox");
1644   pageDict->remove("ArtBox");
1645   pageDict->remove("BleedBox");
1646   pageDict->remove("TrimBox");
1647   pageDict->remove("Rotate");
1648   Object mediaBoxObj;
1649   mediaBoxObj.initArray(getXRef());
1650   Object murx;
1651   murx.initReal(mediaBox->x1);
1652   Object mury;
1653   mury.initReal(mediaBox->y1);
1654   Object mllx;
1655   mllx.initReal(mediaBox->x2);
1656   Object mlly;
1657   mlly.initReal(mediaBox->y2);
1658   mediaBoxObj.arrayAdd(&murx);
1659   mediaBoxObj.arrayAdd(&mury);
1660   mediaBoxObj.arrayAdd(&mllx);
1661   mediaBoxObj.arrayAdd(&mlly);
1662   pageDict->add(copyString("MediaBox"), &mediaBoxObj);
1663   if (cropBox != NULL) {
1664     Object cropBoxObj;
1665     cropBoxObj.initArray(getXRef());
1666     Object curx;
1667     curx.initReal(cropBox->x1);
1668     Object cury;
1669     cury.initReal(cropBox->y1);
1670     Object cllx;
1671     cllx.initReal(cropBox->x2);
1672     Object clly;
1673     clly.initReal(cropBox->y2);
1674     cropBoxObj.arrayAdd(&curx);
1675     cropBoxObj.arrayAdd(&cury);
1676     cropBoxObj.arrayAdd(&cllx);
1677     cropBoxObj.arrayAdd(&clly);
1678     pageDict->add(copyString("CropBox"), &cropBoxObj);
1679     cropBoxObj.getArray()->incRef();
1680     pageDict->add(copyString("TrimBox"), &cropBoxObj);
1681   } else {
1682     mediaBoxObj.getArray()->incRef();
1683     pageDict->add(copyString("TrimBox"), &mediaBoxObj);
1684   }
1685   Object rotateObj;
1686   rotateObj.initInt(rotate);
1687   pageDict->add(copyString("Rotate"), &rotateObj);
1688   getXRef()->setModifiedObject(&page, *refPage);
1689   page.free();
1690 }
1691
1692 void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
1693 {
1694   pageDict->remove("OpenAction");
1695   pageDict->remove("Outlines");
1696   pageDict->remove("StructTreeRoot");
1697
1698   for (int n = 0; n < pageDict->getLength(); n++) {
1699     const char *key = pageDict->getKey(n);
1700     Object value; pageDict->getValNF(n, &value);
1701     if (strcmp(key, "Parent") != 0 &&
1702               strcmp(key, "Pages") != 0 &&
1703               strcmp(key, "AcroForm") != 0 &&
1704               strcmp(key, "Annots") != 0 &&
1705               strcmp(key, "P") != 0 &&
1706         strcmp(key, "Root") != 0) {
1707       markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
1708     }
1709     value.free();
1710   }
1711 }
1712
1713 GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
1714   Object annots;
1715   GBool modified = gFalse;
1716   annotsObj->fetch(getXRef(), &annots);
1717   if (annots.isArray()) {
1718       Array *array = annots.getArray();
1719       for (int i=array->getLength() - 1; i >= 0; i--) {
1720         Object obj1;
1721         if (array->get(i, &obj1)->isDict()) {
1722           Object type;
1723           Dict *dict = obj1.getDict();
1724           dict->lookup("Type", &type);
1725           if (type.isName() && strcmp(type.getName(), "Annot") == 0) {
1726             Object obj2;
1727             if (dict->lookupNF("P", &obj2)->isRef()) {
1728               if (obj2.getRef().num == oldPageNum) {
1729                 Object obj3;
1730                 array->getNF(i, &obj3);
1731                 if (obj3.isRef()) {
1732                   Object *newRef = new Object();
1733                   newRef->initRef(newPageNum, 0);
1734                   dict->set("P", newRef);
1735                   getXRef()->setModifiedObject(&obj1, obj3.getRef());
1736                 }
1737                 obj3.free();
1738               } else if (obj2.getRef().num == newPageNum) {
1739                 obj1.free();
1740                 obj2.free();
1741                 type.free();
1742                 continue;
1743               } else {
1744                 Object page;
1745                 getXRef()->fetch(obj2.getRef().num, obj2.getRef().gen, &page);
1746                 if (page.isDict()) {
1747                   Object pagetype;
1748                   Dict *dict = page.getDict();
1749                   dict->lookup("Type", &pagetype);
1750                   if (!pagetype.isName() || strcmp(pagetype.getName(), "Page") != 0) {
1751                     obj1.free();
1752                     obj2.free();
1753                     type.free();
1754                     page.free();
1755                     pagetype.free();
1756                     continue;
1757                   }
1758                   pagetype.free();
1759                 }
1760                 page.free();
1761                 obj1.free();
1762                 obj2.free();
1763                 type.free();
1764                 array->remove(i);
1765                 modified = gTrue;
1766                 continue;
1767               }
1768             }
1769             obj2.free();
1770           }
1771           type.free();
1772           markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
1773         }
1774         obj1.free();
1775         array->getNF(i, &obj1);
1776         if (obj1.isRef()) {
1777           if (obj1.getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree) {
1778             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryFree) {
1779               continue;  // already marked as free => should be replaced
1780             }
1781             xRef->add(obj1.getRef().num + numOffset, obj1.getRef().gen, 0, gTrue);
1782             if (getXRef()->getEntry(obj1.getRef().num)->type == xrefEntryCompressed) {
1783               xRef->getEntry(obj1.getRef().num + numOffset)->type = xrefEntryCompressed;
1784             }
1785           }
1786           if (obj1.getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1787               countRef->getEntry(obj1.getRef().num + numOffset)->type == xrefEntryFree)
1788           {
1789             countRef->add(obj1.getRef().num + numOffset, 1, 0, gTrue);
1790           } else {
1791             XRefEntry *entry = countRef->getEntry(obj1.getRef().num + numOffset);
1792             entry->gen++;
1793           }
1794         }
1795         obj1.free();
1796       }
1797   }
1798   if (annotsObj->isRef()) {
1799     if (annotsObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree) {
1800       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryFree) {
1801         return modified;  // already marked as free => should be replaced
1802       }
1803       xRef->add(annotsObj->getRef().num + numOffset, annotsObj->getRef().gen, 0, gTrue);
1804       if (getXRef()->getEntry(annotsObj->getRef().num)->type == xrefEntryCompressed) {
1805         xRef->getEntry(annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1806       }
1807     }
1808     if (annotsObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1809         countRef->getEntry(annotsObj->getRef().num + numOffset)->type == xrefEntryFree)
1810     {
1811       countRef->add(annotsObj->getRef().num + numOffset, 1, 0, gTrue);
1812     } else {
1813       XRefEntry *entry = countRef->getEntry(annotsObj->getRef().num + numOffset);
1814       entry->gen++;
1815     }
1816     getXRef()->setModifiedObject(&annots, annotsObj->getRef());
1817   }
1818   annots.free();
1819   return modified;
1820 }
1821
1822 void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) {
1823   Object acroform;
1824   GBool modified = gFalse;
1825   afObj->fetch(getXRef(), &acroform);
1826   if (acroform.isDict()) {
1827       Dict *dict = acroform.getDict();
1828       for (int i=0; i < dict->getLength(); i++) {
1829         if (strcmp(dict->getKey(i), "Fields") == 0) {
1830           Object fields;
1831           modified = markAnnotations(dict->getValNF(i, &fields), xRef, countRef, numOffset, oldRefNum, newRefNum);
1832           fields.free();
1833         } else {
1834           Object obj;
1835           markObject(dict->getValNF(i, &obj), xRef, countRef, numOffset, oldRefNum, newRefNum);
1836           obj.free();
1837         }
1838       }
1839   }
1840   if (afObj->isRef()) {
1841     if (afObj->getRef().num + (int) numOffset >= xRef->getNumObjects() || xRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree) {
1842       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryFree) {
1843         return;  // already marked as free => should be replaced
1844       }
1845       xRef->add(afObj->getRef().num + numOffset, afObj->getRef().gen, 0, gTrue);
1846       if (getXRef()->getEntry(afObj->getRef().num)->type == xrefEntryCompressed) {
1847         xRef->getEntry(afObj->getRef().num + numOffset)->type = xrefEntryCompressed;
1848       }
1849     }
1850     if (afObj->getRef().num + (int) numOffset >= countRef->getNumObjects() ||
1851         countRef->getEntry(afObj->getRef().num + numOffset)->type == xrefEntryFree)
1852     {
1853       countRef->add(afObj->getRef().num + numOffset, 1, 0, gTrue);
1854     } else {
1855       XRefEntry *entry = countRef->getEntry(afObj->getRef().num + numOffset);
1856       entry->gen++;
1857     }
1858     if (modified){
1859       getXRef()->setModifiedObject(&acroform, afObj->getRef());
1860     }
1861   }
1862   acroform.free();
1863   return;
1864 }
1865
1866 Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine)
1867 {
1868   Guint objectsCount = 0; //count the number of objects in the XRef(s)
1869   Guchar *fileKey;
1870   CryptAlgorithm encAlgorithm;
1871   int keyLength;
1872   xRef->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength);
1873
1874   for (int n = numOffset; n < xRef->getNumObjects(); n++) {
1875     if (xRef->getEntry(n)->type != xrefEntryFree) {
1876       Object obj;
1877       Ref ref;
1878       ref.num = n;
1879       ref.gen = xRef->getEntry(n)->gen;
1880       objectsCount++;
1881       getXRef()->fetch(ref.num - numOffset, ref.gen, &obj);
1882       Goffset offset = writeObjectHeader(&ref, outStr);
1883       if (combine) {
1884         writeObject(&obj, outStr, getXRef(), numOffset, NULL, cryptRC4, 0, 0, 0);
1885       } else if (xRef->getEntry(n)->getFlag(XRefEntry::Unencrypted)) {
1886         writeObject(&obj, outStr, NULL, cryptRC4, 0, 0, 0);
1887       } else {
1888         writeObject(&obj, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen);
1889       }
1890       writeObjectFooter(outStr);
1891       xRef->add(ref.num, ref.gen, offset, gTrue);
1892       obj.free();
1893     }
1894   }
1895   return objectsCount;
1896 }
1897
1898 #ifndef DISABLE_OUTLINE
1899 Outline *PDFDoc::getOutline()
1900 {
1901   if (!outline) {
1902     pdfdocLocker();
1903     // read outline
1904     outline = new Outline(catalog->getOutline(), xref);
1905   }
1906
1907   return outline;
1908 }
1909 #endif
1910
1911 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
1912 {
1913   PDFDoc *doc = new PDFDoc();
1914   doc->errCode = errorCode;
1915   doc->fileName = fileNameA;
1916
1917   return doc;
1918 }
1919
1920 long long PDFDoc::strToLongLong(char *s) {
1921   long long x, d;
1922   char *p;
1923
1924   x = 0;
1925   for (p = s; *p && isdigit(*p & 0xff); ++p) {
1926     d = *p - '0';
1927     if (x > (LLONG_MAX - d) / 10) {
1928       break;
1929     }
1930     x = 10 * x + d;
1931   }
1932   return x;
1933 }
1934
1935 // Read the 'startxref' position.
1936 Goffset PDFDoc::getStartXRef(GBool tryingToReconstruct)
1937 {
1938   if (startXRefPos == -1) {
1939
1940     if (isLinearized(tryingToReconstruct)) {
1941       char buf[linearizationSearchSize+1];
1942       int c, n, i;
1943
1944       str->setPos(0);
1945       for (n = 0; n < linearizationSearchSize; ++n) {
1946         if ((c = str->getChar()) == EOF) {
1947           break;
1948         }
1949         buf[n] = c;
1950       }
1951       buf[n] = '\0';
1952
1953       // find end of first obj (linearization dictionary)
1954       startXRefPos = 0;
1955       for (i = 0; i < n; i++) {
1956         if (!strncmp("endobj", &buf[i], 6)) {
1957           i += 6;
1958           //skip whitespace
1959           while (buf[i] && Lexer::isSpace(buf[i])) ++i;
1960           startXRefPos = i;
1961           break;
1962         }
1963       }
1964     } else {
1965       char buf[xrefSearchSize+1];
1966       char *p;
1967       int c, n, i;
1968
1969       // read last xrefSearchSize bytes
1970       int segnum = 0;
1971       int maxXRefSearch = 24576;
1972       if (str->getLength() < maxXRefSearch) maxXRefSearch = str->getLength();
1973       for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) {
1974         str->setPos((xrefSearchSize - 16) * segnum + xrefSearchSize, -1);
1975         for (n = 0; n < xrefSearchSize; ++n) {
1976           if ((c = str->getChar()) == EOF) {
1977             break;
1978           }
1979           buf[n] = c;
1980         }
1981         buf[n] = '\0';
1982
1983         // find startxref
1984         for (i = n - 9; i >= 0; --i) {
1985           if (!strncmp(&buf[i], "startxref", 9)) {
1986             break;
1987           }
1988         }
1989         if (i < 0) {
1990           startXRefPos = 0;
1991         } else {
1992           for (p = &buf[i + 9]; isspace(*p); ++p);
1993           startXRefPos = strToLongLong(p);
1994           break;
1995         }
1996       }
1997     }
1998
1999   }
2000
2001   return startXRefPos;
2002 }
2003
2004 Goffset PDFDoc::getMainXRefEntriesOffset(GBool tryingToReconstruct)
2005 {
2006   Guint mainXRefEntriesOffset = 0;
2007
2008   if (isLinearized(tryingToReconstruct)) {
2009     mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset();
2010   }
2011
2012   return mainXRefEntriesOffset;
2013 }
2014
2015 int PDFDoc::getNumPages()
2016 {
2017   if (isLinearized()) {
2018     int n;
2019     if ((n = getLinearization()->getNumPages())) {
2020       return n;
2021     }
2022   }
2023
2024   return catalog->getNumPages();
2025 }
2026
2027 Page *PDFDoc::parsePage(int page)
2028 {
2029   Page *p = NULL;
2030   Object obj;
2031   Ref pageRef;
2032   Dict *pageDict;
2033
2034   pageRef.num = getHints()->getPageObjectNum(page);
2035   if (!pageRef.num) {
2036     error(errSyntaxWarning, -1, "Failed to get object num from hint tables for page {0:d}", page);
2037     return NULL;
2038   }
2039
2040   // check for bogus ref - this can happen in corrupted PDF files
2041   if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) {
2042     error(errSyntaxWarning, -1, "Invalid object num ({0:d}) for page {1:d}", pageRef.num, page);
2043     return NULL;
2044   }
2045
2046   pageRef.gen = xref->getEntry(pageRef.num)->gen;
2047   xref->fetch(pageRef.num, pageRef.gen, &obj);
2048   if (!obj.isDict("Page")) {
2049     obj.free();
2050     error(errSyntaxWarning, -1, "Object ({0:d} {1:d}) is not a pageDict", pageRef.num, pageRef.gen);
2051     return NULL;
2052   }
2053   pageDict = obj.getDict();
2054
2055   p = new Page(this, page, pageDict, pageRef,
2056                new PageAttrs(NULL, pageDict), catalog->getForm());
2057   obj.free();
2058
2059   return p;
2060 }
2061
2062 Page *PDFDoc::getPage(int page)
2063 {
2064   if ((page < 1) || page > getNumPages()) return NULL;
2065
2066   if (isLinearized() && checkLinearization()) {
2067     pdfdocLocker();
2068     if (!pageCache) {
2069       pageCache = (Page **) gmallocn(getNumPages(), sizeof(Page *));
2070       for (int i = 0; i < getNumPages(); i++) {
2071         pageCache[i] = NULL;
2072       }
2073     }
2074     if (!pageCache[page-1]) {
2075       pageCache[page-1] = parsePage(page);
2076     }
2077     if (pageCache[page-1]) {
2078        return pageCache[page-1];
2079     } else {
2080        error(errSyntaxWarning, -1, "Failed parsing page {0:d} using hint tables", page);
2081     }
2082   }
2083
2084   return catalog->getPage(page);
2085 }