source/texk/web2c/luatexdir/image/pdftoepdf.w

   1 % pdftoepdf.w
   2 %
   3 % Copyright 1996-2006 Han The Thanh <thanh@@pdftex.org>
   4 % Copyright 2006-2015 Taco Hoekwater <taco@@luatex.org>
   5 %
   6 % This file is part of LuaTeX.
   7 %
   8 % LuaTeX is free software; you can redistribute it and/or modify it under
   9 % the terms of the GNU General Public License as published by the Free
  10 % Software Foundation; either version 2 of the License, or (at your
  11 % option) any later version.
  12 %
  13 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
  14 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 % FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 % License for more details.
  17 %
  18 % You should have received a copy of the GNU General Public License along
  19 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
  20
  21 @ @c
  22
  23 #define __STDC_FORMAT_MACROS /* for PRId64 etc.  */
  24
  25 #include "image/epdf.h"
  26
  27 /*
  28     This file is mostly C and not very much C++; it's just used to interface
  29     the functions of poppler, which happens to be written in C++.
  30 */
  31
  32 extern void md5(Guchar *msg, int msgLen, Guchar *digest);
  33
  34 static GBool isInit = gFalse;
  35
  36 /* Maintain AVL tree of all PDF files for embedding */
  37
  38 static avl_table *PdfDocumentTree = NULL;
  39
  40 /* AVL sort PdfDocument into PdfDocumentTree by file_path */
  41
  42 static int CompPdfDocument(const void *pa, const void *pb, void * /*p */ )
  43 {
  44     return strcmp(((const PdfDocument *) pa)->file_path, ((const PdfDocument *) pb)->file_path);
  45 }
  46
  47 /* Returns pointer to PdfDocument structure for PDF file. */
  48
  49 static PdfDocument *findPdfDocument(char *file_path)
  50 {
  51     PdfDocument *pdf_doc, tmp;
  52     if (file_path == NULL) {
  53         normal_error("pdf backend","empty filename when loading pdf file");
  54     } else if (PdfDocumentTree == NULL) {
  55         return NULL;
  56     }
  57     tmp.file_path = file_path;
  58     pdf_doc = (PdfDocument *) avl_find(PdfDocumentTree, &tmp);
  59     return pdf_doc;
  60 }
  61
  62 #define PDF_CHECKSUM_SIZE 32
  63
  64 static char *get_file_checksum(const char *a, file_error_mode fe)
  65 {
  66     struct stat finfo;
  67     char *ck = NULL;
  68     if (stat(a, &finfo) == 0) {
  69         off_t size = finfo.st_size;
  70         time_t mtime = finfo.st_mtime;
  71         ck = (char *) malloc(PDF_CHECKSUM_SIZE);
  72         if (ck == NULL)
  73             formatted_error("pdf inclusion","out of memory while processing '%s'", a);
  74         snprintf(ck, PDF_CHECKSUM_SIZE, "%" PRIu64 "_%" PRIu64, (uint64_t) size,(uint64_t) mtime);
  75    } else {
  76         switch (fe) {
  77             case FE_FAIL:
  78                 formatted_error("pdf inclusion","could not stat() file '%s'", a);
  79                 break;
  80             case FE_RETURN_NULL:
  81                 if (ck != NULL)
  82                     free(ck);
  83                 ck = NULL;
  84                 break;
  85             default:
  86                 assert(0);
  87         }
  88     }
  89     return ck;
  90 }
  91
  92
  93 static char *get_stream_checksum (const char *str, unsigned long long str_size){
  94     /* http://www.cse.yorku.ca/~oz/hash.html */
  95     /* djb2                                  */
  96     unsigned long hash ;
  97     char *ck = NULL;
  98     unsigned int i;
  99     hash = 5381;
 100     ck = (char *) malloc(STRSTREAM_CHECKSUM_SIZE+1);
 101     if (ck == NULL)
 102         normal_error("pdf inclusion","out of memory while processing a memstream");
 103     for(i=0; i<(unsigned int)(str_size); i++) {
 104         hash = ((hash << 5) + hash) + str[i]; /* hash * 33 + str[i] */
 105     }
 106     snprintf(ck,STRSTREAM_CHECKSUM_SIZE+1,"%lx",hash);
 107     ck[STRSTREAM_CHECKSUM_SIZE]='\0';
 108     return ck;
 109 }
 110
 111 /*
 112     Returns pointer to PdfDocument structure for PDF file.
 113     Creates a new PdfDocument structure if it doesn't exist yet.
 114     When fe = FE_RETURN_NULL, the function returns NULL in error case.
 115 */
 116
 117 PdfDocument *refPdfDocument(const char *file_path, file_error_mode fe)
 118 {
 119     char *checksum, *path_copy;
 120     PdfDocument *pdf_doc;
 121     PDFDoc *doc = NULL;
 122     GooString *docName = NULL;
 123     int new_flag = 0;
 124     if ((checksum = get_file_checksum(file_path, fe)) == NULL) {
 125         return (PdfDocument *) NULL;
 126     }
 127     path_copy = xstrdup(file_path);
 128     if ((pdf_doc = findPdfDocument(path_copy)) == NULL) {
 129         new_flag = 1;
 130         pdf_doc = new PdfDocument;
 131         pdf_doc->file_path = path_copy;
 132         pdf_doc->checksum = checksum;
 133         pdf_doc->doc = NULL;
 134         pdf_doc->inObjList = NULL;
 135         pdf_doc->ObjMapTree = NULL;
 136         pdf_doc->occurences = 0; /* 0 = unreferenced */
 137         pdf_doc->pc = 0;
 138     } else {
 139         if (strncmp(pdf_doc->checksum, checksum, PDF_CHECKSUM_SIZE) != 0) {
 140             formatted_error("pdf inclusion","file has changed '%s'", file_path);
 141         }
 142         free(checksum);
 143         free(path_copy);
 144     }
 145     if (pdf_doc->doc == NULL) {
 146         docName = new GooString(file_path);
 147         doc = new PDFDoc(docName); /* takes ownership of docName */
 148         pdf_doc->pc++;
 149
 150         if (!doc->isOk() || !doc->okToPrint()) {
 151             switch (fe) {
 152             case FE_FAIL:
 153                 normal_error("pdf inclusion","reading image failed");
 154                 break;
 155             case FE_RETURN_NULL:
 156                 delete doc;
 157                 /* delete docName */
 158                 if (new_flag == 1) {
 159                     if (pdf_doc->file_path != NULL)
 160                         free(pdf_doc->file_path);
 161                     if (pdf_doc->checksum != NULL)
 162                         free(pdf_doc->checksum);
 163                     delete pdf_doc;
 164                 }
 165                 return (PdfDocument *) NULL;
 166                 break;
 167             default:
 168                 assert(0);
 169             }
 170         }
 171         pdf_doc->doc = doc;
 172     }
 173     /* PDF file could be opened without problems, checksum ok. */
 174     if (PdfDocumentTree == NULL)
 175         PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
 176     if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
 177         avl_probe(PdfDocumentTree, pdf_doc);
 178     }
 179     pdf_doc->occurences++;
 180     return pdf_doc;
 181 }
 182
 183 /*
 184     Returns pointer to PdfDocument structure for a PDF stream in memory of streamsize
 185     dimension. As before, creates a new PdfDocument structure if it doesn't exist yet
 186     with file_path = file_id
 187 */
 188
 189 PdfDocument *refMemStreamPdfDocument(char *docstream, unsigned long long streamsize,const char *file_id)
 190 {
 191     char *checksum;
 192     char *file_path;
 193     PdfDocument *pdf_doc;
 194     PDFDoc *doc = NULL;
 195     Object obj;
 196     MemStream *docmemstream = NULL;
 197     /*int new_flag = 0;*/
 198     size_t  cnt = 0;
 199     checksum = get_stream_checksum(docstream, streamsize);
 200     cnt = strlen(file_id);
 201     assert(cnt>0 && cnt <STREAM_FILE_ID_LEN);
 202     file_path = (char *) malloc(cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE+1); /* 1 for \0 */
 203     assert(file_path != NULL);
 204     strcpy(file_path,STREAM_URI);
 205     strcat(file_path,file_id);
 206     strcat(file_path,checksum);
 207     file_path[cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE]='\0';
 208     if ((pdf_doc = findPdfDocument(file_path)) == NULL) {
 209         /*new_flag = 1;*/
 210         pdf_doc = new PdfDocument;
 211         pdf_doc->file_path = file_path;
 212         pdf_doc->checksum = checksum;
 213         pdf_doc->doc = NULL;
 214         pdf_doc->inObjList = NULL;
 215         pdf_doc->ObjMapTree = NULL;
 216         pdf_doc->occurences = 0; /* 0 = unreferenced */
 217         pdf_doc->pc = 0;
 218     } else {
 219         /* As is now, checksum is in file_path, so this check should be useless. */
 220         if (strncmp(pdf_doc->checksum, checksum, STRSTREAM_CHECKSUM_SIZE) != 0) {
 221             formatted_error("pdf inclusion","stream has changed '%s'", file_path);
 222         }
 223         free(file_path);
 224         free(checksum);
 225     }
 226     if (pdf_doc->doc == NULL) {
 227         docmemstream = new MemStream( docstream,0,streamsize, obj.initNull() );
 228         doc = new PDFDoc(docmemstream); /* takes ownership of docmemstream */
 229         pdf_doc->pc++;
 230         if (!doc->isOk() || !doc->okToPrint()) {
 231             normal_error("pdf inclusion","reading pdf Stream failed");
 232     }
 233         pdf_doc->doc = doc;
 234     }
 235     /* PDF file could be opened without problems, checksum ok. */
 236     if (PdfDocumentTree == NULL)
 237         PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
 238     if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
 239         avl_probe(PdfDocumentTree, pdf_doc);
 240     }
 241     pdf_doc->occurences++;
 242     return pdf_doc;
 243 }
 244
/*
    AVL sort ObjMap into ObjMapTree by object number and generation. Keep the
    ObjMap struct small, as these are accumulated until the end.
*/
 249
/* One entry of a PdfDocument's ObjMapTree; entries are compared by |in| (see CompObjMap). */
struct ObjMap {
    Ref in;      /* object number and generation in the embedded PDF; the tree key */
    int out_num; /* object number assigned to the copy in the output PDF */
};
 254
 255 static int CompObjMap(const void *pa, const void *pb, void * /*p */ )
 256 {
 257     const Ref *a = &(((const ObjMap *) pa)->in);
 258     const Ref *b = &(((const ObjMap *) pb)->in);
 259     if (a->num > b->num)
 260         return 1;
 261     else if (a->num < b->num)
 262         return -1;
 263     else if (a->gen == b->gen)
 264         return 0;
 265     else if (a->gen < b->gen)
 266         return -1;
 267     return 1;
 268 }
 269
 270 static ObjMap *findObjMap(PdfDocument * pdf_doc, Ref in)
 271 {
 272     ObjMap *obj_map, tmp;
 273     if (pdf_doc->ObjMapTree == NULL)
 274         return NULL;
 275     tmp.in = in;
 276     obj_map = (ObjMap *) avl_find(pdf_doc->ObjMapTree, &tmp);
 277     return obj_map;
 278 }
 279
 280 static void addObjMap(PdfDocument * pdf_doc, Ref in, int out_num)
 281 {
 282     ObjMap *obj_map = NULL;
 283     if (pdf_doc->ObjMapTree == NULL)
 284         pdf_doc->ObjMapTree = avl_create(CompObjMap, NULL, &avl_xallocator);
 285     obj_map = new ObjMap;
 286     obj_map->in = in;
 287     obj_map->out_num = out_num;
 288     avl_probe(pdf_doc->ObjMapTree, obj_map);
 289 }
 290
 291 /*
 292     When copying the Resources of the selected page, all objects are
 293     copied recursively top-down.  The findObjMap() function checks if an
 294     object has already been copied; if so, instead of copying just the
 295     new object number will be referenced.  The ObjMapTree guarantees,
 296     that during the entire LuaTeX run any object from any embedded PDF
 297     file will end up max. once in the output PDF file.  Indirect objects
 298     are not fetched during copying, but get a new object number from
 299     LuaTeX and then will be appended into a linked list.
 300 */
 301
 302 static int addInObj(PDF pdf, PdfDocument * pdf_doc, Ref ref)
 303 {
 304     ObjMap *obj_map;
 305     InObj *p, *q, *n;
 306     if (ref.num == 0) {
 307         normal_error("pdf inclusion","reference to invalid object (broken pdf)");
 308     }
 309     if ((obj_map = findObjMap(pdf_doc, ref)) != NULL)
 310         return obj_map->out_num;
 311     n = new InObj;
 312     n->ref = ref;
 313     n->next = NULL;
 314     n->num = pdf_create_obj(pdf, obj_type_others, 0);
 315     addObjMap(pdf_doc, ref, n->num);
 316     if (pdf_doc->inObjList == NULL) {
 317         pdf_doc->inObjList = n;
 318     } else {
 319         /*
 320             It is important to add new objects at the end of the list,
 321             because new objects are being added while the list is being
 322             written out by writeRefs().
 323         */
 324         for (p = pdf_doc->inObjList; p != NULL; p = p->next)
 325             q = p;
 326         q->next = n;
 327     }
 328     return n->num;
 329 }
 330
 331 /*
 332     Function converts double to pdffloat; very small and very large numbers
 333     are NOT converted to scientific notation. Here n must be a number or real
 334     conforming to the implementation limits of PDF as specified in appendix C.1
 335     of the PDF Ref. These are:
 336
 337     maximum value of ints is +2^32
 338     maximum value of reals is +2^15
    smallest value of reals is 1/(2^16)
 340 */
 341
 342 static pdffloat conv_double_to_pdffloat(double n)
 343 {
 344     pdffloat a;
 345     a.e = 6;
 346     a.m = i64round(n * ten_pow[a.e]);
 347     return a;
 348 }
 349
 350 static void copyObject(PDF, PdfDocument *, Object *);
 351
 352 void copyReal(PDF pdf, double d)
 353 {
 354     if (pdf->cave)
 355         pdf_out(pdf, ' ');
 356     print_pdffloat(pdf, conv_double_to_pdffloat(d));
 357     pdf->cave = true;
 358 }
 359
 360 static void copyString(PDF pdf, GooString * string)
 361 {
 362     char *p;
 363     unsigned char c;
 364     size_t i, l;
 365     p = string->getCString();
 366     l = (size_t) string->getLength();
 367     if (pdf->cave)
 368         pdf_out(pdf, ' ');
 369     if (strlen(p) == l) {
 370         pdf_out(pdf, '(');
 371         for (; *p != 0; p++) {
 372             c = (unsigned char) *p;
 373             if (c == '(' || c == ')' || c == '\\')
 374                 pdf_printf(pdf, "\\%c", c);
 375             else if (c < 0x20 || c > 0x7F)
 376                 pdf_printf(pdf, "\\%03o", (int) c);
 377             else
 378                 pdf_out(pdf, c);
 379         }
 380         pdf_out(pdf, ')');
 381     } else {
 382         pdf_out(pdf, '<');
 383         for (i = 0; i < l; i++) {
 384             c = (unsigned char) string->getChar(i);
 385             pdf_printf(pdf, "%.2x", (int) c);
 386         }
 387         pdf_out(pdf, '>');
 388     }
 389     pdf->cave = true;
 390 }
 391
 392 static void copyName(PDF pdf, char *s)
 393 {
 394     pdf_out(pdf, '/');
 395     for (; *s != 0; s++) {
 396         if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
 397             *s == '.' || *s == '-' || *s == '+')
 398             pdf_out(pdf, *s);
 399         else
 400             pdf_printf(pdf, "#%.2X", *s & 0xFF);
 401     }
 402     pdf->cave = true;
 403 }
 404
 405 static void copyArray(PDF pdf, PdfDocument * pdf_doc, Array * array)
 406 {
 407     int i, l;
 408     Object obj1;
 409     pdf_begin_array(pdf);
 410     for (i = 0, l = array->getLength(); i < l; ++i) {
 411         array->getNF(i, &obj1);
 412         copyObject(pdf, pdf_doc, &obj1);
 413         obj1.free();
 414     }
 415     pdf_end_array(pdf);
 416 }
 417
 418 static void copyDict(PDF pdf, PdfDocument * pdf_doc, Dict * dict)
 419 {
 420     int i, l;
 421     Object obj1;
 422     pdf_begin_dict(pdf);
 423     for (i = 0, l = dict->getLength(); i < l; ++i) {
 424         copyName(pdf, dict->getKey(i));
 425         dict->getValNF(i, &obj1);
 426         copyObject(pdf, pdf_doc, &obj1);
 427         obj1.free();
 428     }
 429     pdf_end_dict(pdf);
 430 }
 431
 432 static void copyStreamStream(PDF pdf, Stream * str)
 433 {
 434     int c, i, len = 1024;
 435     str->reset();
 436     i = len;
 437     while ((c = str->getChar()) != EOF) {
 438         if (i == len) {
 439             pdf_room(pdf, len);
 440             i = 0;
 441         }
 442         pdf_quick_out(pdf, c);
 443         i++;
 444     }
 445 }
 446
 447 static void copyStream(PDF pdf, PdfDocument * pdf_doc, Stream * stream)
 448 {
 449     copyDict(pdf, pdf_doc, stream->getDict());
 450     pdf_begin_stream(pdf);
 451     copyStreamStream(pdf, stream->getUndecodedStream());
 452     pdf_end_stream(pdf);
 453 }
 454
 455 static void copyObject(PDF pdf, PdfDocument * pdf_doc, Object * obj)
 456 {
 457     switch (obj->getType()) {
 458     case objBool:
 459         pdf_add_bool(pdf, (int) obj->getBool());
 460         break;
 461     case objInt:
 462         pdf_add_int(pdf, obj->getInt());
 463         break;
 464     case objReal:
 465         copyReal(pdf, obj->getReal());
 466         break;
 467     /*
 468     case objNum:
 469         GBool isNum() { return type == objInt || type == objReal; }
 470         break;
 471     */
 472     case objString:
 473         copyString(pdf, obj->getString());
 474         break;
 475     case objName:
 476         copyName(pdf, obj->getName());
 477         break;
 478     case objNull:
 479         pdf_add_null(pdf);
 480         break;
 481     case objArray:
 482         copyArray(pdf, pdf_doc, obj->getArray());
 483         break;
 484     case objDict:
 485         copyDict(pdf, pdf_doc, obj->getDict());
 486         break;
 487     case objStream:
 488         copyStream(pdf, pdf_doc, obj->getStream());
 489         break;
 490     case objRef:
 491         pdf_add_ref(pdf, addInObj(pdf, pdf_doc, obj->getRef()));
 492         break;
 493     case objCmd:
 494     case objError:
 495     case objEOF:
 496     case objNone:
 497         formatted_error("pdf inclusion","type '%s' cannot be copied", obj->getTypeName());
 498         break;
 499     default:
 500         /* poppler doesn't have any other types */
 501         assert(0);
 502     }
 503 }
 504
 505 static void writeRefs(PDF pdf, PdfDocument * pdf_doc)
 506 {
 507     InObj *r, *n;
 508     Object obj1;
 509     XRef *xref;
 510     PDFDoc *doc = pdf_doc->doc;
 511     xref = doc->getXRef();
 512     for (r = pdf_doc->inObjList; r != NULL;) {
 513         xref->fetch(r->ref.num, r->ref.gen, &obj1);
 514         if (obj1.isStream())
 515             pdf_begin_obj(pdf, r->num, OBJSTM_NEVER);
 516         else
 517             pdf_begin_obj(pdf, r->num, 2);
 518         copyObject(pdf, pdf_doc, &obj1);
 519         obj1.free();
 520         pdf_end_obj(pdf);
 521         n = r->next;
 522         delete r;
 523         pdf_doc->inObjList = r = n;
 524     }
 525 }
 526
 527 /* get the pagebox coordinates according to the pagebox_spec */
 528
 529 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
 530 {
 531     switch (pagebox_spec) {
 532         case PDF_BOX_SPEC_MEDIA:
 533             return page->getMediaBox();
 534             break;
 535         case PDF_BOX_SPEC_CROP:
 536             return page->getCropBox();
 537             break;
 538         case PDF_BOX_SPEC_BLEED:
 539             return page->getBleedBox();
 540             break;
 541         case PDF_BOX_SPEC_TRIM:
 542             return page->getTrimBox();
 543             break;
 544         case PDF_BOX_SPEC_ART:
 545             return page->getArtBox();
 546             break;
 547         default:
 548             return page->getMediaBox();
 549             break;
 550     }
 551 }
 552
/*
    (This comment describes read_pdf_info() further below.)

    Reads various information about the PDF and sets it up for later inclusion.
    A PDF version higher than the wanted one (img_pdfminorversion) is reported as
    an error or a warning depending on img_errorlevel. Reading fails if page_name
    is given and can not be found. It makes no sense to give both page_name and
    page_num. The resulting page number is stored in the image dictionary.
*/
 558 */
 559
 560 void flush_pdf_info(image_dict * idict)
 561 {
 562     if (img_keepopen(idict)) {
 563         unrefPdfDocument(img_filepath(idict));
 564     }
 565 }
 566
 567 /*
 568     void flush_pdfstream_info(image_dict * idict)
 569     {
 570         if (img_pdfstream_ptr(idict) != NULL) {
 571             xfree(img_pdfstream_stream(idict));
 572             xfree(img_pdfstream_ptr(idict));
 573             img_pdfstream_stream(idict) = NULL;
 574             img_pdfstream_ptr(idict) = NULL;
 575         }
 576     }
 577 */
 578
 579 void read_pdf_info(image_dict * idict)
 580 {
 581     PdfDocument *pdf_doc = NULL;
 582     PDFDoc *doc = NULL;
 583     Catalog *catalog;
 584     Page *page;
 585     int rotate;
 586     PDFRectangle *pagebox;
 587     int pdf_major_version_found, pdf_minor_version_found;
 588     float xsize, ysize, xorig, yorig;
 589     if (isInit == gFalse) {
 590         if (!(globalParams))
 591             globalParams = new GlobalParams();
 592         globalParams->setErrQuiet(gFalse);
 593         isInit = gTrue;
 594     }
 595     if (img_type(idict) == IMG_TYPE_PDF)
 596         pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
 597     else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
 598         pdf_doc = findPdfDocument(img_filepath(idict)) ;
 599         pdf_doc->occurences++;
 600     } else {
 601         normal_error("pdf inclusion","unknown document");
 602     }
 603     doc = pdf_doc->doc;
 604     catalog = doc->getCatalog();
 605     /*
 606         Check PDF version. This works only for PDF 1.x but since any versions of
 607         PDF newer than 1.x will not be backwards compatible to PDF 1.x, we will
 608         then have to changed drastically anyway.
 609     */
 610     pdf_major_version_found = doc->getPDFMajorVersion();
 611     pdf_minor_version_found = doc->getPDFMinorVersion();
 612     if ((pdf_major_version_found > 1) || (pdf_minor_version_found > img_pdfminorversion(idict))) {
 613         const char *msg = "PDF inclusion: found PDF version '%d.%d', but at most version '1.%d' allowed";
 614         if (img_errorlevel(idict) > 0) {
 615             formatted_error("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
 616         } else {
 617             formatted_warning("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
 618         }
 619     }
 620     img_totalpages(idict) = catalog->getNumPages();
 621     if (img_pagename(idict)) {
 622         /* get page by name */
 623         GooString name(img_pagename(idict));
 624         LinkDest *link = doc->findDest(&name);
 625         if (link == NULL || !link->isOk())
 626             formatted_error("pdf inclusion","invalid destination '%s'",img_pagename(idict));
 627         Ref ref = link->getPageRef();
 628         img_pagenum(idict) = catalog->findPage(ref.num, ref.gen);
 629         if (img_pagenum(idict) == 0)
 630             formatted_error("pdf inclusion","destination is not a page '%s'",img_pagename(idict));
 631         delete link;
 632     } else {
 633         /* get page by number */
 634         if (img_pagenum(idict) <= 0
 635             || img_pagenum(idict) > img_totalpages(idict))
 636             formatted_error("pdf inclusion","required page '%i' does not exist",(int) img_pagenum(idict));
 637     }
 638     /* get the required page */
 639     page = catalog->getPage(img_pagenum(idict));
 640     /* get the pagebox coordinates (media, crop,...) to use. */
 641     pagebox = get_pagebox(page, img_pagebox(idict));
 642     if (pagebox->x2 > pagebox->x1) {
 643         xorig = pagebox->x1;
 644         xsize = pagebox->x2 - pagebox->x1;
 645     } else {
 646         xorig = pagebox->x2;
 647         xsize = pagebox->x1 - pagebox->x2;
 648     }
 649     if (pagebox->y2 > pagebox->y1) {
 650         yorig = pagebox->y1;
 651         ysize = pagebox->y2 - pagebox->y1;
 652     } else {
 653         yorig = pagebox->y2;
 654         ysize = pagebox->y1 - pagebox->y2;
 655     }
 656     /* The following 4 parameters are raw. Do _not_ modify by /Rotate! */
 657     img_xsize(idict) = bp2sp(xsize);
 658     img_ysize(idict) = bp2sp(ysize);
 659     img_xorig(idict) = bp2sp(xorig);
 660     img_yorig(idict) = bp2sp(yorig);
 661     /*
 662         Handle /Rotate parameter. Only multiples of 90 deg. are allowed (PDF Ref. v1.3,
 663         p. 78). We also accept negative angles. Beware: PDF counts clockwise! */
 664     rotate = page->getRotate();
 665     switch (((rotate % 360) + 360) % 360) {
 666         case 0:
 667             img_rotation(idict) = 0;
 668             break;
 669         case 90:
 670             img_rotation(idict) = 3;
 671             break;
 672         case 180:
 673             img_rotation(idict) = 2;
 674             break;
 675         case 270:
 676             img_rotation(idict) = 1;
 677             break;
 678         default:
 679             formatted_warning("pdf inclusion","/Rotate parameter in PDF file not multiple of 90 degrees");
 680     }
 681     /* currently unused info whether PDF contains a /Group */
 682     if (page->getGroup() != NULL)
 683         img_set_group(idict);
 684     /*
 685         LuaTeX pre 0.85 versions did this:
 686
 687         if (readtype == IMG_CLOSEINBETWEEN) {
 688             unrefPdfDocument(img_filepath(idict));
 689         }
 690
 691         and also unref'd in the finalizer zo we got an extra unrefs when garbage was
 692         collected. However it is more efficient to keep the file open so we do that
 693         now. The (slower) alternative is to unref here (which in most cases forcing a
 694         close of the file) but then we must not call flush_pdf_info.
 695
 696         A close (unref) can be forced by nilling the dict object at the lua end and
 697         forcing a collectgarbage("collect") after that.
 698
 699     */
 700     if (! img_keepopen(idict)) {
 701         unrefPdfDocument(img_filepath(idict));
 702     }
 703 }
 704
/*
    Write the current epdf document. Here the included PDF is copied, so most
    errors that can happen during PDF inclusion will arise here.
*/
 709
 710 void write_epdf(PDF pdf, image_dict * idict)
 711 {
 712     PdfDocument *pdf_doc = NULL;
 713     PDFDoc *doc = NULL;
 714     Catalog *catalog;
 715     Page *page;
 716     Ref *pageref;
 717     Dict *pageDict;
 718     Object obj1, contents, pageobj, pagesobj1, pagesobj2, *op1, *op2, *optmp;
 719     PDFRectangle *pagebox;
 720     int i, l;
 721     double bbox[4];
 722     char s[256];
 723     const char *pagedictkeys[] = {
 724         "Group", "LastModified", "Metadata", "PieceInfo", "Resources", "SeparationInfo", NULL
 725     };
 726     /* open PDF file */
 727     if (img_type(idict) == IMG_TYPE_PDF) {
 728         pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
 729     } else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
 730         pdf_doc = findPdfDocument(img_filepath(idict)) ;
 731         pdf_doc->occurences++;
 732     } else {
 733         normal_error("pdf inclusion","unknown document");
 734     }
 735     doc = pdf_doc->doc;
 736     catalog = doc->getCatalog();
 737     page = catalog->getPage(img_pagenum(idict));
 738     pageref = catalog->getPageRef(img_pagenum(idict));
 739     doc->getXRef()->fetch(pageref->num, pageref->gen, &pageobj);
 740     pageDict = pageobj.getDict();
 741     /* write the Page header */
 742     pdf_begin_obj(pdf, img_objnum(idict), OBJSTM_NEVER);
 743     pdf_begin_dict(pdf);
 744     pdf_dict_add_name(pdf, "Type", "XObject");
 745     pdf_dict_add_name(pdf, "Subtype", "Form");
 746     if (img_attr(idict) != NULL && strlen(img_attr(idict)) > 0)
 747         pdf_printf(pdf, "\n%s\n", img_attr(idict));
 748     pdf_dict_add_int(pdf, "FormType", 1);
 749     /* write additional information */
 750     pdf_dict_add_img_filename(pdf, idict);
 751     snprintf(s, 30, "%s.PageNumber", pdfkeyprefix);
 752     pdf_dict_add_int(pdf, s, (int) img_pagenum(idict));
 753     doc->getDocInfoNF(&obj1);
 754     if (obj1.isRef()) {
 755         /* the info dict must be indirect (PDF Ref p. 61) */
 756         snprintf(s, 30, "%s.InfoDict", pdfkeyprefix);
 757         pdf_dict_add_ref(pdf, s, addInObj(pdf, pdf_doc, obj1.getRef()));
 758     }
 759     obj1.free();
 760     if (img_is_bbox(idict)) {
 761         bbox[0] = sp2bp(img_bbox(idict)[0]);
 762         bbox[1] = sp2bp(img_bbox(idict)[1]);
 763         bbox[2] = sp2bp(img_bbox(idict)[2]);
 764         bbox[3] = sp2bp(img_bbox(idict)[3]);
 765     } else {
 766         /* get the pagebox coordinates (media, crop,...) to use. */
 767         pagebox = get_pagebox(page, img_pagebox(idict));
 768         bbox[0] = pagebox->x1;
 769         bbox[1] = pagebox->y1;
 770         bbox[2] = pagebox->x2;
 771         bbox[3] = pagebox->y2;
 772     }
 773     pdf_add_name(pdf, "BBox");
 774     pdf_begin_array(pdf);
 775     copyReal(pdf, bbox[0]);
 776     copyReal(pdf, bbox[1]);
 777     copyReal(pdf, bbox[2]);
 778     copyReal(pdf, bbox[3]);
 779     pdf_end_array(pdf);
 780     /*
 781         Now all relevant parts of the Page dictionary are copied. Metadata validity
 782         check is needed(as a stream it must be indirect).
 783     */
 784     pageDict->lookupNF("Metadata", &obj1);
 785     if (!obj1.isNull() && !obj1.isRef())
 786         formatted_warning("pdf inclusion","/Metadata must be indirect object");
 787     obj1.free();
 788     /* copy selected items in Page dictionary */
 789     for (i = 0; pagedictkeys[i] != NULL; i++) {
 790         pageDict->lookupNF(pagedictkeys[i], &obj1);
 791         if (!obj1.isNull()) {
 792             pdf_add_name(pdf, pagedictkeys[i]);
 793             /* preserves indirection */
 794             copyObject(pdf, pdf_doc, &obj1);
 795         }
 796         obj1.free();
 797     }
 798     /*
 799         If there are no Resources in the Page dict of the embedded page,
 800         try to inherit the Resources from the Pages tree of the embedded
 801         PDF file, climbing up the tree until the Resources are found.
 802         (This fixes a problem with Scribus 1.3.3.14.)
 803     */
 804     pageDict->lookupNF("Resources", &obj1);
 805     if (obj1.isNull()) {
 806         op1 = &pagesobj1;
 807         op2 = &pagesobj2;
 808         pageDict->lookup("Parent", op1);
 809         while (op1->isDict()) {
 810             obj1.free();
 811             op1->dictLookupNF("Resources", &obj1);
 812             if (!obj1.isNull()) {
 813                 pdf_add_name(pdf, "Resources");
 814                 copyObject(pdf, pdf_doc, &obj1);
 815                 break;
 816             }
 817             op1->dictLookup("Parent", op2);
 818             optmp = op1;
 819             op1 = op2;
 820             op2 = optmp;
 821             op2->free();
 822         };
 823         if (!op1->isDict())
 824             formatted_warning("pdf inclusion","Page /Resources missing");
 825         op1->free();
 826     }
 827     obj1.free();
 828     /* Write the Page contents. */
 829     page->getContents(&contents);
 830     if (contents.isStream()) {
 831         /*
 832             Variant A: get stream and recompress under control of \pdfcompresslevel
 833
 834             pdf_begin_stream();
 835             copyStreamStream(contents->getStream());
 836             pdf_end_stream();
 837
 838             Variant B: copy stream without recompressing
 839         */
 840         contents.streamGetDict()->lookup("F", &obj1);
 841         if (!obj1.isNull()) {
 842             normal_error("pdf inclusion","unsupported external stream");
 843         }
 844         obj1.free();
 845         contents.streamGetDict()->lookup("Length", &obj1);
 846         pdf_add_name(pdf, "Length");
 847         copyObject(pdf, pdf_doc, &obj1);
 848         obj1.free();
 849         contents.streamGetDict()->lookup("Filter", &obj1);
 850         if (!obj1.isNull()) {
 851             pdf_add_name(pdf, "Filter");
 852             copyObject(pdf, pdf_doc, &obj1);
 853             obj1.free();
 854             contents.streamGetDict()->lookup("DecodeParms", &obj1);
 855             if (!obj1.isNull()) {
 856                 pdf_add_name(pdf, "DecodeParms");
 857                 copyObject(pdf, pdf_doc, &obj1);
 858             }
 859         }
 860         obj1.free();
 861         pdf_end_dict(pdf);
 862         pdf_begin_stream(pdf);
 863         copyStreamStream(pdf, contents.getStream()->getUndecodedStream());
 864         pdf_end_stream(pdf);
 865         pdf_end_obj(pdf);
 866     } else if (contents.isArray()) {
 867         pdf_dict_add_streaminfo(pdf);
 868         pdf_end_dict(pdf);
 869         pdf_begin_stream(pdf);
 870         for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
 871             copyStreamStream(pdf, (contents.arrayGet(i, &obj1))->getStream());
 872             obj1.free();
 873             if (i < (l - 1)) {
 874                 /*
 875                     Put a space between streams to be on the safe side (streams
 876                     should have a trailing space here, but one never knows)
 877                 */
 878                 pdf_out(pdf, ' ');
 879             }
 880         }
 881         pdf_end_stream(pdf);
 882         pdf_end_obj(pdf);
 883     } else {
 884         /* the contents are optional, but we need to include an empty stream */
 885         pdf_dict_add_streaminfo(pdf);
 886         pdf_end_dict(pdf);
 887         pdf_begin_stream(pdf);
 888         pdf_end_stream(pdf);
 889         pdf_end_obj(pdf);
 890     }
 891     /* write out all indirect objects */
 892     writeRefs(pdf, pdf_doc);
 893     contents.free();
 894     pageobj.free();
 895     /*
 896         unrefPdfDocument() must come after contents.free() and pageobj.free()!
 897         TH: The next line makes repeated pdf inclusion unacceptably slow
 898
 899         unrefPdfDocument(img_filepath(idict));
 900     */
 901 }
 902
 903 /* Deallocate a PdfDocument with all its resources. */
 904
 905 static void deletePdfDocumentPdfDoc(PdfDocument * pdf_doc)
 906 {
 907     InObj *r, *n;
 908     /* this may be probably needed for an emergency destroyPdfDocument() */
 909     for (r = pdf_doc->inObjList; r != NULL; r = n) {
 910         n = r->next;
 911         delete r;
 912     }
 913     delete pdf_doc->doc;
 914     pdf_doc->doc = NULL;
 915     pdf_doc->pc++;
 916 }
 917
 918 static void destroyPdfDocument(void *pa, void * /*pb */ )
 919 {
 920     PdfDocument *pdf_doc = (PdfDocument *) pa;
 921     deletePdfDocumentPdfDoc(pdf_doc);
 922     /* TODO: delete rest of pdf_doc */
 923 }
 924
 925 /*
 926     Called when an image has been written and its resources in image_tab are
 927     freed and it's not referenced anymore.
 928 */
 929
 930 void unrefPdfDocument(char *file_path)
 931 {
 932     PdfDocument *pdf_doc = findPdfDocument(file_path);
 933     if (pdf_doc->occurences > 0) {
 934         pdf_doc->occurences--;
 935         if (pdf_doc->occurences == 0) {
 936             deletePdfDocumentPdfDoc(pdf_doc);
 937         }
 938     } else {
 939         /*
 940             We either have a mismatch in ref and unref or we're somehow out of sync
 941             which can happen when we mess with the same file in lua and tex.
 942         */
 943         formatted_warning("pdf inclusion","there can be a mismatch in opening and closing file '%s'",file_path);
 944     }
 945 }
 946
/*
    For completeness, but it isn't currently used (unreferencing is done by means
    of file_path).
*/
 951
 952 void unrefMemStreamPdfDocument(char *file_id)
 953 {
 954   (void) unrefPdfDocument(file_id);
 955
 956 }
 957
 958 /*
 959     Called when PDF embedding system is finalized.  We now deallocate all remaining
 960     PdfDocuments.
 961 */
 962
 963 void epdf_free()
 964 {
 965     if (PdfDocumentTree != NULL)
 966         avl_destroy(PdfDocumentTree, destroyPdfDocument);
 967     PdfDocumentTree = NULL;
 968     if (isInit == gTrue)
 969         delete globalParams;
 970     isInit = gFalse;
 971 }