source/texk/web2c/luatexdir/image/pdftoepdf.w

   1 % pdftoepdf.w
   2 %
   3 % Copyright 1996-2006 Han The Thanh <thanh@@pdftex.org>
   4 % Copyright 2006-2015 Taco Hoekwater <taco@@luatex.org>
   5 %
   6 % This file is part of LuaTeX.
   7 %
   8 % LuaTeX is free software; you can redistribute it and/or modify it under
   9 % the terms of the GNU General Public License as published by the Free
  10 % Software Foundation; either version 2 of the License, or (at your
  11 % option) any later version.
  12 %
  13 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
  14 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 % FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 % License for more details.
  17 %
  18 % You should have received a copy of the GNU General Public License along
  19 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
  20
  21 @ @c
  22
  23 #define __STDC_FORMAT_MACROS /* for PRId64 etc.  */
  24
  25 #include "image/epdf.h"
  26
  27 /*
  28     This file is mostly C and not very much C++; it's just used to interface
  29     the functions of poppler, which happens to be written in C++.
  30 */
  31
/* MD5 routine implemented outside this file; only declared here. */
extern void md5(Guchar *msg, int msgLen, Guchar *digest);

/* Becomes gTrue after read_pdf_info() has prepared poppler's globalParams once. */
static GBool isInit = gFalse;

/* Maintain AVL tree of all PDF files for embedding */

/* Created lazily on first use; entries are PdfDocument pointers ordered by file_path. */
static avl_table *PdfDocumentTree = NULL;
  39
  40 /* AVL sort PdfDocument into PdfDocumentTree by file_path */
  41
  42 static int CompPdfDocument(const void *pa, const void *pb, void * /*p */ )
  43 {
  44     return strcmp(((const PdfDocument *) pa)->file_path, ((const PdfDocument *) pb)->file_path);
  45 }
  46
  47 /* Returns pointer to PdfDocument structure for PDF file. */
  48
  49 static PdfDocument *findPdfDocument(char *file_path)
  50 {
  51     PdfDocument *pdf_doc, tmp;
  52     if (file_path == NULL) {
  53         normal_error("pdf backend","empty filename when loading pdf file");
  54     } else if (PdfDocumentTree == NULL) {
  55         return NULL;
  56     }
  57     tmp.file_path = file_path;
  58     pdf_doc = (PdfDocument *) avl_find(PdfDocumentTree, &tmp);
  59     return pdf_doc;
  60 }
  61
  62 #define PDF_CHECKSUM_SIZE 32
  63
  64 static char *get_file_checksum(const char *a, file_error_mode fe)
  65 {
  66     struct stat finfo;
  67     char *ck = NULL;
  68     if (stat(a, &finfo) == 0) {
  69         off_t size = finfo.st_size;
  70         time_t mtime = finfo.st_mtime;
  71         ck = (char *) malloc(PDF_CHECKSUM_SIZE);
  72         if (ck == NULL)
  73             formatted_error("pdf inclusion","out of memory while processing '%s'", a);
  74         snprintf(ck, PDF_CHECKSUM_SIZE, "%" PRIu64 "_%" PRIu64, (uint64_t) size,(uint64_t) mtime);
  75    } else {
  76         switch (fe) {
  77             case FE_FAIL:
  78                 formatted_error("pdf inclusion","could not stat() file '%s'", a);
  79                 break;
  80             case FE_RETURN_NULL:
  81                 if (ck != NULL)
  82                     free(ck);
  83                 ck = NULL;
  84                 break;
  85             default:
  86                 assert(0);
  87         }
  88     }
  89     return ck;
  90 }
  91
  92
  93 static char *get_stream_checksum (const char *str, unsigned long long str_size){
  94     /* http://www.cse.yorku.ca/~oz/hash.html */
  95     /* djb2                                  */
  96     unsigned long hash ;
  97     char *ck = NULL;
  98     unsigned int i;
  99     hash = 5381;
 100     ck = (char *) malloc(STRSTREAM_CHECKSUM_SIZE+1);
 101     if (ck == NULL)
 102         normal_error("pdf inclusion","out of memory while processing a memstream");
 103     for(i=0; i<(unsigned int)(str_size); i++) {
 104         hash = ((hash << 5) + hash) + str[i]; /* hash * 33 + str[i] */
 105     }
 106     snprintf(ck,STRSTREAM_CHECKSUM_SIZE+1,"%lx",hash);
 107     ck[STRSTREAM_CHECKSUM_SIZE]='\0';
 108     return ck;
 109 }
 110
/*
    Returns pointer to PdfDocument structure for PDF file.
    Creates a new PdfDocument structure if it doesn't exist yet.
    When fe = FE_RETURN_NULL, the function returns NULL in error case.

    On success the entry's occurences counter is incremented and the entry is
    guaranteed to be stored in PdfDocumentTree.
*/

PdfDocument *refPdfDocument(const char *file_path, file_error_mode fe)
{
    char *checksum, *path_copy;
    PdfDocument *pdf_doc;
    PDFDoc *doc = NULL;
    GooString *docName = NULL;
    int new_flag = 0;
    /* NULL here means stat() failed and fe allowed returning instead of erroring */
    if ((checksum = get_file_checksum(file_path, fe)) == NULL) {
        return (PdfDocument *) NULL;
    }
    /* findPdfDocument() wants a non-const char *, and a new entry keeps this copy */
    path_copy = xstrdup(file_path);
    if ((pdf_doc = findPdfDocument(path_copy)) == NULL) {
        /* first reference: the new entry takes ownership of path_copy and checksum */
        new_flag = 1;
        pdf_doc = new PdfDocument;
        pdf_doc->file_path = path_copy;
        pdf_doc->checksum = checksum;
        pdf_doc->doc = NULL;
        pdf_doc->inObjList = NULL;
        pdf_doc->ObjMapTree = NULL;
        pdf_doc->occurences = 0; /* 0 = unreferenced */
        pdf_doc->pc = 0;
    } else {
        /* known file: a different size/mtime checksum means it changed on disk */
        if (strncmp(pdf_doc->checksum, checksum, PDF_CHECKSUM_SIZE) != 0) {
            formatted_error("pdf inclusion","file has changed '%s'", file_path);
        }
        /* the existing entry keeps its own strings, so the temporaries are released */
        free(checksum);
        free(path_copy);
    }
    /* no PDFDoc attached to the entry (new entry, or none currently set): open one */
    if (pdf_doc->doc == NULL) {
        docName = new GooString(file_path);
        doc = new PDFDoc(docName); /* takes ownership of docName */
        /* pc is bumped for every PDFDoc constructed for this entry */
        pdf_doc->pc++;

        if (!doc->isOk() || !doc->okToPrint()) {
            switch (fe) {
            case FE_FAIL:
                normal_error("pdf inclusion","reading image failed");
                break;
            case FE_RETURN_NULL:
                delete doc;
                /* delete docName */
                /* an entry created by this call is not in the tree yet: dispose of it */
                if (new_flag == 1) {
                    if (pdf_doc->file_path != NULL)
                        free(pdf_doc->file_path);
                    if (pdf_doc->checksum != NULL)
                        free(pdf_doc->checksum);
                    delete pdf_doc;
                }
                return (PdfDocument *) NULL;
                break;
            default:
                assert(0);
            }
        }
        pdf_doc->doc = doc;
    }
    /* PDF file could be opened without problems, checksum ok. */
    if (PdfDocumentTree == NULL)
        PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
    /* insert only when no entry with this file_path is in the tree yet */
    if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
        avl_probe(PdfDocumentTree, pdf_doc);
    }
    pdf_doc->occurences++;
    return pdf_doc;
}
 182
 183 /*
 184     Returns pointer to PdfDocument structure for a PDF stream in memory of streamsize
 185     dimension. As before, creates a new PdfDocument structure if it doesn't exist yet
 186     with file_path = file_id
 187 */
 188
 189 PdfDocument *refMemStreamPdfDocument(char *docstream, unsigned long long streamsize,const char *file_id)
 190 {
 191     char *checksum;
 192     char *file_path;
 193     PdfDocument *pdf_doc;
 194     PDFDoc *doc = NULL;
 195     Object obj;
 196     MemStream *docmemstream = NULL;
 197     /*int new_flag = 0;*/
 198     size_t  cnt = 0;
 199     checksum = get_stream_checksum(docstream, streamsize);
 200     cnt = strlen(file_id);
 201     assert(cnt>0 && cnt <STREAM_FILE_ID_LEN);
 202     file_path = (char *) malloc(cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE+1); /* 1 for \0 */
 203     assert(file_path != NULL);
 204     strcpy(file_path,STREAM_URI);
 205     strcat(file_path,file_id);
 206     strcat(file_path,checksum);
 207     file_path[cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE]='\0';
 208     if ((pdf_doc = findPdfDocument(file_path)) == NULL) {
 209         /*new_flag = 1;*/
 210         pdf_doc = new PdfDocument;
 211         pdf_doc->file_path = file_path;
 212         pdf_doc->checksum = checksum;
 213         pdf_doc->doc = NULL;
 214         pdf_doc->inObjList = NULL;
 215         pdf_doc->ObjMapTree = NULL;
 216         pdf_doc->occurences = 0; /* 0 = unreferenced */
 217         pdf_doc->pc = 0;
 218     } else {
 219         /* As is now, checksum is in file_path, so this check should be useless. */
 220         if (strncmp(pdf_doc->checksum, checksum, STRSTREAM_CHECKSUM_SIZE) != 0) {
 221             formatted_error("pdf inclusion","stream has changed '%s'", file_path);
 222         }
 223         free(file_path);
 224         free(checksum);
 225     }
 226     if (pdf_doc->doc == NULL) {
 227         docmemstream = new MemStream( docstream,0,streamsize, obj.initNull() );
 228         doc = new PDFDoc(docmemstream); /* takes ownership of docmemstream */
 229         pdf_doc->pc++;
 230         if (!doc->isOk() || !doc->okToPrint()) {
 231             normal_error("pdf inclusion","reading pdf Stream failed");
 232     }
 233         pdf_doc->doc = doc;
 234     }
 235     /* PDF file could be opened without problems, checksum ok. */
 236     if (PdfDocumentTree == NULL)
 237         PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
 238     if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
 239         avl_probe(PdfDocumentTree, pdf_doc);
 240     }
 241     pdf_doc->occurences++;
 242     return pdf_doc;
 243 }
 244
/*
    AVL sort ObjMap into ObjMapTree by object number and generation. Keep the
    ObjMap struct small, as these are accumulated until the end.
*/

struct ObjMap {
    Ref in; /* reference (num, gen) of the object in the included PDF; the tree key */
    int out_num; /* object number given to that object in the output PDF */
};
 254
 255 static int CompObjMap(const void *pa, const void *pb, void * /*p */ )
 256 {
 257     const Ref *a = &(((const ObjMap *) pa)->in);
 258     const Ref *b = &(((const ObjMap *) pb)->in);
 259     if (a->num > b->num)
 260         return 1;
 261     else if (a->num < b->num)
 262         return -1;
 263     else if (a->gen == b->gen)
 264         return 0;
 265     else if (a->gen < b->gen)
 266         return -1;
 267     return 1;
 268 }
 269
 270 static ObjMap *findObjMap(PdfDocument * pdf_doc, Ref in)
 271 {
 272     ObjMap *obj_map, tmp;
 273     if (pdf_doc->ObjMapTree == NULL)
 274         return NULL;
 275     tmp.in = in;
 276     obj_map = (ObjMap *) avl_find(pdf_doc->ObjMapTree, &tmp);
 277     return obj_map;
 278 }
 279
 280 static void addObjMap(PdfDocument * pdf_doc, Ref in, int out_num)
 281 {
 282     ObjMap *obj_map = NULL;
 283     if (pdf_doc->ObjMapTree == NULL)
 284         pdf_doc->ObjMapTree = avl_create(CompObjMap, NULL, &avl_xallocator);
 285     obj_map = new ObjMap;
 286     obj_map->in = in;
 287     obj_map->out_num = out_num;
 288     avl_probe(pdf_doc->ObjMapTree, obj_map);
 289 }
 290
 291 /*
 292     When copying the Resources of the selected page, all objects are
 293     copied recursively top-down.  The findObjMap() function checks if an
 294     object has already been copied; if so, instead of copying just the
 295     new object number will be referenced.  The ObjMapTree guarantees,
 296     that during the entire LuaTeX run any object from any embedded PDF
 297     file will end up max. once in the output PDF file.  Indirect objects
 298     are not fetched during copying, but get a new object number from
 299     LuaTeX and then will be appended into a linked list.
 300 */
 301
 302 static int addInObj(PDF pdf, PdfDocument * pdf_doc, Ref ref)
 303 {
 304     ObjMap *obj_map;
 305     InObj *p, *q, *n;
 306     if (ref.num == 0) {
 307         normal_error("pdf inclusion","reference to invalid object (broken pdf)");
 308     }
 309     if ((obj_map = findObjMap(pdf_doc, ref)) != NULL)
 310         return obj_map->out_num;
 311     n = new InObj;
 312     n->ref = ref;
 313     n->next = NULL;
 314     n->num = pdf_create_obj(pdf, obj_type_others, 0);
 315     addObjMap(pdf_doc, ref, n->num);
 316     if (pdf_doc->inObjList == NULL) {
 317         pdf_doc->inObjList = n;
 318     } else {
 319         /*
 320             It is important to add new objects at the end of the list,
 321             because new objects are being added while the list is being
 322             written out by writeRefs().
 323         */
 324         for (p = pdf_doc->inObjList; p != NULL; p = p->next)
 325             q = p;
 326         q->next = n;
 327     }
 328     return n->num;
 329 }
 330
 331 /*
 332     Function converts double to pdffloat; very small and very large numbers
 333     are NOT converted to scientific notation. Here n must be a number or real
 334     conforming to the implementation limits of PDF as specified in appendix C.1
 335     of the PDF Ref. These are:
 336
 337     maximum value of ints is +2^32
 338     maximum value of reals is +2^15
 339     smalles values of reals is 1/(2^16)
 340 */
 341
 342 static pdffloat conv_double_to_pdffloat(double n)
 343 {
 344     pdffloat a;
 345     a.e = 6;
 346     a.m = i64round(n * ten_pow[a.e]);
 347     return a;
 348 }
 349
 350 static void copyObject(PDF, PdfDocument *, Object *);
 351
 352 void copyReal(PDF pdf, double d)
 353 {
 354     if (pdf->cave)
 355         pdf_out(pdf, ' ');
 356     print_pdffloat(pdf, conv_double_to_pdffloat(d));
 357     pdf->cave = true;
 358 }
 359
 360 static void copyString(PDF pdf, GooString * string)
 361 {
 362     char *p;
 363     unsigned char c;
 364     size_t i, l;
 365     p = string->getCString();
 366     l = (size_t) string->getLength();
 367     if (pdf->cave)
 368         pdf_out(pdf, ' ');
 369     if (strlen(p) == l) {
 370         pdf_out(pdf, '(');
 371         for (; *p != 0; p++) {
 372             c = (unsigned char) *p;
 373             if (c == '(' || c == ')' || c == '\\')
 374                 pdf_printf(pdf, "\\%c", c);
 375             else if (c < 0x20 || c > 0x7F)
 376                 pdf_printf(pdf, "\\%03o", (int) c);
 377             else
 378                 pdf_out(pdf, c);
 379         }
 380         pdf_out(pdf, ')');
 381     } else {
 382         pdf_out(pdf, '<');
 383         for (i = 0; i < l; i++) {
 384             c = (unsigned char) string->getChar(i);
 385             pdf_printf(pdf, "%.2x", (int) c);
 386         }
 387         pdf_out(pdf, '>');
 388     }
 389     pdf->cave = true;
 390 }
 391
 392 static void copyName(PDF pdf, char *s)
 393 {
 394     pdf_out(pdf, '/');
 395     for (; *s != 0; s++) {
 396         if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
 397             *s == '.' || *s == '-' || *s == '+')
 398             pdf_out(pdf, *s);
 399         else
 400             pdf_printf(pdf, "#%.2X", *s & 0xFF);
 401     }
 402     pdf->cave = true;
 403 }
 404
 405 static void copyArray(PDF pdf, PdfDocument * pdf_doc, Array * array)
 406 {
 407     int i, l;
 408     Object obj1;
 409     pdf_begin_array(pdf);
 410     for (i = 0, l = array->getLength(); i < l; ++i) {
 411         array->getNF(i, &obj1);
 412         copyObject(pdf, pdf_doc, &obj1);
 413         obj1.free();
 414     }
 415     pdf_end_array(pdf);
 416 }
 417
 418 static void copyDict(PDF pdf, PdfDocument * pdf_doc, Dict * dict)
 419 {
 420     int i, l;
 421     Object obj1;
 422     pdf_begin_dict(pdf);
 423     for (i = 0, l = dict->getLength(); i < l; ++i) {
 424         copyName(pdf, dict->getKey(i));
 425         dict->getValNF(i, &obj1);
 426         copyObject(pdf, pdf_doc, &obj1);
 427         obj1.free();
 428     }
 429     pdf_end_dict(pdf);
 430 }
 431
 432 static void copyStreamStream(PDF pdf, Stream * str)
 433 {
 434     int c, i, len = 1024;
 435     str->reset();
 436     i = len;
 437     while ((c = str->getChar()) != EOF) {
 438         if (i == len) {
 439             pdf_room(pdf, len);
 440             i = 0;
 441         }
 442         pdf_quick_out(pdf, c);
 443         i++;
 444     }
 445 }
 446
 447 static void copyStream(PDF pdf, PdfDocument * pdf_doc, Stream * stream)
 448 {
 449     copyDict(pdf, pdf_doc, stream->getDict());
 450     pdf_begin_stream(pdf);
 451     copyStreamStream(pdf, stream->getUndecodedStream());
 452     pdf_end_stream(pdf);
 453 }
 454
 455 static void copyObject(PDF pdf, PdfDocument * pdf_doc, Object * obj)
 456 {
 457     switch (obj->getType()) {
 458     case objBool:
 459         pdf_add_bool(pdf, (int) obj->getBool());
 460         break;
 461     case objInt:
 462         pdf_add_int(pdf, obj->getInt());
 463         break;
 464     case objReal:
 465         copyReal(pdf, obj->getReal());
 466         break;
 467     /*
 468     case objNum:
 469         GBool isNum() { return type == objInt || type == objReal; }
 470         break;
 471     */
 472     case objString:
 473         copyString(pdf, obj->getString());
 474         break;
 475     case objName:
 476         copyName(pdf, obj->getName());
 477         break;
 478     case objNull:
 479         pdf_add_null(pdf);
 480         break;
 481     case objArray:
 482         copyArray(pdf, pdf_doc, obj->getArray());
 483         break;
 484     case objDict:
 485         copyDict(pdf, pdf_doc, obj->getDict());
 486         break;
 487     case objStream:
 488         copyStream(pdf, pdf_doc, obj->getStream());
 489         break;
 490     case objRef:
 491         pdf_add_ref(pdf, addInObj(pdf, pdf_doc, obj->getRef()));
 492         break;
 493     case objCmd:
 494     case objError:
 495     case objEOF:
 496     case objNone:
 497         formatted_error("pdf inclusion","type '%s' cannot be copied", obj->getTypeName());
 498         break;
 499     default:
 500         /* poppler doesn't have any other types */
 501         assert(0);
 502     }
 503 }
 504
/*
    Writes out every object queued in pdf_doc->inObjList, deleting each list
    node once its object has been written. copyObject() can queue further
    objects through addInObj() while the list is walked; they are appended at
    the tail and therefore get processed by this same loop.
*/
static void writeRefs(PDF pdf, PdfDocument * pdf_doc)
{
    InObj *r, *n;
    Object obj1;
    XRef *xref;
    PDFDoc *doc = pdf_doc->doc;
    xref = doc->getXRef();
    for (r = pdf_doc->inObjList; r != NULL;) {
        /* fetch the queued object from the included PDF */
        xref->fetch(r->ref.num, r->ref.gen, &obj1);
        /* stream objects are started with OBJSTM_NEVER, all others with level 2 */
        if (obj1.isStream())
            pdf_begin_obj(pdf, r->num, OBJSTM_NEVER);
        else
            pdf_begin_obj(pdf, r->num, 2);
        copyObject(pdf, pdf_doc, &obj1);
        obj1.free();
        pdf_end_obj(pdf);
        /* read next only now: copyObject() may just have appended behind r */
        n = r->next;
        delete r;
        /* keep the list head valid so that later appends find the remaining tail */
        pdf_doc->inObjList = r = n;
    }
}
 526
 527 /* get the pagebox coordinates according to the pagebox_spec */
 528
 529 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
 530 {
 531     switch (pagebox_spec) {
 532         case PDF_BOX_SPEC_MEDIA:
 533             return page->getMediaBox();
 534             break;
 535         case PDF_BOX_SPEC_CROP:
 536             return page->getCropBox();
 537             break;
 538         case PDF_BOX_SPEC_BLEED:
 539             return page->getBleedBox();
 540             break;
 541         case PDF_BOX_SPEC_TRIM:
 542             return page->getTrimBox();
 543             break;
 544         case PDF_BOX_SPEC_ART:
 545             return page->getArtBox();
 546             break;
 547         default:
 548             return page->getMediaBox();
 549             break;
 550     }
 551 }
 552
/*
    Description of read_pdf_info(), defined further below: it reads various
    information about the PDF and sets it up for later inclusion. A PDF whose version
    is higher than the wanted minor PDF version is reported as an error or as a
    warning, depending on the error level of the image; a page_name that can not be
    found is an error. It makes no sense to give both page_name and page_num. The
    resulting page number is stored in the image dictionary.
*/
 559
 560 void flush_pdf_info(image_dict * idict)
 561 {
 562     if (img_keepopen(idict)) {
 563         unrefPdfDocument(img_filepath(idict));
 564     }
 565 }
 566
 567 /*
 568     void flush_pdfstream_info(image_dict * idict)
 569     {
 570         if (img_pdfstream_ptr(idict) != NULL) {
 571             xfree(img_pdfstream_stream(idict));
 572             xfree(img_pdfstream_ptr(idict));
 573             img_pdfstream_stream(idict) = NULL;
 574             img_pdfstream_ptr(idict) = NULL;
 575         }
 576     }
 577 */
 578
/*
    Opens (or looks up) the PDF document behind idict and fills in the image
    dictionary: total page count, selected page number, raw page box size and
    origin, rotation, and the group flag. Problems are reported through
    normal_error(), formatted_error() or formatted_warning().
*/
void read_pdf_info(image_dict * idict)
{
    PdfDocument *pdf_doc = NULL;
    PDFDoc *doc = NULL;
    Catalog *catalog;
    Page *page;
    int rotate;
    PDFRectangle *pagebox;
    int pdf_major_version_found, pdf_minor_version_found;
    float xsize, ysize, xorig, yorig;
    /* one-time poppler setup: create globalParams if needed and enable error output */
    if (isInit == gFalse) {
        if (!(globalParams))
            globalParams = new GlobalParams();
        globalParams->setErrQuiet(gFalse);
        isInit = gTrue;
    }
    /* a file is opened on demand; a memstream must already be registered in the tree */
    if (img_type(idict) == IMG_TYPE_PDF)
        pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
    else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
        pdf_doc = findPdfDocument(img_filepath(idict)) ;
        if (pdf_doc == NULL )
           normal_error("pdf inclusion", "memstream not initialized");
        if (pdf_doc->doc == NULL)
           normal_error("pdf inclusion", "memstream document is empty");
        /* count this use, as refPdfDocument() does for files */
        pdf_doc->occurences++;
    } else {
        normal_error("pdf inclusion","unknown document");
    }
    doc = pdf_doc->doc;
    catalog = doc->getCatalog();
    /*
        Check PDF version. This works only for PDF 1.x but since any versions of
        PDF newer than 1.x will not be backwards compatible to PDF 1.x, things
        will then have to change drastically anyway.
    */
    pdf_major_version_found = doc->getPDFMajorVersion();
    pdf_minor_version_found = doc->getPDFMinorVersion();
    if ((pdf_major_version_found > 1) || (pdf_minor_version_found > img_pdfminorversion(idict))) {
        const char *msg = "PDF inclusion: found PDF version '%d.%d', but at most version '1.%d' allowed";
        /* the image's error level decides between a hard error and a warning */
        if (img_errorlevel(idict) > 0) {
            formatted_error("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
        } else {
            formatted_warning("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
        }
    }
    img_totalpages(idict) = catalog->getNumPages();
    if (img_pagename(idict)) {
        /* get page by name */
        GooString name(img_pagename(idict));
        LinkDest *link = doc->findDest(&name);
        if (link == NULL || !link->isOk())
            formatted_error("pdf inclusion","invalid destination '%s'",img_pagename(idict));
        Ref ref = link->getPageRef();
        /* a result of 0 is treated as "the destination does not refer to a page" */
        img_pagenum(idict) = catalog->findPage(ref.num, ref.gen);
        if (img_pagenum(idict) == 0)
            formatted_error("pdf inclusion","destination is not a page '%s'",img_pagename(idict));
        delete link;
    } else {
        /* get page by number */
        if (img_pagenum(idict) <= 0
            || img_pagenum(idict) > img_totalpages(idict))
            formatted_error("pdf inclusion","required page '%i' does not exist",(int) img_pagenum(idict));
    }
    /* get the required page */
    page = catalog->getPage(img_pagenum(idict));
    /* get the pagebox coordinates (media, crop,...) to use. */
    pagebox = get_pagebox(page, img_pagebox(idict));
    /* normalise the box: origin is the smaller coordinate, size is never negative */
    if (pagebox->x2 > pagebox->x1) {
        xorig = pagebox->x1;
        xsize = pagebox->x2 - pagebox->x1;
    } else {
        xorig = pagebox->x2;
        xsize = pagebox->x1 - pagebox->x2;
    }
    if (pagebox->y2 > pagebox->y1) {
        yorig = pagebox->y1;
        ysize = pagebox->y2 - pagebox->y1;
    } else {
        yorig = pagebox->y2;
        ysize = pagebox->y1 - pagebox->y2;
    }
    /* The following 4 parameters are raw. Do _not_ modify by /Rotate! */
    img_xsize(idict) = bp2sp(xsize);
    img_ysize(idict) = bp2sp(ysize);
    img_xorig(idict) = bp2sp(xorig);
    img_yorig(idict) = bp2sp(yorig);
    /*
        Handle /Rotate parameter. Only multiples of 90 deg. are allowed (PDF Ref. v1.3,
        p. 78). We also accept negative angles. Beware: PDF counts clockwise! */
    rotate = page->getRotate();
    /* the double modulo maps negative angles into the range 0..359 */
    switch (((rotate % 360) + 360) % 360) {
        case 0:
            img_rotation(idict) = 0;
            break;
        case 90:
            img_rotation(idict) = 3;
            break;
        case 180:
            img_rotation(idict) = 2;
            break;
        case 270:
            img_rotation(idict) = 1;
            break;
        default:
            /* img_rotation(idict) is left untouched in this case */
            formatted_warning("pdf inclusion","/Rotate parameter in PDF file not multiple of 90 degrees");
    }
    /* currently unused info whether PDF contains a /Group */
    if (page->getGroup() != NULL)
        img_set_group(idict);
    /*
        LuaTeX pre 0.85 versions did this:

        if (readtype == IMG_CLOSEINBETWEEN) {
            unrefPdfDocument(img_filepath(idict));
        }

        and also unref'd in the finalizer, so we got extra unrefs when garbage was
        collected. However it is more efficient to keep the file open so we do that
        now. The (slower) alternative is to unref here (which in most cases forces a
        close of the file) but then we must not call flush_pdf_info.

        A close (unref) can be forced by nilling the dict object at the lua end and
        forcing a collectgarbage("collect") after that.

    */
    if (! img_keepopen(idict)) {
        unrefPdfDocument(img_filepath(idict));
    }
}
 708
 709 /*
 710     Write the current epf_doc. Here the included PDF is copied, so most errors
 711     that can happen during PDF inclusion will arise here.
 712 */
 713
 714 void write_epdf(PDF pdf, image_dict * idict, int suppress_optional_info)
 715 {
 716     PdfDocument *pdf_doc = NULL;
 717     PDFDoc *doc = NULL;
 718     Catalog *catalog;
 719     Page *page;
 720     Ref *pageref;
 721     Dict *pageDict;
 722     Object obj1, contents, pageobj, pagesobj1, pagesobj2, *op1, *op2, *optmp;
 723     PDFRectangle *pagebox;
 724     int i, l;
 725     double bbox[4];
 726     /* char s[256]; */
 727     const char *pagedictkeys[] = {
 728         "Group", "LastModified", "Metadata", "PieceInfo", "Resources", "SeparationInfo", NULL
 729     };
 730     /* open PDF file */
 731     if (img_type(idict) == IMG_TYPE_PDF) {
 732         pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
 733     } else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
 734         pdf_doc = findPdfDocument(img_filepath(idict)) ;
 735         pdf_doc->occurences++;
 736     } else {
 737         normal_error("pdf inclusion","unknown document");
 738     }
 739     doc = pdf_doc->doc;
 740     catalog = doc->getCatalog();
 741     page = catalog->getPage(img_pagenum(idict));
 742     pageref = catalog->getPageRef(img_pagenum(idict));
 743     doc->getXRef()->fetch(pageref->num, pageref->gen, &pageobj);
 744     pageDict = pageobj.getDict();
 745     /* write the Page header */
 746     pdf_begin_obj(pdf, img_objnum(idict), OBJSTM_NEVER);
 747     pdf_begin_dict(pdf);
 748     pdf_dict_add_name(pdf, "Type", "XObject");
 749     pdf_dict_add_name(pdf, "Subtype", "Form");
 750     if (img_attr(idict) != NULL && strlen(img_attr(idict)) > 0) {
 751         pdf_printf(pdf, "\n%s\n", img_attr(idict));
 752     }
 753     pdf_dict_add_int(pdf, "FormType", 1);
 754     /* write additional information */
 755     pdf_dict_add_img_filename(pdf, idict);
 756     if ((suppress_optional_info & 4) == 0) {
 757         pdf_dict_add_int(pdf, "PTEX.PageNumber", (int) img_pagenum(idict));
 758     }
 759     if ((suppress_optional_info & 8) == 0) {
 760         doc->getDocInfoNF(&obj1);
 761         if (obj1.isRef()) {
 762             /* the info dict must be indirect (PDF Ref p. 61) */
 763             pdf_dict_add_ref(pdf, "PTEX.InfoDict", addInObj(pdf, pdf_doc, obj1.getRef()));
 764         }
 765         obj1.free();
 766     }
 767     if (img_is_bbox(idict)) {
 768         bbox[0] = sp2bp(img_bbox(idict)[0]);
 769         bbox[1] = sp2bp(img_bbox(idict)[1]);
 770         bbox[2] = sp2bp(img_bbox(idict)[2]);
 771         bbox[3] = sp2bp(img_bbox(idict)[3]);
 772     } else {
 773         /* get the pagebox coordinates (media, crop,...) to use. */
 774         pagebox = get_pagebox(page, img_pagebox(idict));
 775         bbox[0] = pagebox->x1;
 776         bbox[1] = pagebox->y1;
 777         bbox[2] = pagebox->x2;
 778         bbox[3] = pagebox->y2;
 779     }
 780     pdf_add_name(pdf, "BBox");
 781     pdf_begin_array(pdf);
 782     copyReal(pdf, bbox[0]);
 783     copyReal(pdf, bbox[1]);
 784     copyReal(pdf, bbox[2]);
 785     copyReal(pdf, bbox[3]);
 786     pdf_end_array(pdf);
 787     /*
 788         Now all relevant parts of the Page dictionary are copied. Metadata validity
 789         check is needed(as a stream it must be indirect).
 790     */
 791     pageDict->lookupNF("Metadata", &obj1);
 792     if (!obj1.isNull() && !obj1.isRef())
 793         formatted_warning("pdf inclusion","/Metadata must be indirect object");
 794     obj1.free();
 795     /* copy selected items in Page dictionary */
 796     for (i = 0; pagedictkeys[i] != NULL; i++) {
 797         pageDict->lookupNF(pagedictkeys[i], &obj1);
 798         if (!obj1.isNull()) {
 799             pdf_add_name(pdf, pagedictkeys[i]);
 800             /* preserves indirection */
 801             copyObject(pdf, pdf_doc, &obj1);
 802         }
 803         obj1.free();
 804     }
 805     /*
 806         If there are no Resources in the Page dict of the embedded page,
 807         try to inherit the Resources from the Pages tree of the embedded
 808         PDF file, climbing up the tree until the Resources are found.
 809         (This fixes a problem with Scribus 1.3.3.14.)
 810     */
 811     pageDict->lookupNF("Resources", &obj1);
 812     if (obj1.isNull()) {
 813         op1 = &pagesobj1;
 814         op2 = &pagesobj2;
 815         pageDict->lookup("Parent", op1);
 816         while (op1->isDict()) {
 817             obj1.free();
 818             op1->dictLookupNF("Resources", &obj1);
 819             if (!obj1.isNull()) {
 820                 pdf_add_name(pdf, "Resources");
 821                 copyObject(pdf, pdf_doc, &obj1);
 822                 break;
 823             }
 824             op1->dictLookup("Parent", op2);
 825             optmp = op1;
 826             op1 = op2;
 827             op2 = optmp;
 828             op2->free();
 829         };
 830         if (!op1->isDict())
 831             formatted_warning("pdf inclusion","Page /Resources missing");
 832         op1->free();
 833     }
 834     obj1.free();
 835     /* Write the Page contents. */
 836     page->getContents(&contents);
 837     if (contents.isStream()) {
 838         /*
 839             Variant A: get stream and recompress under control of \pdfcompresslevel
 840
 841             pdf_begin_stream();
 842             copyStreamStream(contents->getStream());
 843             pdf_end_stream();
 844
 845             Variant B: copy stream without recompressing
 846         */
 847         contents.streamGetDict()->lookup("F", &obj1);
 848         if (!obj1.isNull()) {
 849             normal_error("pdf inclusion","unsupported external stream");
 850         }
 851         obj1.free();
 852         contents.streamGetDict()->lookup("Length", &obj1);
 853         pdf_add_name(pdf, "Length");
 854         copyObject(pdf, pdf_doc, &obj1);
 855         obj1.free();
 856         contents.streamGetDict()->lookup("Filter", &obj1);
 857         if (!obj1.isNull()) {
 858             pdf_add_name(pdf, "Filter");
 859             copyObject(pdf, pdf_doc, &obj1);
 860             obj1.free();
 861             contents.streamGetDict()->lookup("DecodeParms", &obj1);
 862             if (!obj1.isNull()) {
 863                 pdf_add_name(pdf, "DecodeParms");
 864                 copyObject(pdf, pdf_doc, &obj1);
 865             }
 866         }
 867         obj1.free();
 868         pdf_end_dict(pdf);
 869         pdf_begin_stream(pdf);
 870         copyStreamStream(pdf, contents.getStream()->getUndecodedStream());
 871         pdf_end_stream(pdf);
 872         pdf_end_obj(pdf);
 873     } else if (contents.isArray()) {
 874         pdf_dict_add_streaminfo(pdf);
 875         pdf_end_dict(pdf);
 876         pdf_begin_stream(pdf);
 877         for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
 878             copyStreamStream(pdf, (contents.arrayGet(i, &obj1))->getStream());
 879             obj1.free();
 880             if (i < (l - 1)) {
 881                 /*
 882                     Put a space between streams to be on the safe side (streams
 883                     should have a trailing space here, but one never knows)
 884                 */
 885                 pdf_out(pdf, ' ');
 886             }
 887         }
 888         pdf_end_stream(pdf);
 889         pdf_end_obj(pdf);
 890     } else {
 891         /* the contents are optional, but we need to include an empty stream */
 892         pdf_dict_add_streaminfo(pdf);
 893         pdf_end_dict(pdf);
 894         pdf_begin_stream(pdf);
 895         pdf_end_stream(pdf);
 896         pdf_end_obj(pdf);
 897     }
 898     /* write out all indirect objects */
 899     writeRefs(pdf, pdf_doc);
 900     contents.free();
 901     pageobj.free();
 902     /*
 903         unrefPdfDocument() must come after contents.free() and pageobj.free()!
 904         TH: The next line makes repeated pdf inclusion unacceptably slow
 905
 906         unrefPdfDocument(img_filepath(idict));
 907     */
 908 }
 909
 910 /* Deallocate what a PdfDocument holds (pending InObj list, doc); pdf_doc itself is not freed here. */
 911
 912 static void deletePdfDocumentPdfDoc(PdfDocument * pdf_doc)
 913 {
 914     /* may be needed for an emergency destroyPdfDocument(); unlink each node before deleting it so the list head ends up NULL */
 915     while (pdf_doc->inObjList != NULL) {
 916         InObj *r = pdf_doc->inObjList;
 917         pdf_doc->inObjList = r->next;
 918         delete r;
 919     }
 920     delete pdf_doc->doc;
 921     pdf_doc->doc = NULL;        /* both pointers are reset, so a repeated call cannot free anything twice */
 922     pdf_doc->pc++;              /* bumped on every call; presumably a change counter (confirm in epdf.h) */
 923 }
 924
 925 static void destroyPdfDocument(void *pa, void * /*pb */ )
 926 {
 927     /* item callback handed to avl_destroy() in epdf_free(): pa is the stored PdfDocument, the second argument is unused */
 928     deletePdfDocumentPdfDoc((PdfDocument *) pa);
 929     /* TODO: delete rest of pdf_doc (the PdfDocument struct itself is not released here) */
 930 }
 931
 932 /*
 933     Drop one reference to the PdfDocument registered under file_path; called when an image has been
 934     written, its resources in image_tab are freed and it's not referenced anymore.
 935 */
 936
 937 void unrefPdfDocument(char *file_path)
 938 {
 939     PdfDocument *pdf_doc = findPdfDocument(file_path);
 940     /* guard: the lookup presumably returns NULL for a file that was never registered; don't dereference it */
 941     if (pdf_doc != NULL && pdf_doc->occurences > 0) {
 942         if (--pdf_doc->occurences == 0) {
 943             deletePdfDocumentPdfDoc(pdf_doc);   /* last reference gone: release the loaded document */
 944         }
 945     } else {
 946         /*
 947             We either have a mismatch in ref and unref or we're somehow out of sync
 948             which can happen when we mess with the same file in lua and tex.
 949         */
 950         formatted_warning("pdf inclusion","there can be a mismatch in opening and closing file '%s'",file_path);
 951     }
 952 }
 953
 954 /*
 955     Forwards file_id to unrefPdfDocument(). Provided for completeness only: it isn't
 956     currently used, because unreferencing is done by means of file_path.
 957 */
 958
 959 void unrefMemStreamPdfDocument(char *file_id)
 960 {
 961     /* file_id is passed on unchanged and serves as the lookup key */
 962     unrefPdfDocument(file_id);
 963 }
 964
 965 /* Called when the PDF embedding system is finalized: deallocates all remaining PdfDocuments and, */
 966 /* when isInit is set, globalParams; every global freed here is reset so nothing is left dangling. */
 967
 968 void epdf_free()
 969 {
 970     if (PdfDocumentTree != NULL)
 971         avl_destroy(PdfDocumentTree, destroyPdfDocument);
 972     PdfDocumentTree = NULL;
 973     if (isInit == gTrue) {
 974         delete globalParams;
 975         globalParams = NULL;    /* was left dangling before; a later check must see "not set", not freed memory */
 976     }
 977     isInit = gFalse;
 978 }