fix getsup (HH)
[luatex.git] / source / texk / web2c / luatexdir / image / pdftoepdf.w
blob0f3e75d002c98517daba27a1813ee4a18c75efef
1 % pdftoepdf.w
3 % Copyright 1996-2006 Han The Thanh <thanh@@pdftex.org>
4 % Copyright 2006-2015 Taco Hoekwater <taco@@luatex.org>
6 % This file is part of LuaTeX.
8 % LuaTeX is free software; you can redistribute it and/or modify it under
9 % the terms of the GNU General Public License as published by the Free
10 % Software Foundation; either version 2 of the License, or (at your
11 % option) any later version.
13 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
14 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 % License for more details.
18 % You should have received a copy of the GNU General Public License along
19 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
21 @ @c
23 #define __STDC_FORMAT_MACROS /* for PRId64 etc. */
25 #include "image/epdf.h"
/*
    This file is mostly C with very little C++; it only serves as an interface
    to the functions of poppler, which happens to be written in C++.
*/
32 extern void md5(Guchar *msg, int msgLen, Guchar *digest);
34 static GBool isInit = gFalse;
36 /* Maintain AVL tree of all PDF files for embedding */
38 static avl_table *PdfDocumentTree = NULL;
40 /* AVL sort PdfDocument into PdfDocumentTree by file_path */
42 static int CompPdfDocument(const void *pa, const void *pb, void * /*p */ )
44 return strcmp(((const PdfDocument *) pa)->file_path, ((const PdfDocument *) pb)->file_path);
47 /* Returns pointer to PdfDocument structure for PDF file. */
49 static PdfDocument *findPdfDocument(char *file_path)
51 PdfDocument *pdf_doc, tmp;
52 if (file_path == NULL) {
53 normal_error("pdf backend","empty filename when loading pdf file");
54 } else if (PdfDocumentTree == NULL) {
55 return NULL;
57 tmp.file_path = file_path;
58 pdf_doc = (PdfDocument *) avl_find(PdfDocumentTree, &tmp);
59 return pdf_doc;
62 #define PDF_CHECKSUM_SIZE 32
64 static char *get_file_checksum(const char *a, file_error_mode fe)
66 struct stat finfo;
67 char *ck = NULL;
68 if (stat(a, &finfo) == 0) {
69 off_t size = finfo.st_size;
70 time_t mtime = finfo.st_mtime;
71 ck = (char *) malloc(PDF_CHECKSUM_SIZE);
72 if (ck == NULL)
73 formatted_error("pdf inclusion","out of memory while processing '%s'", a);
74 snprintf(ck, PDF_CHECKSUM_SIZE, "%" PRIu64 "_%" PRIu64, (uint64_t) size,(uint64_t) mtime);
75 } else {
76 switch (fe) {
77 case FE_FAIL:
78 formatted_error("pdf inclusion","could not stat() file '%s'", a);
79 break;
80 case FE_RETURN_NULL:
81 if (ck != NULL)
82 free(ck);
83 ck = NULL;
84 break;
85 default:
86 assert(0);
89 return ck;
93 static char *get_stream_checksum (const char *str, unsigned long long str_size){
94 /* http://www.cse.yorku.ca/~oz/hash.html */
95 /* djb2 */
96 unsigned long hash ;
97 char *ck = NULL;
98 unsigned int i;
99 hash = 5381;
100 ck = (char *) malloc(STRSTREAM_CHECKSUM_SIZE+1);
101 if (ck == NULL)
102 normal_error("pdf inclusion","out of memory while processing a memstream");
103 for(i=0; i<(unsigned int)(str_size); i++) {
104 hash = ((hash << 5) + hash) + str[i]; /* hash * 33 + str[i] */
106 snprintf(ck,STRSTREAM_CHECKSUM_SIZE+1,"%lx",hash);
107 ck[STRSTREAM_CHECKSUM_SIZE]='\0';
108 return ck;
112 Returns pointer to PdfDocument structure for PDF file.
113 Creates a new PdfDocument structure if it doesn't exist yet.
114 When fe = FE_RETURN_NULL, the function returns NULL in error case.
117 PdfDocument *refPdfDocument(const char *file_path, file_error_mode fe)
119 char *checksum, *path_copy;
120 PdfDocument *pdf_doc;
121 PDFDoc *doc = NULL;
122 GooString *docName = NULL;
123 int new_flag = 0;
124 if ((checksum = get_file_checksum(file_path, fe)) == NULL) {
125 return (PdfDocument *) NULL;
127 path_copy = xstrdup(file_path);
128 if ((pdf_doc = findPdfDocument(path_copy)) == NULL) {
129 new_flag = 1;
130 pdf_doc = new PdfDocument;
131 pdf_doc->file_path = path_copy;
132 pdf_doc->checksum = checksum;
133 pdf_doc->doc = NULL;
134 pdf_doc->inObjList = NULL;
135 pdf_doc->ObjMapTree = NULL;
136 pdf_doc->occurences = 0; /* 0 = unreferenced */
137 pdf_doc->pc = 0;
138 } else {
139 if (strncmp(pdf_doc->checksum, checksum, PDF_CHECKSUM_SIZE) != 0) {
140 formatted_error("pdf inclusion","file has changed '%s'", file_path);
142 free(checksum);
143 free(path_copy);
145 if (pdf_doc->doc == NULL) {
146 docName = new GooString(file_path);
147 doc = new PDFDoc(docName); /* takes ownership of docName */
148 pdf_doc->pc++;
150 if (!doc->isOk() || !doc->okToPrint()) {
151 switch (fe) {
152 case FE_FAIL:
153 normal_error("pdf inclusion","reading image failed");
154 break;
155 case FE_RETURN_NULL:
156 delete doc;
157 /* delete docName */
158 if (new_flag == 1) {
159 if (pdf_doc->file_path != NULL)
160 free(pdf_doc->file_path);
161 if (pdf_doc->checksum != NULL)
162 free(pdf_doc->checksum);
163 delete pdf_doc;
165 return (PdfDocument *) NULL;
166 break;
167 default:
168 assert(0);
171 pdf_doc->doc = doc;
173 /* PDF file could be opened without problems, checksum ok. */
174 if (PdfDocumentTree == NULL)
175 PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
176 if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
177 avl_probe(PdfDocumentTree, pdf_doc);
179 pdf_doc->occurences++;
180 return pdf_doc;
184 Returns pointer to PdfDocument structure for a PDF stream in memory of streamsize
185 dimension. As before, creates a new PdfDocument structure if it doesn't exist yet
186 with file_path = file_id
189 PdfDocument *refMemStreamPdfDocument(char *docstream, unsigned long long streamsize,const char *file_id)
191 char *checksum;
192 char *file_path;
193 PdfDocument *pdf_doc;
194 PDFDoc *doc = NULL;
195 Object obj;
196 MemStream *docmemstream = NULL;
197 /*int new_flag = 0;*/
198 size_t cnt = 0;
199 checksum = get_stream_checksum(docstream, streamsize);
200 cnt = strlen(file_id);
201 assert(cnt>0 && cnt <STREAM_FILE_ID_LEN);
202 file_path = (char *) malloc(cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE+1); /* 1 for \0 */
203 assert(file_path != NULL);
204 strcpy(file_path,STREAM_URI);
205 strcat(file_path,file_id);
206 strcat(file_path,checksum);
207 file_path[cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE]='\0';
208 if ((pdf_doc = findPdfDocument(file_path)) == NULL) {
209 /*new_flag = 1;*/
210 pdf_doc = new PdfDocument;
211 pdf_doc->file_path = file_path;
212 pdf_doc->checksum = checksum;
213 pdf_doc->doc = NULL;
214 pdf_doc->inObjList = NULL;
215 pdf_doc->ObjMapTree = NULL;
216 pdf_doc->occurences = 0; /* 0 = unreferenced */
217 pdf_doc->pc = 0;
218 } else {
219 /* As is now, checksum is in file_path, so this check should be useless. */
220 if (strncmp(pdf_doc->checksum, checksum, STRSTREAM_CHECKSUM_SIZE) != 0) {
221 formatted_error("pdf inclusion","stream has changed '%s'", file_path);
223 free(file_path);
224 free(checksum);
226 if (pdf_doc->doc == NULL) {
227 docmemstream = new MemStream( docstream,0,streamsize, obj.initNull() );
228 doc = new PDFDoc(docmemstream); /* takes ownership of docmemstream */
229 pdf_doc->pc++;
230 if (!doc->isOk() || !doc->okToPrint()) {
231 normal_error("pdf inclusion","reading pdf Stream failed");
233 pdf_doc->doc = doc;
235 /* PDF file could be opened without problems, checksum ok. */
236 if (PdfDocumentTree == NULL)
237 PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
238 if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
239 avl_probe(PdfDocumentTree, pdf_doc);
241 pdf_doc->occurences++;
242 return pdf_doc;
246 AVL sort ObjMap into ObjMapTree by object number and generation keep the ObjMap
247 struct small, as these are accumulated until the end
250 struct ObjMap {
251 Ref in;
252 int out_num;
255 static int CompObjMap(const void *pa, const void *pb, void * /*p */ )
257 const Ref *a = &(((const ObjMap *) pa)->in);
258 const Ref *b = &(((const ObjMap *) pb)->in);
259 if (a->num > b->num)
260 return 1;
261 else if (a->num < b->num)
262 return -1;
263 else if (a->gen == b->gen)
264 return 0;
265 else if (a->gen < b->gen)
266 return -1;
267 return 1;
270 static ObjMap *findObjMap(PdfDocument * pdf_doc, Ref in)
272 ObjMap *obj_map, tmp;
273 if (pdf_doc->ObjMapTree == NULL)
274 return NULL;
275 tmp.in = in;
276 obj_map = (ObjMap *) avl_find(pdf_doc->ObjMapTree, &tmp);
277 return obj_map;
280 static void addObjMap(PdfDocument * pdf_doc, Ref in, int out_num)
282 ObjMap *obj_map = NULL;
283 if (pdf_doc->ObjMapTree == NULL)
284 pdf_doc->ObjMapTree = avl_create(CompObjMap, NULL, &avl_xallocator);
285 obj_map = new ObjMap;
286 obj_map->in = in;
287 obj_map->out_num = out_num;
288 avl_probe(pdf_doc->ObjMapTree, obj_map);
292 When copying the Resources of the selected page, all objects are
293 copied recursively top-down. The findObjMap() function checks if an
294 object has already been copied; if so, instead of copying just the
295 new object number will be referenced. The ObjMapTree guarantees,
296 that during the entire LuaTeX run any object from any embedded PDF
297 file will end up max. once in the output PDF file. Indirect objects
298 are not fetched during copying, but get a new object number from
299 LuaTeX and then will be appended into a linked list.
302 static int addInObj(PDF pdf, PdfDocument * pdf_doc, Ref ref)
304 ObjMap *obj_map;
305 InObj *p, *q, *n;
306 if (ref.num == 0) {
307 normal_error("pdf inclusion","reference to invalid object (broken pdf)");
309 if ((obj_map = findObjMap(pdf_doc, ref)) != NULL)
310 return obj_map->out_num;
311 n = new InObj;
312 n->ref = ref;
313 n->next = NULL;
314 n->num = pdf_create_obj(pdf, obj_type_others, 0);
315 addObjMap(pdf_doc, ref, n->num);
316 if (pdf_doc->inObjList == NULL) {
317 pdf_doc->inObjList = n;
318 } else {
320 It is important to add new objects at the end of the list,
321 because new objects are being added while the list is being
322 written out by writeRefs().
324 for (p = pdf_doc->inObjList; p != NULL; p = p->next)
325 q = p;
326 q->next = n;
328 return n->num;
332 Function converts double to pdffloat; very small and very large numbers
333 are NOT converted to scientific notation. Here n must be a number or real
334 conforming to the implementation limits of PDF as specified in appendix C.1
335 of the PDF Ref. These are:
337 maximum value of ints is +2^32
338 maximum value of reals is +2^15
339 smalles values of reals is 1/(2^16)
342 static pdffloat conv_double_to_pdffloat(double n)
344 pdffloat a;
345 a.e = 6;
346 a.m = i64round(n * ten_pow[a.e]);
347 return a;
350 static void copyObject(PDF, PdfDocument *, Object *);
352 void copyReal(PDF pdf, double d)
354 if (pdf->cave)
355 pdf_out(pdf, ' ');
356 print_pdffloat(pdf, conv_double_to_pdffloat(d));
357 pdf->cave = true;
360 static void copyString(PDF pdf, GooString * string)
362 char *p;
363 unsigned char c;
364 size_t i, l;
365 p = string->getCString();
366 l = (size_t) string->getLength();
367 if (pdf->cave)
368 pdf_out(pdf, ' ');
369 if (strlen(p) == l) {
370 pdf_out(pdf, '(');
371 for (; *p != 0; p++) {
372 c = (unsigned char) *p;
373 if (c == '(' || c == ')' || c == '\\')
374 pdf_printf(pdf, "\\%c", c);
375 else if (c < 0x20 || c > 0x7F)
376 pdf_printf(pdf, "\\%03o", (int) c);
377 else
378 pdf_out(pdf, c);
380 pdf_out(pdf, ')');
381 } else {
382 pdf_out(pdf, '<');
383 for (i = 0; i < l; i++) {
384 c = (unsigned char) string->getChar(i);
385 pdf_printf(pdf, "%.2x", (int) c);
387 pdf_out(pdf, '>');
389 pdf->cave = true;
392 static void copyName(PDF pdf, char *s)
394 pdf_out(pdf, '/');
395 for (; *s != 0; s++) {
396 if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
397 *s == '.' || *s == '-' || *s == '+')
398 pdf_out(pdf, *s);
399 else
400 pdf_printf(pdf, "#%.2X", *s & 0xFF);
402 pdf->cave = true;
405 static void copyArray(PDF pdf, PdfDocument * pdf_doc, Array * array)
407 int i, l;
408 Object obj1;
409 pdf_begin_array(pdf);
410 for (i = 0, l = array->getLength(); i < l; ++i) {
411 array->getNF(i, &obj1);
412 copyObject(pdf, pdf_doc, &obj1);
413 obj1.free();
415 pdf_end_array(pdf);
418 static void copyDict(PDF pdf, PdfDocument * pdf_doc, Dict * dict)
420 int i, l;
421 Object obj1;
422 pdf_begin_dict(pdf);
423 for (i = 0, l = dict->getLength(); i < l; ++i) {
424 copyName(pdf, dict->getKey(i));
425 dict->getValNF(i, &obj1);
426 copyObject(pdf, pdf_doc, &obj1);
427 obj1.free();
429 pdf_end_dict(pdf);
432 static void copyStreamStream(PDF pdf, Stream * str)
434 int c, i, len = 1024;
435 str->reset();
436 i = len;
437 while ((c = str->getChar()) != EOF) {
438 if (i == len) {
439 pdf_room(pdf, len);
440 i = 0;
442 pdf_quick_out(pdf, c);
443 i++;
447 static void copyStream(PDF pdf, PdfDocument * pdf_doc, Stream * stream)
449 copyDict(pdf, pdf_doc, stream->getDict());
450 pdf_begin_stream(pdf);
451 copyStreamStream(pdf, stream->getUndecodedStream());
452 pdf_end_stream(pdf);
455 static void copyObject(PDF pdf, PdfDocument * pdf_doc, Object * obj)
457 switch (obj->getType()) {
458 case objBool:
459 pdf_add_bool(pdf, (int) obj->getBool());
460 break;
461 case objInt:
462 pdf_add_int(pdf, obj->getInt());
463 break;
464 case objReal:
465 copyReal(pdf, obj->getReal());
466 break;
468 case objNum:
469 GBool isNum() { return type == objInt || type == objReal; }
470 break;
472 case objString:
473 copyString(pdf, obj->getString());
474 break;
475 case objName:
476 copyName(pdf, obj->getName());
477 break;
478 case objNull:
479 pdf_add_null(pdf);
480 break;
481 case objArray:
482 copyArray(pdf, pdf_doc, obj->getArray());
483 break;
484 case objDict:
485 copyDict(pdf, pdf_doc, obj->getDict());
486 break;
487 case objStream:
488 copyStream(pdf, pdf_doc, obj->getStream());
489 break;
490 case objRef:
491 pdf_add_ref(pdf, addInObj(pdf, pdf_doc, obj->getRef()));
492 break;
493 case objCmd:
494 case objError:
495 case objEOF:
496 case objNone:
497 formatted_error("pdf inclusion","type '%s' cannot be copied", obj->getTypeName());
498 break;
499 default:
500 /* poppler doesn't have any other types */
501 assert(0);
505 static void writeRefs(PDF pdf, PdfDocument * pdf_doc)
507 InObj *r, *n;
508 Object obj1;
509 XRef *xref;
510 PDFDoc *doc = pdf_doc->doc;
511 xref = doc->getXRef();
512 for (r = pdf_doc->inObjList; r != NULL;) {
513 xref->fetch(r->ref.num, r->ref.gen, &obj1);
514 if (obj1.isStream())
515 pdf_begin_obj(pdf, r->num, OBJSTM_NEVER);
516 else
517 pdf_begin_obj(pdf, r->num, 2);
518 copyObject(pdf, pdf_doc, &obj1);
519 obj1.free();
520 pdf_end_obj(pdf);
521 n = r->next;
522 delete r;
523 pdf_doc->inObjList = r = n;
527 /* get the pagebox coordinates according to the pagebox_spec */
529 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
531 switch (pagebox_spec) {
532 case PDF_BOX_SPEC_MEDIA:
533 return page->getMediaBox();
534 break;
535 case PDF_BOX_SPEC_CROP:
536 return page->getCropBox();
537 break;
538 case PDF_BOX_SPEC_BLEED:
539 return page->getBleedBox();
540 break;
541 case PDF_BOX_SPEC_TRIM:
542 return page->getTrimBox();
543 break;
544 case PDF_BOX_SPEC_ART:
545 return page->getArtBox();
546 break;
547 default:
548 return page->getMediaBox();
549 break;
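/*
    The following describes read_pdf_info() further below: it reads various
    information about the PDF and sets it up for later inclusion. A PDF whose
    version is higher than the wanted minor version is reported (as an error
    or as a warning, depending on the error level of the image), and it is an
    error when page_name is given and can not be found. It makes no sense to
    give both page_name and page_num. The number of the selected page is
    stored in the image dictionary.
*/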
560 void flush_pdf_info(image_dict * idict)
562 if (img_keepopen(idict)) {
563 unrefPdfDocument(img_filepath(idict));
568 void flush_pdfstream_info(image_dict * idict)
570 if (img_pdfstream_ptr(idict) != NULL) {
571 xfree(img_pdfstream_stream(idict));
572 xfree(img_pdfstream_ptr(idict));
573 img_pdfstream_stream(idict) = NULL;
574 img_pdfstream_ptr(idict) = NULL;
579 void read_pdf_info(image_dict * idict)
581 PdfDocument *pdf_doc = NULL;
582 PDFDoc *doc = NULL;
583 Catalog *catalog;
584 Page *page;
585 int rotate;
586 PDFRectangle *pagebox;
587 int pdf_major_version_found, pdf_minor_version_found;
588 float xsize, ysize, xorig, yorig;
589 if (isInit == gFalse) {
590 if (!(globalParams))
591 globalParams = new GlobalParams();
592 globalParams->setErrQuiet(gFalse);
593 isInit = gTrue;
595 if (img_type(idict) == IMG_TYPE_PDF)
596 pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
597 else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
598 pdf_doc = findPdfDocument(img_filepath(idict)) ;
599 if (pdf_doc == NULL )
600 normal_error("pdf inclusion", "memstream not initialized");
601 if (pdf_doc->doc == NULL)
602 normal_error("pdf inclusion", "memstream document is empty");
603 pdf_doc->occurences++;
604 } else {
605 normal_error("pdf inclusion","unknown document");
607 doc = pdf_doc->doc;
608 catalog = doc->getCatalog();
610 Check PDF version. This works only for PDF 1.x but since any versions of
611 PDF newer than 1.x will not be backwards compatible to PDF 1.x, we will
612 then have to changed drastically anyway.
614 pdf_major_version_found = doc->getPDFMajorVersion();
615 pdf_minor_version_found = doc->getPDFMinorVersion();
616 if ((pdf_major_version_found > 1) || (pdf_minor_version_found > img_pdfminorversion(idict))) {
617 const char *msg = "PDF inclusion: found PDF version '%d.%d', but at most version '1.%d' allowed";
618 if (img_errorlevel(idict) > 0) {
619 formatted_error("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
620 } else {
621 formatted_warning("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
624 img_totalpages(idict) = catalog->getNumPages();
625 if (img_pagename(idict)) {
626 /* get page by name */
627 GooString name(img_pagename(idict));
628 LinkDest *link = doc->findDest(&name);
629 if (link == NULL || !link->isOk())
630 formatted_error("pdf inclusion","invalid destination '%s'",img_pagename(idict));
631 Ref ref = link->getPageRef();
632 img_pagenum(idict) = catalog->findPage(ref.num, ref.gen);
633 if (img_pagenum(idict) == 0)
634 formatted_error("pdf inclusion","destination is not a page '%s'",img_pagename(idict));
635 delete link;
636 } else {
637 /* get page by number */
638 if (img_pagenum(idict) <= 0
639 || img_pagenum(idict) > img_totalpages(idict))
640 formatted_error("pdf inclusion","required page '%i' does not exist",(int) img_pagenum(idict));
642 /* get the required page */
643 page = catalog->getPage(img_pagenum(idict));
644 /* get the pagebox coordinates (media, crop,...) to use. */
645 pagebox = get_pagebox(page, img_pagebox(idict));
646 if (pagebox->x2 > pagebox->x1) {
647 xorig = pagebox->x1;
648 xsize = pagebox->x2 - pagebox->x1;
649 } else {
650 xorig = pagebox->x2;
651 xsize = pagebox->x1 - pagebox->x2;
653 if (pagebox->y2 > pagebox->y1) {
654 yorig = pagebox->y1;
655 ysize = pagebox->y2 - pagebox->y1;
656 } else {
657 yorig = pagebox->y2;
658 ysize = pagebox->y1 - pagebox->y2;
660 /* The following 4 parameters are raw. Do _not_ modify by /Rotate! */
661 img_xsize(idict) = bp2sp(xsize);
662 img_ysize(idict) = bp2sp(ysize);
663 img_xorig(idict) = bp2sp(xorig);
664 img_yorig(idict) = bp2sp(yorig);
666 Handle /Rotate parameter. Only multiples of 90 deg. are allowed (PDF Ref. v1.3,
667 p. 78). We also accept negative angles. Beware: PDF counts clockwise! */
668 rotate = page->getRotate();
669 switch (((rotate % 360) + 360) % 360) {
670 case 0:
671 img_rotation(idict) = 0;
672 break;
673 case 90:
674 img_rotation(idict) = 3;
675 break;
676 case 180:
677 img_rotation(idict) = 2;
678 break;
679 case 270:
680 img_rotation(idict) = 1;
681 break;
682 default:
683 formatted_warning("pdf inclusion","/Rotate parameter in PDF file not multiple of 90 degrees");
685 /* currently unused info whether PDF contains a /Group */
686 if (page->getGroup() != NULL)
687 img_set_group(idict);
689 LuaTeX pre 0.85 versions did this:
691 if (readtype == IMG_CLOSEINBETWEEN) {
692 unrefPdfDocument(img_filepath(idict));
695 and also unref'd in the finalizer zo we got an extra unrefs when garbage was
696 collected. However it is more efficient to keep the file open so we do that
697 now. The (slower) alternative is to unref here (which in most cases forcing a
698 close of the file) but then we must not call flush_pdf_info.
700 A close (unref) can be forced by nilling the dict object at the lua end and
701 forcing a collectgarbage("collect") after that.
704 if (! img_keepopen(idict)) {
705 unrefPdfDocument(img_filepath(idict));
710 Write the current epf_doc. Here the included PDF is copied, so most errors
711 that can happen during PDF inclusion will arise here.
714 void write_epdf(PDF pdf, image_dict * idict, int suppress_optional_info)
716 PdfDocument *pdf_doc = NULL;
717 PDFDoc *doc = NULL;
718 Catalog *catalog;
719 Page *page;
720 Ref *pageref;
721 Dict *pageDict;
722 Object obj1, contents, pageobj, pagesobj1, pagesobj2, *op1, *op2, *optmp;
723 PDFRectangle *pagebox;
724 int i, l;
725 double bbox[4];
726 /* char s[256]; */
727 const char *pagedictkeys[] = {
728 "Group", "LastModified", "Metadata", "PieceInfo", "Resources", "SeparationInfo", NULL
730 /* open PDF file */
731 if (img_type(idict) == IMG_TYPE_PDF) {
732 pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
733 } else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
734 pdf_doc = findPdfDocument(img_filepath(idict)) ;
735 pdf_doc->occurences++;
736 } else {
737 normal_error("pdf inclusion","unknown document");
739 doc = pdf_doc->doc;
740 catalog = doc->getCatalog();
741 page = catalog->getPage(img_pagenum(idict));
742 pageref = catalog->getPageRef(img_pagenum(idict));
743 doc->getXRef()->fetch(pageref->num, pageref->gen, &pageobj);
744 pageDict = pageobj.getDict();
745 /* write the Page header */
746 pdf_begin_obj(pdf, img_objnum(idict), OBJSTM_NEVER);
747 pdf_begin_dict(pdf);
748 pdf_dict_add_name(pdf, "Type", "XObject");
749 pdf_dict_add_name(pdf, "Subtype", "Form");
750 if (img_attr(idict) != NULL && strlen(img_attr(idict)) > 0) {
751 pdf_printf(pdf, "\n%s\n", img_attr(idict));
753 pdf_dict_add_int(pdf, "FormType", 1);
754 /* write additional information */
755 pdf_dict_add_img_filename(pdf, idict);
756 if ((suppress_optional_info & 4) == 0) {
757 pdf_dict_add_int(pdf, "PTEX.PageNumber", (int) img_pagenum(idict));
759 if ((suppress_optional_info & 8) == 0) {
760 doc->getDocInfoNF(&obj1);
761 if (obj1.isRef()) {
762 /* the info dict must be indirect (PDF Ref p. 61) */
763 pdf_dict_add_ref(pdf, "PTEX.InfoDict", addInObj(pdf, pdf_doc, obj1.getRef()));
765 obj1.free();
767 if (img_is_bbox(idict)) {
768 bbox[0] = sp2bp(img_bbox(idict)[0]);
769 bbox[1] = sp2bp(img_bbox(idict)[1]);
770 bbox[2] = sp2bp(img_bbox(idict)[2]);
771 bbox[3] = sp2bp(img_bbox(idict)[3]);
772 } else {
773 /* get the pagebox coordinates (media, crop,...) to use. */
774 pagebox = get_pagebox(page, img_pagebox(idict));
775 bbox[0] = pagebox->x1;
776 bbox[1] = pagebox->y1;
777 bbox[2] = pagebox->x2;
778 bbox[3] = pagebox->y2;
780 pdf_add_name(pdf, "BBox");
781 pdf_begin_array(pdf);
782 copyReal(pdf, bbox[0]);
783 copyReal(pdf, bbox[1]);
784 copyReal(pdf, bbox[2]);
785 copyReal(pdf, bbox[3]);
786 pdf_end_array(pdf);
788 Now all relevant parts of the Page dictionary are copied. Metadata validity
789 check is needed(as a stream it must be indirect).
791 pageDict->lookupNF("Metadata", &obj1);
792 if (!obj1.isNull() && !obj1.isRef())
793 formatted_warning("pdf inclusion","/Metadata must be indirect object");
794 obj1.free();
795 /* copy selected items in Page dictionary */
796 for (i = 0; pagedictkeys[i] != NULL; i++) {
797 pageDict->lookupNF(pagedictkeys[i], &obj1);
798 if (!obj1.isNull()) {
799 pdf_add_name(pdf, pagedictkeys[i]);
800 /* preserves indirection */
801 copyObject(pdf, pdf_doc, &obj1);
803 obj1.free();
806 If there are no Resources in the Page dict of the embedded page,
807 try to inherit the Resources from the Pages tree of the embedded
808 PDF file, climbing up the tree until the Resources are found.
809 (This fixes a problem with Scribus 1.3.3.14.)
811 pageDict->lookupNF("Resources", &obj1);
812 if (obj1.isNull()) {
813 op1 = &pagesobj1;
814 op2 = &pagesobj2;
815 pageDict->lookup("Parent", op1);
816 while (op1->isDict()) {
817 obj1.free();
818 op1->dictLookupNF("Resources", &obj1);
819 if (!obj1.isNull()) {
820 pdf_add_name(pdf, "Resources");
821 copyObject(pdf, pdf_doc, &obj1);
822 break;
824 op1->dictLookup("Parent", op2);
825 optmp = op1;
826 op1 = op2;
827 op2 = optmp;
828 op2->free();
830 if (!op1->isDict())
831 formatted_warning("pdf inclusion","Page /Resources missing");
832 op1->free();
834 obj1.free();
835 /* Write the Page contents. */
836 page->getContents(&contents);
837 if (contents.isStream()) {
839 Variant A: get stream and recompress under control of \pdfcompresslevel
841 pdf_begin_stream();
842 copyStreamStream(contents->getStream());
843 pdf_end_stream();
845 Variant B: copy stream without recompressing
847 contents.streamGetDict()->lookup("F", &obj1);
848 if (!obj1.isNull()) {
849 normal_error("pdf inclusion","unsupported external stream");
851 obj1.free();
852 contents.streamGetDict()->lookup("Length", &obj1);
853 pdf_add_name(pdf, "Length");
854 copyObject(pdf, pdf_doc, &obj1);
855 obj1.free();
856 contents.streamGetDict()->lookup("Filter", &obj1);
857 if (!obj1.isNull()) {
858 pdf_add_name(pdf, "Filter");
859 copyObject(pdf, pdf_doc, &obj1);
860 obj1.free();
861 contents.streamGetDict()->lookup("DecodeParms", &obj1);
862 if (!obj1.isNull()) {
863 pdf_add_name(pdf, "DecodeParms");
864 copyObject(pdf, pdf_doc, &obj1);
867 obj1.free();
868 pdf_end_dict(pdf);
869 pdf_begin_stream(pdf);
870 copyStreamStream(pdf, contents.getStream()->getUndecodedStream());
871 pdf_end_stream(pdf);
872 pdf_end_obj(pdf);
873 } else if (contents.isArray()) {
874 pdf_dict_add_streaminfo(pdf);
875 pdf_end_dict(pdf);
876 pdf_begin_stream(pdf);
877 for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
878 copyStreamStream(pdf, (contents.arrayGet(i, &obj1))->getStream());
879 obj1.free();
880 if (i < (l - 1)) {
882 Put a space between streams to be on the safe side (streams
883 should have a trailing space here, but one never knows)
885 pdf_out(pdf, ' ');
888 pdf_end_stream(pdf);
889 pdf_end_obj(pdf);
890 } else {
891 /* the contents are optional, but we need to include an empty stream */
892 pdf_dict_add_streaminfo(pdf);
893 pdf_end_dict(pdf);
894 pdf_begin_stream(pdf);
895 pdf_end_stream(pdf);
896 pdf_end_obj(pdf);
898 /* write out all indirect objects */
899 writeRefs(pdf, pdf_doc);
900 contents.free();
901 pageobj.free();
903 unrefPdfDocument() must come after contents.free() and pageobj.free()!
904 TH: The next line makes repeated pdf inclusion unacceptably slow
906 unrefPdfDocument(img_filepath(idict));
910 /* Deallocate a PdfDocument with all its resources. */
912 static void deletePdfDocumentPdfDoc(PdfDocument * pdf_doc)
914 InObj *r, *n;
915 /* this may be probably needed for an emergency destroyPdfDocument() */
916 for (r = pdf_doc->inObjList; r != NULL; r = n) {
917 n = r->next;
918 delete r;
920 delete pdf_doc->doc;
921 pdf_doc->doc = NULL;
922 pdf_doc->pc++;
925 static void destroyPdfDocument(void *pa, void * /*pb */ )
927 PdfDocument *pdf_doc = (PdfDocument *) pa;
928 deletePdfDocumentPdfDoc(pdf_doc);
929 /* TODO: delete rest of pdf_doc */
933 Called when an image has been written and its resources in image_tab are
934 freed and it's not referenced anymore.
937 void unrefPdfDocument(char *file_path)
939 PdfDocument *pdf_doc = findPdfDocument(file_path);
940 if (pdf_doc->occurences > 0) {
941 pdf_doc->occurences--;
942 if (pdf_doc->occurences == 0) {
943 deletePdfDocumentPdfDoc(pdf_doc);
945 } else {
947 We either have a mismatch in ref and unref or we're somehow out of sync
948 which can happen when we mess with the same file in lua and tex.
950 formatted_warning("pdf inclusion","there can be a mismatch in opening and closing file '%s'",file_path);
955 For completeness, but it isn't currently used (unreferencing is done by mean
956 of file_path.
959 void unrefMemStreamPdfDocument(char *file_id)
961 (void) unrefPdfDocument(file_id);
966 Called when PDF embedding system is finalized. We now deallocate all remaining
967 PdfDocuments.
970 void epdf_free()
972 if (PdfDocumentTree != NULL)
973 avl_destroy(PdfDocumentTree, destroyPdfDocument);
974 PdfDocumentTree = NULL;
975 if (isInit == gTrue)
976 delete globalParams;
977 isInit = gFalse;