beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / image / pdftoepdf.w
blobb9d496ac9f0452dd84cce28d6dfc7a5309699098
1 % pdftoepdf.w
3 % Copyright 1996-2006 Han The Thanh <thanh@@pdftex.org>
4 % Copyright 2006-2015 Taco Hoekwater <taco@@luatex.org>
6 % This file is part of LuaTeX.
8 % LuaTeX is free software; you can redistribute it and/or modify it under
9 % the terms of the GNU General Public License as published by the Free
10 % Software Foundation; either version 2 of the License, or (at your
11 % option) any later version.
13 % LuaTeX is distributed in the hope that it will be useful, but WITHOUT
14 % ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 % FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 % License for more details.
18 % You should have received a copy of the GNU General Public License along
19 % with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
21 @ @c
23 #define __STDC_FORMAT_MACROS /* for PRId64 etc. */
25 #include "image/epdf.h"
28 This file is mostly C and not very much C++; it's just used to interface
29 the functions of poppler, which happens to be written in C++.
32 extern void md5(Guchar *msg, int msgLen, Guchar *digest);
34 static GBool isInit = gFalse;
36 /* Maintain AVL tree of all PDF files for embedding */
38 static avl_table *PdfDocumentTree = NULL;
40 /* AVL sort PdfDocument into PdfDocumentTree by file_path */
42 static int CompPdfDocument(const void *pa, const void *pb, void * /*p */ )
44 return strcmp(((const PdfDocument *) pa)->file_path, ((const PdfDocument *) pb)->file_path);
47 /* Returns pointer to PdfDocument structure for PDF file. */
49 static PdfDocument *findPdfDocument(char *file_path)
51 PdfDocument *pdf_doc, tmp;
52 if (file_path == NULL) {
53 normal_error("pdf backend","empty filename when loading pdf file");
54 } else if (PdfDocumentTree == NULL) {
55 return NULL;
57 tmp.file_path = file_path;
58 pdf_doc = (PdfDocument *) avl_find(PdfDocumentTree, &tmp);
59 return pdf_doc;
62 #define PDF_CHECKSUM_SIZE 32
64 static char *get_file_checksum(const char *a, file_error_mode fe)
66 struct stat finfo;
67 char *ck = NULL;
68 if (stat(a, &finfo) == 0) {
69 off_t size = finfo.st_size;
70 time_t mtime = finfo.st_mtime;
71 ck = (char *) malloc(PDF_CHECKSUM_SIZE);
72 if (ck == NULL)
73 formatted_error("pdf inclusion","out of memory while processing '%s'", a);
74 snprintf(ck, PDF_CHECKSUM_SIZE, "%" PRIu64 "_%" PRIu64, (uint64_t) size,(uint64_t) mtime);
75 } else {
76 switch (fe) {
77 case FE_FAIL:
78 formatted_error("pdf inclusion","could not stat() file '%s'", a);
79 break;
80 case FE_RETURN_NULL:
81 if (ck != NULL)
82 free(ck);
83 ck = NULL;
84 break;
85 default:
86 assert(0);
89 return ck;
93 static char *get_stream_checksum (const char *str, unsigned long long str_size){
94 /* http://www.cse.yorku.ca/~oz/hash.html */
95 /* djb2 */
96 unsigned long hash ;
97 char *ck = NULL;
98 unsigned int i;
99 hash = 5381;
100 ck = (char *) malloc(STRSTREAM_CHECKSUM_SIZE+1);
101 if (ck == NULL)
102 normal_error("pdf inclusion","out of memory while processing a memstream");
103 for(i=0; i<(unsigned int)(str_size); i++) {
104 hash = ((hash << 5) + hash) + str[i]; /* hash * 33 + str[i] */
106 snprintf(ck,STRSTREAM_CHECKSUM_SIZE+1,"%lx",hash);
107 ck[STRSTREAM_CHECKSUM_SIZE]='\0';
108 return ck;
112 Returns pointer to PdfDocument structure for PDF file.
113 Creates a new PdfDocument structure if it doesn't exist yet.
114 When fe = FE_RETURN_NULL, the function returns NULL in error case.
117 PdfDocument *refPdfDocument(const char *file_path, file_error_mode fe)
119 char *checksum, *path_copy;
120 PdfDocument *pdf_doc;
121 PDFDoc *doc = NULL;
122 GooString *docName = NULL;
123 int new_flag = 0;
124 if ((checksum = get_file_checksum(file_path, fe)) == NULL) {
125 return (PdfDocument *) NULL;
127 path_copy = xstrdup(file_path);
128 if ((pdf_doc = findPdfDocument(path_copy)) == NULL) {
129 new_flag = 1;
130 pdf_doc = new PdfDocument;
131 pdf_doc->file_path = path_copy;
132 pdf_doc->checksum = checksum;
133 pdf_doc->doc = NULL;
134 pdf_doc->inObjList = NULL;
135 pdf_doc->ObjMapTree = NULL;
136 pdf_doc->occurences = 0; /* 0 = unreferenced */
137 pdf_doc->pc = 0;
138 } else {
139 if (strncmp(pdf_doc->checksum, checksum, PDF_CHECKSUM_SIZE) != 0) {
140 formatted_error("pdf inclusion","file has changed '%s'", file_path);
142 free(checksum);
143 free(path_copy);
145 if (pdf_doc->doc == NULL) {
146 docName = new GooString(file_path);
147 doc = new PDFDoc(docName); /* takes ownership of docName */
148 pdf_doc->pc++;
150 if (!doc->isOk() || !doc->okToPrint()) {
151 switch (fe) {
152 case FE_FAIL:
153 normal_error("pdf inclusion","reading image failed");
154 break;
155 case FE_RETURN_NULL:
156 delete doc;
157 /* delete docName */
158 if (new_flag == 1) {
159 if (pdf_doc->file_path != NULL)
160 free(pdf_doc->file_path);
161 if (pdf_doc->checksum != NULL)
162 free(pdf_doc->checksum);
163 delete pdf_doc;
165 return (PdfDocument *) NULL;
166 break;
167 default:
168 assert(0);
171 pdf_doc->doc = doc;
173 /* PDF file could be opened without problems, checksum ok. */
174 if (PdfDocumentTree == NULL)
175 PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
176 if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
177 avl_probe(PdfDocumentTree, pdf_doc);
179 pdf_doc->occurences++;
180 return pdf_doc;
184 Returns pointer to PdfDocument structure for a PDF stream in memory of streamsize
185 dimension. As before, creates a new PdfDocument structure if it doesn't exist yet
186 with file_path = file_id
189 PdfDocument *refMemStreamPdfDocument(char *docstream, unsigned long long streamsize,const char *file_id)
191 char *checksum;
192 char *file_path;
193 PdfDocument *pdf_doc;
194 PDFDoc *doc = NULL;
195 Object obj;
196 MemStream *docmemstream = NULL;
197 /*int new_flag = 0;*/
198 size_t cnt = 0;
199 checksum = get_stream_checksum(docstream, streamsize);
200 cnt = strlen(file_id);
201 assert(cnt>0 && cnt <STREAM_FILE_ID_LEN);
202 file_path = (char *) malloc(cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE+1); /* 1 for \0 */
203 assert(file_path != NULL);
204 strcpy(file_path,STREAM_URI);
205 strcat(file_path,file_id);
206 strcat(file_path,checksum);
207 file_path[cnt+STREAM_URI_LEN+STRSTREAM_CHECKSUM_SIZE]='\0';
208 if ((pdf_doc = findPdfDocument(file_path)) == NULL) {
209 /*new_flag = 1;*/
210 pdf_doc = new PdfDocument;
211 pdf_doc->file_path = file_path;
212 pdf_doc->checksum = checksum;
213 pdf_doc->doc = NULL;
214 pdf_doc->inObjList = NULL;
215 pdf_doc->ObjMapTree = NULL;
216 pdf_doc->occurences = 0; /* 0 = unreferenced */
217 pdf_doc->pc = 0;
218 } else {
219 /* As is now, checksum is in file_path, so this check should be useless. */
220 if (strncmp(pdf_doc->checksum, checksum, STRSTREAM_CHECKSUM_SIZE) != 0) {
221 formatted_error("pdf inclusion","stream has changed '%s'", file_path);
223 free(file_path);
224 free(checksum);
226 if (pdf_doc->doc == NULL) {
227 docmemstream = new MemStream( docstream,0,streamsize, obj.initNull() );
228 doc = new PDFDoc(docmemstream); /* takes ownership of docmemstream */
229 pdf_doc->pc++;
230 if (!doc->isOk() || !doc->okToPrint()) {
231 normal_error("pdf inclusion","reading pdf Stream failed");
233 pdf_doc->doc = doc;
235 /* PDF file could be opened without problems, checksum ok. */
236 if (PdfDocumentTree == NULL)
237 PdfDocumentTree = avl_create(CompPdfDocument, NULL, &avl_xallocator);
238 if ((PdfDocument *) avl_find(PdfDocumentTree, pdf_doc) == NULL) {
239 avl_probe(PdfDocumentTree, pdf_doc);
241 pdf_doc->occurences++;
242 return pdf_doc;
246 AVL sort ObjMap into ObjMapTree by object number and generation. Keep the
247 ObjMap struct small, as these are accumulated until the end.
250 struct ObjMap {
251 Ref in;
252 int out_num;
255 static int CompObjMap(const void *pa, const void *pb, void * /*p */ )
257 const Ref *a = &(((const ObjMap *) pa)->in);
258 const Ref *b = &(((const ObjMap *) pb)->in);
259 if (a->num > b->num)
260 return 1;
261 else if (a->num < b->num)
262 return -1;
263 else if (a->gen == b->gen)
264 return 0;
265 else if (a->gen < b->gen)
266 return -1;
267 return 1;
270 static ObjMap *findObjMap(PdfDocument * pdf_doc, Ref in)
272 ObjMap *obj_map, tmp;
273 if (pdf_doc->ObjMapTree == NULL)
274 return NULL;
275 tmp.in = in;
276 obj_map = (ObjMap *) avl_find(pdf_doc->ObjMapTree, &tmp);
277 return obj_map;
280 static void addObjMap(PdfDocument * pdf_doc, Ref in, int out_num)
282 ObjMap *obj_map = NULL;
283 if (pdf_doc->ObjMapTree == NULL)
284 pdf_doc->ObjMapTree = avl_create(CompObjMap, NULL, &avl_xallocator);
285 obj_map = new ObjMap;
286 obj_map->in = in;
287 obj_map->out_num = out_num;
288 avl_probe(pdf_doc->ObjMapTree, obj_map);
292 When copying the Resources of the selected page, all objects are
293 copied recursively top-down. The findObjMap() function checks if an
294 object has already been copied; if so, instead of copying just the
295 new object number will be referenced. The ObjMapTree guarantees,
296 that during the entire LuaTeX run any object from any embedded PDF
297 file will end up max. once in the output PDF file. Indirect objects
298 are not fetched during copying, but get a new object number from
299 LuaTeX and then will be appended into a linked list.
302 static int addInObj(PDF pdf, PdfDocument * pdf_doc, Ref ref)
304 ObjMap *obj_map;
305 InObj *p, *q, *n;
306 if (ref.num == 0) {
307 normal_error("pdf inclusion","reference to invalid object (broken pdf)");
309 if ((obj_map = findObjMap(pdf_doc, ref)) != NULL)
310 return obj_map->out_num;
311 n = new InObj;
312 n->ref = ref;
313 n->next = NULL;
314 n->num = pdf_create_obj(pdf, obj_type_others, 0);
315 addObjMap(pdf_doc, ref, n->num);
316 if (pdf_doc->inObjList == NULL) {
317 pdf_doc->inObjList = n;
318 } else {
320 It is important to add new objects at the end of the list,
321 because new objects are being added while the list is being
322 written out by writeRefs().
324 for (p = pdf_doc->inObjList; p != NULL; p = p->next)
325 q = p;
326 q->next = n;
328 return n->num;
332 Function converts double to pdffloat; very small and very large numbers
333 are NOT converted to scientific notation. Here n must be a number or real
334 conforming to the implementation limits of PDF as specified in appendix C.1
335 of the PDF Ref. These are:
337 maximum value of ints is +2^32
338 maximum value of reals is +2^15
339 smallest value of reals is 1/(2^16)
342 static pdffloat conv_double_to_pdffloat(double n)
344 pdffloat a;
345 a.e = 6;
346 a.m = i64round(n * ten_pow[a.e]);
347 return a;
350 static void copyObject(PDF, PdfDocument *, Object *);
352 void copyReal(PDF pdf, double d)
354 if (pdf->cave)
355 pdf_out(pdf, ' ');
356 print_pdffloat(pdf, conv_double_to_pdffloat(d));
357 pdf->cave = true;
360 static void copyString(PDF pdf, GooString * string)
362 char *p;
363 unsigned char c;
364 size_t i, l;
365 p = string->getCString();
366 l = (size_t) string->getLength();
367 if (pdf->cave)
368 pdf_out(pdf, ' ');
369 if (strlen(p) == l) {
370 pdf_out(pdf, '(');
371 for (; *p != 0; p++) {
372 c = (unsigned char) *p;
373 if (c == '(' || c == ')' || c == '\\')
374 pdf_printf(pdf, "\\%c", c);
375 else if (c < 0x20 || c > 0x7F)
376 pdf_printf(pdf, "\\%03o", (int) c);
377 else
378 pdf_out(pdf, c);
380 pdf_out(pdf, ')');
381 } else {
382 pdf_out(pdf, '<');
383 for (i = 0; i < l; i++) {
384 c = (unsigned char) string->getChar(i);
385 pdf_printf(pdf, "%.2x", (int) c);
387 pdf_out(pdf, '>');
389 pdf->cave = true;
392 static void copyName(PDF pdf, char *s)
394 pdf_out(pdf, '/');
395 for (; *s != 0; s++) {
396 if (isdigit(*s) || isupper(*s) || islower(*s) || *s == '_' ||
397 *s == '.' || *s == '-' || *s == '+')
398 pdf_out(pdf, *s);
399 else
400 pdf_printf(pdf, "#%.2X", *s & 0xFF);
402 pdf->cave = true;
405 static void copyArray(PDF pdf, PdfDocument * pdf_doc, Array * array)
407 int i, l;
408 Object obj1;
409 pdf_begin_array(pdf);
410 for (i = 0, l = array->getLength(); i < l; ++i) {
411 array->getNF(i, &obj1);
412 copyObject(pdf, pdf_doc, &obj1);
413 obj1.free();
415 pdf_end_array(pdf);
418 static void copyDict(PDF pdf, PdfDocument * pdf_doc, Dict * dict)
420 int i, l;
421 Object obj1;
422 pdf_begin_dict(pdf);
423 for (i = 0, l = dict->getLength(); i < l; ++i) {
424 copyName(pdf, dict->getKey(i));
425 dict->getValNF(i, &obj1);
426 copyObject(pdf, pdf_doc, &obj1);
427 obj1.free();
429 pdf_end_dict(pdf);
432 static void copyStreamStream(PDF pdf, Stream * str)
434 int c, i, len = 1024;
435 str->reset();
436 i = len;
437 while ((c = str->getChar()) != EOF) {
438 if (i == len) {
439 pdf_room(pdf, len);
440 i = 0;
442 pdf_quick_out(pdf, c);
443 i++;
447 static void copyStream(PDF pdf, PdfDocument * pdf_doc, Stream * stream)
449 copyDict(pdf, pdf_doc, stream->getDict());
450 pdf_begin_stream(pdf);
451 copyStreamStream(pdf, stream->getUndecodedStream());
452 pdf_end_stream(pdf);
455 static void copyObject(PDF pdf, PdfDocument * pdf_doc, Object * obj)
457 switch (obj->getType()) {
458 case objBool:
459 pdf_add_bool(pdf, (int) obj->getBool());
460 break;
461 case objInt:
462 pdf_add_int(pdf, obj->getInt());
463 break;
464 case objReal:
465 copyReal(pdf, obj->getReal());
466 break;
468 case objNum:
469 GBool isNum() { return type == objInt || type == objReal; }
470 break;
472 case objString:
473 copyString(pdf, obj->getString());
474 break;
475 case objName:
476 copyName(pdf, obj->getName());
477 break;
478 case objNull:
479 pdf_add_null(pdf);
480 break;
481 case objArray:
482 copyArray(pdf, pdf_doc, obj->getArray());
483 break;
484 case objDict:
485 copyDict(pdf, pdf_doc, obj->getDict());
486 break;
487 case objStream:
488 copyStream(pdf, pdf_doc, obj->getStream());
489 break;
490 case objRef:
491 pdf_add_ref(pdf, addInObj(pdf, pdf_doc, obj->getRef()));
492 break;
493 case objCmd:
494 case objError:
495 case objEOF:
496 case objNone:
497 formatted_error("pdf inclusion","type '%s' cannot be copied", obj->getTypeName());
498 break;
499 default:
500 /* poppler doesn't have any other types */
501 assert(0);
505 static void writeRefs(PDF pdf, PdfDocument * pdf_doc)
507 InObj *r, *n;
508 Object obj1;
509 XRef *xref;
510 PDFDoc *doc = pdf_doc->doc;
511 xref = doc->getXRef();
512 for (r = pdf_doc->inObjList; r != NULL;) {
513 xref->fetch(r->ref.num, r->ref.gen, &obj1);
514 if (obj1.isStream())
515 pdf_begin_obj(pdf, r->num, OBJSTM_NEVER);
516 else
517 pdf_begin_obj(pdf, r->num, 2);
518 copyObject(pdf, pdf_doc, &obj1);
519 obj1.free();
520 pdf_end_obj(pdf);
521 n = r->next;
522 delete r;
523 pdf_doc->inObjList = r = n;
527 /* get the pagebox coordinates according to the pagebox_spec */
529 static PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
531 switch (pagebox_spec) {
532 case PDF_BOX_SPEC_MEDIA:
533 return page->getMediaBox();
534 break;
535 case PDF_BOX_SPEC_CROP:
536 return page->getCropBox();
537 break;
538 case PDF_BOX_SPEC_BLEED:
539 return page->getBleedBox();
540 break;
541 case PDF_BOX_SPEC_TRIM:
542 return page->getTrimBox();
543 break;
544 case PDF_BOX_SPEC_ART:
545 return page->getArtBox();
546 break;
547 default:
548 return page->getMediaBox();
549 break;
554 Reads various information about the PDF and sets it up for later inclusion.
555 This will fail if the PDF version of the PDF is higher than minor_pdf_version_wanted
556 or page_name is given and cannot be found. It makes no sense to give both page_name
557 and page_num. The resulting page number is stored in the image dictionary.
560 void flush_pdf_info(image_dict * idict)
562 if (img_keepopen(idict)) {
563 unrefPdfDocument(img_filepath(idict));
568 void flush_pdfstream_info(image_dict * idict)
570 if (img_pdfstream_ptr(idict) != NULL) {
571 xfree(img_pdfstream_stream(idict));
572 xfree(img_pdfstream_ptr(idict));
573 img_pdfstream_stream(idict) = NULL;
574 img_pdfstream_ptr(idict) = NULL;
579 void read_pdf_info(image_dict * idict)
581 PdfDocument *pdf_doc = NULL;
582 PDFDoc *doc = NULL;
583 Catalog *catalog;
584 Page *page;
585 int rotate;
586 PDFRectangle *pagebox;
587 int pdf_major_version_found, pdf_minor_version_found;
588 float xsize, ysize, xorig, yorig;
589 if (isInit == gFalse) {
590 if (!(globalParams))
591 globalParams = new GlobalParams();
592 globalParams->setErrQuiet(gFalse);
593 isInit = gTrue;
595 if (img_type(idict) == IMG_TYPE_PDF)
596 pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
597 else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
598 pdf_doc = findPdfDocument(img_filepath(idict)) ;
599 pdf_doc->occurences++;
600 } else {
601 normal_error("pdf inclusion","unknown document");
603 doc = pdf_doc->doc;
604 catalog = doc->getCatalog();
606 Check PDF version. This works only for PDF 1.x but since any versions of
607 PDF newer than 1.x will not be backwards compatible to PDF 1.x, we will
608 then have to changed drastically anyway.
610 pdf_major_version_found = doc->getPDFMajorVersion();
611 pdf_minor_version_found = doc->getPDFMinorVersion();
612 if ((pdf_major_version_found > 1) || (pdf_minor_version_found > img_pdfminorversion(idict))) {
613 const char *msg = "PDF inclusion: found PDF version '%d.%d', but at most version '1.%d' allowed";
614 if (img_errorlevel(idict) > 0) {
615 formatted_error("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
616 } else {
617 formatted_warning("pdf inclusion",msg, pdf_major_version_found, pdf_minor_version_found, img_pdfminorversion(idict));
620 img_totalpages(idict) = catalog->getNumPages();
621 if (img_pagename(idict)) {
622 /* get page by name */
623 GooString name(img_pagename(idict));
624 LinkDest *link = doc->findDest(&name);
625 if (link == NULL || !link->isOk())
626 formatted_error("pdf inclusion","invalid destination '%s'",img_pagename(idict));
627 Ref ref = link->getPageRef();
628 img_pagenum(idict) = catalog->findPage(ref.num, ref.gen);
629 if (img_pagenum(idict) == 0)
630 formatted_error("pdf inclusion","destination is not a page '%s'",img_pagename(idict));
631 delete link;
632 } else {
633 /* get page by number */
634 if (img_pagenum(idict) <= 0
635 || img_pagenum(idict) > img_totalpages(idict))
636 formatted_error("pdf inclusion","required page '%i' does not exist",(int) img_pagenum(idict));
638 /* get the required page */
639 page = catalog->getPage(img_pagenum(idict));
640 /* get the pagebox coordinates (media, crop,...) to use. */
641 pagebox = get_pagebox(page, img_pagebox(idict));
642 if (pagebox->x2 > pagebox->x1) {
643 xorig = pagebox->x1;
644 xsize = pagebox->x2 - pagebox->x1;
645 } else {
646 xorig = pagebox->x2;
647 xsize = pagebox->x1 - pagebox->x2;
649 if (pagebox->y2 > pagebox->y1) {
650 yorig = pagebox->y1;
651 ysize = pagebox->y2 - pagebox->y1;
652 } else {
653 yorig = pagebox->y2;
654 ysize = pagebox->y1 - pagebox->y2;
656 /* The following 4 parameters are raw. Do _not_ modify by /Rotate! */
657 img_xsize(idict) = bp2sp(xsize);
658 img_ysize(idict) = bp2sp(ysize);
659 img_xorig(idict) = bp2sp(xorig);
660 img_yorig(idict) = bp2sp(yorig);
662 Handle /Rotate parameter. Only multiples of 90 deg. are allowed (PDF Ref. v1.3,
663 p. 78). We also accept negative angles. Beware: PDF counts clockwise! */
664 rotate = page->getRotate();
665 switch (((rotate % 360) + 360) % 360) {
666 case 0:
667 img_rotation(idict) = 0;
668 break;
669 case 90:
670 img_rotation(idict) = 3;
671 break;
672 case 180:
673 img_rotation(idict) = 2;
674 break;
675 case 270:
676 img_rotation(idict) = 1;
677 break;
678 default:
679 formatted_warning("pdf inclusion","/Rotate parameter in PDF file not multiple of 90 degrees");
681 /* currently unused info whether PDF contains a /Group */
682 if (page->getGroup() != NULL)
683 img_set_group(idict);
685 LuaTeX pre 0.85 versions did this:
687 if (readtype == IMG_CLOSEINBETWEEN) {
688 unrefPdfDocument(img_filepath(idict));
691 and also unref'd in the finalizer zo we got an extra unrefs when garbage was
692 collected. However it is more efficient to keep the file open so we do that
693 now. The (slower) alternative is to unref here (which in most cases forcing a
694 close of the file) but then we must not call flush_pdf_info.
696 A close (unref) can be forced by nilling the dict object at the lua end and
697 forcing a collectgarbage("collect") after that.
700 if (! img_keepopen(idict)) {
701 unrefPdfDocument(img_filepath(idict));
706 Write the current epdf_doc. Here the included PDF is copied, so most errors
707 that can happen during PDF inclusion will arise here.
710 void write_epdf(PDF pdf, image_dict * idict)
712 PdfDocument *pdf_doc = NULL;
713 PDFDoc *doc = NULL;
714 Catalog *catalog;
715 Page *page;
716 Ref *pageref;
717 Dict *pageDict;
718 Object obj1, contents, pageobj, pagesobj1, pagesobj2, *op1, *op2, *optmp;
719 PDFRectangle *pagebox;
720 int i, l;
721 double bbox[4];
722 char s[256];
723 const char *pagedictkeys[] = {
724 "Group", "LastModified", "Metadata", "PieceInfo", "Resources", "SeparationInfo", NULL
726 /* open PDF file */
727 if (img_type(idict) == IMG_TYPE_PDF) {
728 pdf_doc = refPdfDocument(img_filepath(idict), FE_FAIL);
729 } else if (img_type(idict) == IMG_TYPE_PDFMEMSTREAM) {
730 pdf_doc = findPdfDocument(img_filepath(idict)) ;
731 pdf_doc->occurences++;
732 } else {
733 normal_error("pdf inclusion","unknown document");
735 doc = pdf_doc->doc;
736 catalog = doc->getCatalog();
737 page = catalog->getPage(img_pagenum(idict));
738 pageref = catalog->getPageRef(img_pagenum(idict));
739 doc->getXRef()->fetch(pageref->num, pageref->gen, &pageobj);
740 pageDict = pageobj.getDict();
741 /* write the Page header */
742 pdf_begin_obj(pdf, img_objnum(idict), OBJSTM_NEVER);
743 pdf_begin_dict(pdf);
744 pdf_dict_add_name(pdf, "Type", "XObject");
745 pdf_dict_add_name(pdf, "Subtype", "Form");
746 if (img_attr(idict) != NULL && strlen(img_attr(idict)) > 0)
747 pdf_printf(pdf, "\n%s\n", img_attr(idict));
748 pdf_dict_add_int(pdf, "FormType", 1);
749 /* write additional information */
750 pdf_dict_add_img_filename(pdf, idict);
751 snprintf(s, 30, "%s.PageNumber", pdfkeyprefix);
752 pdf_dict_add_int(pdf, s, (int) img_pagenum(idict));
753 doc->getDocInfoNF(&obj1);
754 if (obj1.isRef()) {
755 /* the info dict must be indirect (PDF Ref p. 61) */
756 snprintf(s, 30, "%s.InfoDict", pdfkeyprefix);
757 pdf_dict_add_ref(pdf, s, addInObj(pdf, pdf_doc, obj1.getRef()));
759 obj1.free();
760 if (img_is_bbox(idict)) {
761 bbox[0] = sp2bp(img_bbox(idict)[0]);
762 bbox[1] = sp2bp(img_bbox(idict)[1]);
763 bbox[2] = sp2bp(img_bbox(idict)[2]);
764 bbox[3] = sp2bp(img_bbox(idict)[3]);
765 } else {
766 /* get the pagebox coordinates (media, crop,...) to use. */
767 pagebox = get_pagebox(page, img_pagebox(idict));
768 bbox[0] = pagebox->x1;
769 bbox[1] = pagebox->y1;
770 bbox[2] = pagebox->x2;
771 bbox[3] = pagebox->y2;
773 pdf_add_name(pdf, "BBox");
774 pdf_begin_array(pdf);
775 copyReal(pdf, bbox[0]);
776 copyReal(pdf, bbox[1]);
777 copyReal(pdf, bbox[2]);
778 copyReal(pdf, bbox[3]);
779 pdf_end_array(pdf);
781 Now all relevant parts of the Page dictionary are copied. Metadata validity
782 check is needed(as a stream it must be indirect).
784 pageDict->lookupNF("Metadata", &obj1);
785 if (!obj1.isNull() && !obj1.isRef())
786 formatted_warning("pdf inclusion","/Metadata must be indirect object");
787 obj1.free();
788 /* copy selected items in Page dictionary */
789 for (i = 0; pagedictkeys[i] != NULL; i++) {
790 pageDict->lookupNF(pagedictkeys[i], &obj1);
791 if (!obj1.isNull()) {
792 pdf_add_name(pdf, pagedictkeys[i]);
793 /* preserves indirection */
794 copyObject(pdf, pdf_doc, &obj1);
796 obj1.free();
799 If there are no Resources in the Page dict of the embedded page,
800 try to inherit the Resources from the Pages tree of the embedded
801 PDF file, climbing up the tree until the Resources are found.
802 (This fixes a problem with Scribus 1.3.3.14.)
804 pageDict->lookupNF("Resources", &obj1);
805 if (obj1.isNull()) {
806 op1 = &pagesobj1;
807 op2 = &pagesobj2;
808 pageDict->lookup("Parent", op1);
809 while (op1->isDict()) {
810 obj1.free();
811 op1->dictLookupNF("Resources", &obj1);
812 if (!obj1.isNull()) {
813 pdf_add_name(pdf, "Resources");
814 copyObject(pdf, pdf_doc, &obj1);
815 break;
817 op1->dictLookup("Parent", op2);
818 optmp = op1;
819 op1 = op2;
820 op2 = optmp;
821 op2->free();
823 if (!op1->isDict())
824 formatted_warning("pdf inclusion","Page /Resources missing");
825 op1->free();
827 obj1.free();
828 /* Write the Page contents. */
829 page->getContents(&contents);
830 if (contents.isStream()) {
832 Variant A: get stream and recompress under control of \pdfcompresslevel
834 pdf_begin_stream();
835 copyStreamStream(contents->getStream());
836 pdf_end_stream();
838 Variant B: copy stream without recompressing
840 contents.streamGetDict()->lookup("F", &obj1);
841 if (!obj1.isNull()) {
842 normal_error("pdf inclusion","unsupported external stream");
844 obj1.free();
845 contents.streamGetDict()->lookup("Length", &obj1);
846 pdf_add_name(pdf, "Length");
847 copyObject(pdf, pdf_doc, &obj1);
848 obj1.free();
849 contents.streamGetDict()->lookup("Filter", &obj1);
850 if (!obj1.isNull()) {
851 pdf_add_name(pdf, "Filter");
852 copyObject(pdf, pdf_doc, &obj1);
853 obj1.free();
854 contents.streamGetDict()->lookup("DecodeParms", &obj1);
855 if (!obj1.isNull()) {
856 pdf_add_name(pdf, "DecodeParms");
857 copyObject(pdf, pdf_doc, &obj1);
860 obj1.free();
861 pdf_end_dict(pdf);
862 pdf_begin_stream(pdf);
863 copyStreamStream(pdf, contents.getStream()->getUndecodedStream());
864 pdf_end_stream(pdf);
865 pdf_end_obj(pdf);
866 } else if (contents.isArray()) {
867 pdf_dict_add_streaminfo(pdf);
868 pdf_end_dict(pdf);
869 pdf_begin_stream(pdf);
870 for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
871 copyStreamStream(pdf, (contents.arrayGet(i, &obj1))->getStream());
872 obj1.free();
873 if (i < (l - 1)) {
875 Put a space between streams to be on the safe side (streams
876 should have a trailing space here, but one never knows)
878 pdf_out(pdf, ' ');
881 pdf_end_stream(pdf);
882 pdf_end_obj(pdf);
883 } else {
884 /* the contents are optional, but we need to include an empty stream */
885 pdf_dict_add_streaminfo(pdf);
886 pdf_end_dict(pdf);
887 pdf_begin_stream(pdf);
888 pdf_end_stream(pdf);
889 pdf_end_obj(pdf);
891 /* write out all indirect objects */
892 writeRefs(pdf, pdf_doc);
893 contents.free();
894 pageobj.free();
896 unrefPdfDocument() must come after contents.free() and pageobj.free()!
897 TH: The next line makes repeated pdf inclusion unacceptably slow
899 unrefPdfDocument(img_filepath(idict));
903 /* Deallocate a PdfDocument with all its resources. */
905 static void deletePdfDocumentPdfDoc(PdfDocument * pdf_doc)
907 InObj *r, *n;
908 /* this may be probably needed for an emergency destroyPdfDocument() */
909 for (r = pdf_doc->inObjList; r != NULL; r = n) {
910 n = r->next;
911 delete r;
913 delete pdf_doc->doc;
914 pdf_doc->doc = NULL;
915 pdf_doc->pc++;
918 static void destroyPdfDocument(void *pa, void * /*pb */ )
920 PdfDocument *pdf_doc = (PdfDocument *) pa;
921 deletePdfDocumentPdfDoc(pdf_doc);
922 /* TODO: delete rest of pdf_doc */
926 Called when an image has been written and its resources in image_tab are
927 freed and it's not referenced anymore.
930 void unrefPdfDocument(char *file_path)
932 PdfDocument *pdf_doc = findPdfDocument(file_path);
933 if (pdf_doc->occurences > 0) {
934 pdf_doc->occurences--;
935 if (pdf_doc->occurences == 0) {
936 deletePdfDocumentPdfDoc(pdf_doc);
938 } else {
940 We either have a mismatch in ref and unref or we're somehow out of sync
941 which can happen when we mess with the same file in lua and tex.
943 formatted_warning("pdf inclusion","there can be a mismatch in opening and closing file '%s'",file_path);
948 For completeness, but it isn't currently used (unreferencing is done by means
949 of file_path).
952 void unrefMemStreamPdfDocument(char *file_id)
954 (void) unrefPdfDocument(file_id);
959 Called when PDF embedding system is finalized. We now deallocate all remaining
960 PdfDocuments.
963 void epdf_free()
965 if (PdfDocumentTree != NULL)
966 avl_destroy(PdfDocumentTree, destroyPdfDocument);
967 PdfDocumentTree = NULL;
968 if (isInit == gTrue)
969 delete globalParams;
970 isInit = gFalse;