beta-0.89.2
[luatex.git] / source / libs / poppler / poppler-src / poppler / StructElement.cc
blobc6688208fee7caad629ec766e211597af7198f5b
1 //========================================================================
2 //
3 // StructElement.cc
4 //
5 // This file is licensed under the GPLv2 or later
6 //
7 // Copyright 2013, 2014 Igalia S.L.
8 // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com>
9 // Copyright 2014 Albert Astals Cid <aacid@kde.org>
10 // Copyright 2015 Dmytro Morgun <lztoad@gmail.com>
12 //========================================================================
14 #ifdef USE_GCC_PRAGMAS
15 #pragma interface
16 #endif
18 #include "StructElement.h"
19 #include "StructTreeRoot.h"
20 #include "GlobalParams.h"
21 #include "UnicodeMap.h"
22 #include "PDFDoc.h"
23 #include "Dict.h"
25 #include <assert.h>
27 class GfxState;
30 static GBool isPlacementName(Object *value)
32 return value->isName("Block")
33 || value->isName("Inline")
34 || value->isName("Before")
35 || value->isName("Start")
36 || value->isName("End");
39 static GBool isWritingModeName(Object *value)
41 return value->isName("LrTb")
42 || value->isName("RlTb")
43 || value->isName("TbRl");
46 static GBool isBorderStyleName(Object *value)
48 return value->isName("None")
49 || value->isName("Hidden")
50 || value->isName("Dotted")
51 || value->isName("Dashed")
52 || value->isName("Solid")
53 || value->isName("Double")
54 || value->isName("Groove")
55 || value->isName("Ridge")
56 || value->isName("Inset")
57 || value->isName("Outset");
60 static GBool isTextAlignName(Object *value)
62 return value->isName("Start")
63 || value->isName("End")
64 || value->isName("Center")
65 || value->isName("Justify");
68 static GBool isBlockAlignName(Object *value)
70 return value->isName("Before")
71 || value->isName("Middle")
72 || value->isName("After")
73 || value->isName("Justify");
76 static GBool isInlineAlignName(Object *value)
78 return value->isName("Start")
79 || value->isName("End")
80 || value->isName("Center");
83 static GBool isNumber(Object *value)
85 return value->isNum();
88 static GBool isLineHeight(Object *value)
90 return value->isName("Normal")
91 || value->isName("Auto")
92 || isNumber(value);
95 static GBool isTextDecorationName(Object *value)
97 return value->isName("None")
98 || value->isName("Underline")
99 || value->isName("Overline")
100 || value->isName("LineThrough");
103 static GBool isRubyAlignName(Object *value)
105 return value->isName("Start")
106 || value->isName("End")
107 || value->isName("Center")
108 || value->isName("Justify")
109 || value->isName("Distribute");
112 static GBool isRubyPositionName(Object *value)
114 return value->isName("Before")
115 || value->isName("After")
116 || value->isName("Warichu")
117 || value->isName("Inline");
120 static GBool isGlyphOrientationName(Object *value)
122 return value->isName("Auto")
123 || value->isName("90")
124 || value->isName("180")
125 || value->isName("270")
126 || value->isName("360")
127 || value->isName("-90")
128 || value->isName("-180");
131 static GBool isListNumberingName(Object *value)
133 return value->isName("None")
134 || value->isName("Disc")
135 || value->isName("Circle")
136 || value->isName("Square")
137 || value->isName("Decimal")
138 || value->isName("UpperRoman")
139 || value->isName("LowerRoman")
140 || value->isName("UpperAlpha")
141 || value->isName("LowerAlpha");
144 static GBool isFieldRoleName(Object *value)
146 return value->isName("rb")
147 || value->isName("cb")
148 || value->isName("pb")
149 || value->isName("tv");
152 static GBool isFieldCheckedName(Object *value)
154 return value->isName("on")
155 || value->isName("off")
156 || value->isName("neutral");
159 static GBool isTableScopeName(Object *value)
161 return value->isName("Row")
162 || value->isName("Column")
163 || value->isName("Both");
166 static GBool isRGBColor(Object *value)
168 if (!(value->isArray() && value->arrayGetLength() == 3))
169 return gFalse;
171 GBool okay = gTrue;
172 for (int i = 0; i < 3; i++) {
173 Object obj;
174 if (!value->arrayGet(i, &obj)->isNum()) {
175 okay = gFalse;
176 obj.free();
177 break;
179 if (obj.getNum() < 0.0 || obj.getNum() > 1.0) {
180 okay = gFalse;
181 obj.free();
182 break;
184 obj.free();
187 return okay;
190 static GBool isNatural(Object *value)
192 return (value->isInt() && value->getInt() > 0)
193 || (value->isInt64() && value->getInt64() > 0);
196 static GBool isPositive(Object *value)
198 return value->isNum() && value->getNum() >= 0.0;
201 static GBool isNumberOrAuto(Object *value)
203 return isNumber(value) || value->isName("Auto");
206 static GBool isTextString(Object *value)
208 // XXX: Shall isName() also be checked?
209 return value->isString();
213 #define ARRAY_CHECKER(name, checkItem, length, allowSingle, allowNulls) \
214 static GBool name(Object *value) { \
215 if (!value->isArray()) \
216 return allowSingle ? checkItem(value) : gFalse; \
218 if (length && value->arrayGetLength() != length) \
219 return gFalse; \
221 GBool okay = gTrue; \
222 for (int i = 0; i < value->arrayGetLength(); i++) { \
223 Object obj; \
224 value->arrayGet(i, &obj); \
225 if ((!allowNulls && obj.isNull()) || !checkItem(&obj)) { \
226 okay = gFalse; \
227 obj.free(); \
228 break; \
230 obj.free(); \
232 return okay; \
235 ARRAY_CHECKER(isRGBColorOrOptionalArray4, isRGBColor, 4, gTrue, gTrue );
236 ARRAY_CHECKER(isPositiveOrOptionalArray4, isPositive, 4, gTrue, gTrue );
237 ARRAY_CHECKER(isPositiveOrArray4, isPositive, 4, gTrue, gFalse);
238 ARRAY_CHECKER(isBorderStyle, isBorderStyleName, 4, gTrue, gTrue );
239 ARRAY_CHECKER(isNumberArray4, isNumber, 4, gFalse, gFalse);
240 ARRAY_CHECKER(isNumberOrArrayN, isNumber, 0, gTrue, gFalse);
241 ARRAY_CHECKER(isTableHeaders, isTextString, 0, gFalse, gFalse);
244 // Type of functions used to do type-checking on attribute values
245 typedef GBool (*AttributeCheckFunc)(Object*);
247 // Maps attributes to their names and whether the attribute can be inherited.
248 struct AttributeMapEntry {
249 Attribute::Type type;
250 const char *name;
251 const Object *defval;
252 GBool inherit;
253 AttributeCheckFunc check;
256 struct AttributeDefaults {
257 Object Inline;
258 Object LrTb;
259 Object Normal;
260 Object Distribute;
261 Object off;
262 Object Zero;
263 Object Auto;
264 Object Start;
265 Object None;
266 Object Before;
267 Object Nat1;
269 AttributeDefaults() {
270 Inline.initName("Inline");
271 LrTb.initName("LrTb");
272 Normal.initName("Normal");
273 Distribute.initName("Distribute");
274 off.initName("off");
276 Zero.initReal(0.0);
277 Auto.initName("Auto");
278 Start.initName("Start");
279 None.initName("None");
280 Before.initName("Before");
281 Nat1.initInt(1);
284 ~AttributeDefaults() {
285 Inline.free();
286 LrTb.free();
287 Normal.free();
288 Distribute.free();
289 off.free();
290 Zero.free();
291 Auto.free();
292 Start.free();
293 None.free();
294 Before.free();
295 Nat1.free();
299 static const AttributeDefaults attributeDefaults;
// Helper macros used to write the rows of the attribute tables.

// Terminator row: lookups stop at the entry whose type is Attribute::Unknown.
#define ATTR_LIST_END \
  { Attribute::Unknown, NULL, NULL, gFalse, NULL }

// Row for an attribute with a default value; "defaultMember" names a member
// of attributeDefaults. The attribute name string is the stringified
// identifier.
#define ATTR_WITH_DEFAULT(attr, inheritable, checker, defaultMember) \
  { Attribute::attr,                   \
    #attr,                             \
    &attributeDefaults.defaultMember,  \
    inheritable,                       \
    checker }

// Row for an attribute without a default value.
#define ATTR(attr, inheritable, checker) \
  { Attribute::attr,                   \
    #attr,                             \
    NULL,                              \
    inheritable,                       \
    checker }
319 static const AttributeMapEntry attributeMapCommonShared[] =
321 ATTR_WITH_DEFAULT(Placement, gFalse, isPlacementName, Inline),
322 ATTR_WITH_DEFAULT(WritingMode, gTrue, isWritingModeName, LrTb),
323 ATTR (BackgroundColor, gFalse, isRGBColor),
324 ATTR (BorderColor, gTrue, isRGBColorOrOptionalArray4),
325 ATTR_WITH_DEFAULT(BorderStyle, gFalse, isBorderStyle, None),
326 ATTR (BorderThickness, gTrue, isPositiveOrOptionalArray4),
327 ATTR_WITH_DEFAULT(Padding, gFalse, isPositiveOrArray4, Zero),
328 ATTR (Color, gTrue, isRGBColor),
329 ATTR_LIST_END
332 static const AttributeMapEntry attributeMapCommonBlock[] =
334 ATTR_WITH_DEFAULT(SpaceBefore, gFalse, isPositive, Zero),
335 ATTR_WITH_DEFAULT(SpaceAfter, gFalse, isPositive, Zero),
336 ATTR_WITH_DEFAULT(StartIndent, gTrue, isNumber, Zero),
337 ATTR_WITH_DEFAULT(EndIndent, gTrue, isNumber, Zero),
338 ATTR_WITH_DEFAULT(TextIndent, gTrue, isNumber, Zero),
339 ATTR_WITH_DEFAULT(TextAlign, gTrue, isTextAlignName, Start),
340 ATTR (BBox, gFalse, isNumberArray4),
341 ATTR_WITH_DEFAULT(Width, gFalse, isNumberOrAuto, Auto),
342 ATTR_WITH_DEFAULT(Height, gFalse, isNumberOrAuto, Auto),
343 ATTR_WITH_DEFAULT(BlockAlign, gTrue, isBlockAlignName, Before),
344 ATTR_WITH_DEFAULT(InlineAlign, gTrue, isInlineAlignName, Start),
345 ATTR_LIST_END
348 static const AttributeMapEntry attributeMapCommonInline[] =
350 ATTR_WITH_DEFAULT(BaselineShift, gFalse, isNumber, Zero),
351 ATTR_WITH_DEFAULT(LineHeight, gTrue, isLineHeight, Normal),
352 ATTR (TextDecorationColor, gTrue, isRGBColor),
353 ATTR (TextDecorationThickness, gTrue, isPositive),
354 ATTR_WITH_DEFAULT(TextDecorationType, gFalse, isTextDecorationName, None),
355 ATTR_WITH_DEFAULT(GlyphOrientationVertical, gTrue, isGlyphOrientationName, Auto),
356 ATTR_LIST_END
359 static const AttributeMapEntry attributeMapCommonRubyText[] =
361 ATTR_WITH_DEFAULT(RubyPosition, gTrue, isRubyPositionName, Before),
362 ATTR_WITH_DEFAULT(RubyAlign, gTrue, isRubyAlignName, Distribute),
363 ATTR_LIST_END
366 static const AttributeMapEntry attributeMapCommonColumns[] =
368 ATTR_WITH_DEFAULT(ColumnCount, gFalse, isNatural, Nat1),
369 ATTR (ColumnGap, gFalse, isNumberOrArrayN),
370 ATTR (ColumnWidths, gFalse, isNumberOrArrayN),
371 ATTR_LIST_END
374 static const AttributeMapEntry attributeMapCommonList[] = {
375 ATTR_WITH_DEFAULT(ListNumbering, gTrue, isListNumberingName, None),
376 ATTR_LIST_END
379 static const AttributeMapEntry attributeMapCommonPrintField[] =
381 ATTR (Role, gFalse, isFieldRoleName),
382 ATTR_WITH_DEFAULT(checked, gFalse, isFieldCheckedName, off),
383 ATTR (Desc, gFalse, isTextString),
384 ATTR_LIST_END
387 static const AttributeMapEntry attributeMapCommonTable[] =
389 ATTR(Headers, gFalse, isTableHeaders),
390 ATTR(Scope, gFalse, isTableScopeName),
391 ATTR(Summary, gFalse, isTextString),
392 ATTR_LIST_END
395 static const AttributeMapEntry attributeMapCommonTableCell[] =
397 ATTR_WITH_DEFAULT(RowSpan, gFalse, isNatural, Nat1),
398 ATTR_WITH_DEFAULT(ColSpan, gFalse, isNatural, Nat1),
399 ATTR_WITH_DEFAULT(TBorderStyle, gTrue, isBorderStyle, None),
400 ATTR_WITH_DEFAULT(TPadding, gTrue, isPositiveOrArray4, Zero),
401 ATTR_LIST_END
404 #undef ATTR_WITH_DEFAULT
405 #undef ATTR
408 static const AttributeMapEntry *attributeMapAll[] = {
409 attributeMapCommonShared,
410 attributeMapCommonBlock,
411 attributeMapCommonInline,
412 attributeMapCommonRubyText,
413 attributeMapCommonColumns,
414 attributeMapCommonList,
415 attributeMapCommonPrintField,
416 attributeMapCommonTable,
417 attributeMapCommonTableCell,
418 NULL,
421 static const AttributeMapEntry *attributeMapShared[] = {
422 attributeMapCommonShared,
423 NULL,
426 static const AttributeMapEntry *attributeMapBlock[] = {
427 attributeMapCommonShared,
428 attributeMapCommonBlock,
429 NULL,
432 static const AttributeMapEntry *attributeMapInline[] = {
433 attributeMapCommonShared,
434 attributeMapCommonInline,
435 NULL,
438 static const AttributeMapEntry *attributeMapTableCell[] = {
439 attributeMapCommonShared,
440 attributeMapCommonBlock,
441 attributeMapCommonTable,
442 attributeMapCommonTableCell,
443 NULL,
446 static const AttributeMapEntry *attributeMapRubyText[] = {
447 attributeMapCommonShared,
448 attributeMapCommonInline,
449 attributeMapCommonRubyText,
450 NULL,
453 static const AttributeMapEntry *attributeMapColumns[] = {
454 attributeMapCommonShared,
455 attributeMapCommonInline,
456 attributeMapCommonColumns,
457 NULL,
460 static const AttributeMapEntry *attributeMapList[] = {
461 attributeMapCommonShared,
462 attributeMapCommonList,
463 NULL,
466 static const AttributeMapEntry *attributeMapTable[] = {
467 attributeMapCommonShared,
468 attributeMapCommonBlock,
469 attributeMapCommonTable,
470 NULL,
473 static const AttributeMapEntry *attributeMapIllustration[] = {
474 // XXX: Illustrations may have some attributes from the "shared", "inline",
475 // the "block" sets. This is a loose specification; making it better
476 // means duplicating entries from the sets. This seems good enough...
477 attributeMapCommonShared,
478 attributeMapCommonBlock,
479 attributeMapCommonInline,
480 NULL,
483 // Table mapping owners of attributes to their names.
484 static const struct OwnerMapEntry {
485 Attribute::Owner owner;
486 const char *name;
487 } ownerMap[] = {
488 // XXX: Those are sorted in the owner priority resolution order. If the
489 // same attribute is defined with two owners, the order in the table
490 // can be used to know which one has more priority.
491 { Attribute::XML_1_00, "XML-1.00" },
492 { Attribute::HTML_3_20, "HTML-3.20" },
493 { Attribute::HTML_4_01, "HTML-4.01" },
494 { Attribute::OEB_1_00, "OEB-1.00" },
495 { Attribute::RTF_1_05, "RTF-1.05" },
496 { Attribute::CSS_1_00, "CSS-1.00" },
497 { Attribute::CSS_2_00, "CSS-2.00" },
498 { Attribute::Layout, "Layout" },
499 { Attribute::PrintField, "PrintField" },
500 { Attribute::Table, "Table" },
501 { Attribute::List, "List" },
502 { Attribute::UserProperties, "UserProperties" },
506 static GBool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b)
508 unsigned aIndex, bIndex;
510 for (unsigned i = aIndex = bIndex = 0; i < sizeof(ownerMap) / sizeof(ownerMap[0]); i++) {
511 if (ownerMap[i].owner == a)
512 aIndex = i;
513 if (ownerMap[i].owner == b)
514 bIndex = i;
517 return aIndex < bIndex;
521 // Maps element types to their names and also serves as lookup table
522 // for additional element type attributes.
// Broad category of a structure element type, as recorded in typeMap and
// queried by StructElement::isGrouping(), isInline() and isBlock().
enum ElementType {
  elementTypeUndefined,  // Not one of the three categories below.
  elementTypeGrouping,
  elementTypeInline,
  elementTypeBlock,
};
531 static const struct TypeMapEntry {
532 StructElement::Type type;
533 const char *name;
534 ElementType elementType;
535 const AttributeMapEntry **attributes;
536 } typeMap[] = {
537 { StructElement::Document, "Document", elementTypeGrouping, attributeMapShared },
538 { StructElement::Part, "Part", elementTypeGrouping, attributeMapShared },
539 { StructElement::Art, "Art", elementTypeGrouping, attributeMapColumns },
540 { StructElement::Sect, "Sect", elementTypeGrouping, attributeMapColumns },
541 { StructElement::Div, "Div", elementTypeGrouping, attributeMapColumns },
542 { StructElement::BlockQuote, "BlockQuote", elementTypeGrouping, attributeMapInline },
543 { StructElement::Caption, "Caption", elementTypeGrouping, attributeMapInline },
544 { StructElement::NonStruct, "NonStruct", elementTypeGrouping, attributeMapInline },
545 { StructElement::Index, "Index", elementTypeGrouping, attributeMapInline },
546 { StructElement::Private, "Private", elementTypeGrouping, attributeMapInline },
547 { StructElement::Span, "Span", elementTypeInline, attributeMapInline },
548 { StructElement::Quote, "Quote", elementTypeInline, attributeMapInline },
549 { StructElement::Note, "Note", elementTypeInline, attributeMapInline },
550 { StructElement::Reference, "Reference", elementTypeInline, attributeMapInline },
551 { StructElement::BibEntry, "BibEntry", elementTypeInline, attributeMapInline },
552 { StructElement::Code, "Code", elementTypeInline, attributeMapInline },
553 { StructElement::Link, "Link", elementTypeInline, attributeMapInline },
554 { StructElement::Annot, "Annot", elementTypeInline, attributeMapInline },
555 { StructElement::Ruby, "Ruby", elementTypeInline, attributeMapRubyText },
556 { StructElement::RB, "RB", elementTypeUndefined, attributeMapRubyText },
557 { StructElement::RT, "RT", elementTypeUndefined, attributeMapRubyText },
558 { StructElement::RP, "RP", elementTypeUndefined, attributeMapShared },
559 { StructElement::Warichu, "Warichu", elementTypeInline, attributeMapRubyText },
560 { StructElement::WT, "WT", elementTypeUndefined, attributeMapShared },
561 { StructElement::WP, "WP", elementTypeUndefined, attributeMapShared },
562 { StructElement::P, "P", elementTypeBlock, attributeMapBlock },
563 { StructElement::H, "H", elementTypeBlock, attributeMapBlock },
564 { StructElement::H1, "H1", elementTypeBlock, attributeMapBlock },
565 { StructElement::H2, "H2", elementTypeBlock, attributeMapBlock },
566 { StructElement::H3, "H3", elementTypeBlock, attributeMapBlock },
567 { StructElement::H4, "H4", elementTypeBlock, attributeMapBlock },
568 { StructElement::H5, "H5", elementTypeBlock, attributeMapBlock },
569 { StructElement::H6, "H6", elementTypeBlock, attributeMapBlock },
570 { StructElement::L, "L", elementTypeBlock, attributeMapList },
571 { StructElement::LI, "LI", elementTypeBlock, attributeMapBlock },
572 { StructElement::Lbl, "Lbl", elementTypeBlock, attributeMapBlock },
573 { StructElement::LBody, "LBody", elementTypeBlock, attributeMapBlock },
574 { StructElement::Table, "Table", elementTypeBlock, attributeMapTable },
575 { StructElement::TR, "TR", elementTypeUndefined, attributeMapShared },
576 { StructElement::TH, "TH", elementTypeUndefined, attributeMapTableCell },
577 { StructElement::TD, "TD", elementTypeUndefined, attributeMapTableCell },
578 { StructElement::THead, "THead", elementTypeUndefined, attributeMapShared },
579 { StructElement::TFoot, "TFoot", elementTypeUndefined, attributeMapShared },
580 { StructElement::TBody, "TBody", elementTypeUndefined, attributeMapShared },
581 { StructElement::Figure, "Figure", elementTypeUndefined, attributeMapIllustration },
582 { StructElement::Formula, "Formula", elementTypeUndefined, attributeMapIllustration },
583 { StructElement::Form, "Form", elementTypeUndefined, attributeMapIllustration },
584 { StructElement::TOC, "TOC", elementTypeGrouping, attributeMapShared },
585 { StructElement::TOCI, "TOCI", elementTypeGrouping, attributeMapShared },
589 //------------------------------------------------------------------------
590 // Helpers for the attribute and structure type tables
591 //------------------------------------------------------------------------
593 static inline const AttributeMapEntry *
594 getAttributeMapEntry(const AttributeMapEntry **entryList, Attribute::Type type)
596 assert(entryList);
597 while (*entryList) {
598 const AttributeMapEntry *entry = *entryList;
599 while (entry->type != Attribute::Unknown) {
600 assert(entry->name);
601 if (type == entry->type)
602 return entry;
603 entry++;
605 entryList++;
607 return NULL;
610 static inline const AttributeMapEntry *
611 getAttributeMapEntry(const AttributeMapEntry **entryList, const char *name)
613 assert(entryList);
614 while (*entryList) {
615 const AttributeMapEntry *entry = *entryList;
616 while (entry->type != Attribute::Unknown) {
617 assert(entry->name);
618 if (strcmp(name, entry->name) == 0)
619 return entry;
620 entry++;
622 entryList++;
624 return NULL;
627 static inline const OwnerMapEntry *getOwnerMapEntry(Attribute::Owner owner)
629 for (unsigned i = 0; i < sizeof(ownerMap) / sizeof(ownerMap[0]); i++) {
630 if (owner == ownerMap[i].owner)
631 return &ownerMap[i];
633 return NULL;
636 static inline const OwnerMapEntry *getOwnerMapEntry(const char *name)
638 for (unsigned i = 0; i < sizeof(ownerMap) / sizeof(ownerMap[0]); i++) {
639 if (strcmp(name, ownerMap[i].name) == 0)
640 return &ownerMap[i];
642 return NULL;
645 static const char *ownerToName(Attribute::Owner owner)
647 const OwnerMapEntry *entry = getOwnerMapEntry(owner);
648 return entry ? entry->name : "UnknownOwner";
651 static Attribute::Owner nameToOwner(const char *name)
653 const OwnerMapEntry *entry = getOwnerMapEntry(name);
654 return entry ? entry->owner : Attribute::UnknownOwner;
657 static inline const TypeMapEntry *getTypeMapEntry(StructElement::Type type)
659 for (unsigned i = 0; i < sizeof(typeMap) / sizeof(typeMap[0]); i++) {
660 if (type == typeMap[i].type)
661 return &typeMap[i];
663 return NULL;
666 static inline const TypeMapEntry *getTypeMapEntry(const char *name)
668 for (unsigned i = 0; i < sizeof(typeMap) / sizeof(typeMap[0]); i++) {
669 if (strcmp(name, typeMap[i].name) == 0)
670 return &typeMap[i];
672 return NULL;
675 static const char *typeToName(StructElement::Type type)
677 if (type == StructElement::MCID)
678 return "MarkedContent";
679 if (type == StructElement::OBJR)
680 return "ObjectReference";
682 const TypeMapEntry *entry = getTypeMapEntry(type);
683 return entry ? entry->name : "Unknown";
686 static StructElement::Type nameToType(const char *name)
688 const TypeMapEntry *entry = getTypeMapEntry(name);
689 return entry ? entry->type : StructElement::Unknown;
693 //------------------------------------------------------------------------
694 // Attribute
695 //------------------------------------------------------------------------
697 Attribute::Attribute(const char *nameA, int nameLenA, Object *valueA):
698 type(UserProperty),
699 owner(UserProperties),
700 revision(0),
701 name(nameA, nameLenA),
702 value(),
703 hidden(gFalse),
704 formatted(NULL)
706 assert(valueA);
707 valueA->copy(&value);
710 Attribute::Attribute(Type type, Object *valueA):
711 type(type),
712 owner(UserProperties), // TODO: Determine corresponding owner from Type
713 revision(0),
714 name(),
715 value(),
716 hidden(gFalse),
717 formatted(NULL)
719 assert(valueA);
721 valueA->copy(&value);
723 if (!checkType())
724 type = Unknown;
727 Attribute::~Attribute()
729 delete formatted;
730 value.free();
733 const char *Attribute::getTypeName() const
735 if (type == UserProperty)
736 return name.getCString();
738 const AttributeMapEntry *entry = getAttributeMapEntry(attributeMapAll, type);
739 if (entry)
740 return entry->name;
742 return "Unknown";
745 const char *Attribute::getOwnerName() const
747 return ownerToName(owner);
750 Object *Attribute::getDefaultValue(Attribute::Type type)
752 const AttributeMapEntry *entry = getAttributeMapEntry(attributeMapAll, type);
753 return entry ? const_cast<Object*>(entry->defval) : NULL;
756 void Attribute::setFormattedValue(const char *formattedA)
758 if (formattedA) {
759 if (formatted)
760 formatted->Set(formattedA);
761 else
762 formatted = new GooString(formattedA);
763 } else {
764 delete formatted;
765 formatted = NULL;
769 GBool Attribute::checkType(StructElement *element)
771 // If an element is passed, tighther type-checking can be done.
772 if (!element)
773 return gTrue;
775 const TypeMapEntry *elementTypeEntry = getTypeMapEntry(element->getType());
776 if (elementTypeEntry && elementTypeEntry->attributes) {
777 const AttributeMapEntry *entry = getAttributeMapEntry(elementTypeEntry->attributes, type);
778 if (entry) {
779 if (entry->check && !((*entry->check)(&value))) {
780 return gFalse;
782 } else {
783 // No entry: the attribute is not valid for the containing element.
784 return gFalse;
788 return gTrue;
791 Attribute::Type Attribute::getTypeForName(const char *name, StructElement *element)
793 const AttributeMapEntry **attributes = attributeMapAll;
794 if (element) {
795 const TypeMapEntry *elementTypeEntry = getTypeMapEntry(element->getType());
796 if (elementTypeEntry && elementTypeEntry->attributes) {
797 attributes = elementTypeEntry->attributes;
801 const AttributeMapEntry *entry = getAttributeMapEntry(attributes, name);
802 return entry ? entry->type : Unknown;
805 Attribute *Attribute::parseUserProperty(Dict *property)
807 Object obj, value;
808 const char *name = NULL;
809 int nameLen = GooString::CALC_STRING_LEN;
811 if (property->lookup("N", &obj)->isString()) {
812 GooString *s = obj.getString();
813 name = s->getCString();
814 nameLen = s->getLength();
815 } else if (obj.isName())
816 name = obj.getName();
817 else {
818 error(errSyntaxError, -1, "N object is wrong type ({0:s})", obj.getTypeName());
819 obj.free();
820 return NULL;
823 if (property->lookup("V", &value)->isNull()) {
824 error(errSyntaxError, -1, "V object is wrong type ({0:s})", value.getTypeName());
825 value.free();
826 obj.free();
827 return NULL;
830 Attribute *attribute = new Attribute(name, nameLen, &value);
831 value.free();
832 obj.free();
834 if (property->lookup("F", &obj)->isString()) {
835 attribute->setFormattedValue(obj.getString()->getCString());
836 } else if (!obj.isNull()) {
837 error(errSyntaxWarning, -1, "F object is wrong type ({0:s})", obj.getTypeName());
839 obj.free();
841 if (property->lookup("H", &obj)->isBool()) {
842 attribute->setHidden(obj.getBool());
843 } else if (!obj.isNull()) {
844 error(errSyntaxWarning, -1, "H object is wrong type ({0:s})", obj.getTypeName());
846 obj.free();
848 return attribute;
852 //------------------------------------------------------------------------
853 // StructElement
854 //------------------------------------------------------------------------
856 StructElement::StructData::StructData():
857 altText(0),
858 actualText(0),
859 id(0),
860 title(0),
861 expandedAbbr(0),
862 language(0),
863 revision(0)
867 StructElement::StructData::~StructData()
869 delete altText;
870 delete actualText;
871 delete id;
872 delete title;
873 delete language;
874 parentRef.free();
875 for (ElemPtrArray::iterator i = elements.begin(); i != elements.end(); ++i) delete *i;
876 for (AttrPtrArray::iterator i = attributes.begin(); i != attributes.end(); ++i) delete *i;
880 StructElement::StructElement(Dict *element,
881 StructTreeRoot *treeRootA,
882 StructElement *parentA,
883 std::set<int> &seen):
884 type(Unknown),
885 treeRoot(treeRootA),
886 parent(parentA),
887 s(new StructData())
889 assert(treeRoot);
890 assert(element);
892 parse(element);
893 parseChildren(element, seen);
896 StructElement::StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA):
897 type(MCID),
898 treeRoot(treeRootA),
899 parent(parentA),
900 c(new ContentData(mcid))
902 assert(treeRoot);
903 assert(parent);
906 StructElement::StructElement(const Ref& ref, StructTreeRoot *treeRootA, StructElement *parentA):
907 type(OBJR),
908 treeRoot(treeRootA),
909 parent(parentA),
910 c(new ContentData(ref))
912 assert(treeRoot);
913 assert(parent);
916 StructElement::~StructElement()
918 if (isContent())
919 delete c;
920 else
921 delete s;
922 pageRef.free();
925 GBool StructElement::isBlock() const
927 const TypeMapEntry *entry = getTypeMapEntry(type);
928 return entry ? (entry->elementType == elementTypeBlock) : gFalse;
931 GBool StructElement::isInline() const
933 const TypeMapEntry *entry = getTypeMapEntry(type);
934 return entry ? (entry->elementType == elementTypeInline) : gFalse;
937 GBool StructElement::isGrouping() const
939 const TypeMapEntry *entry = getTypeMapEntry(type);
940 return entry ? (entry->elementType == elementTypeGrouping) : gFalse;
943 GBool StructElement::hasPageRef() const
945 return pageRef.isRef() || (parent && parent->hasPageRef());
948 bool StructElement::getPageRef(Ref& ref) const
950 if (pageRef.isRef()) {
951 ref = pageRef.getRef();
952 return gTrue;
955 if (parent)
956 return parent->getPageRef(ref);
958 return gFalse;
961 const char *StructElement::getTypeName() const
963 return typeToName(type);
966 const Attribute *StructElement::findAttribute(Attribute::Type attributeType, GBool inherit,
967 Attribute::Owner attributeOwner) const
969 if (isContent())
970 return parent->findAttribute(attributeType, inherit, attributeOwner);
972 if (attributeType == Attribute::Unknown || attributeType == Attribute::UserProperty)
973 return NULL;
975 const Attribute *result = NULL;
977 if (attributeOwner == Attribute::UnknownOwner) {
978 // Search for the attribute, no matter who the owner is
979 for (unsigned i = 0; i < getNumAttributes(); i++) {
980 const Attribute *attr = getAttribute(i);
981 if (attributeType == attr->getType()) {
982 if (!result || ownerHasMorePriority(attr->getOwner(), result->getOwner()))
983 result = attr;
986 } else {
987 // Search for the attribute, with a specific owner
988 for (unsigned i = 0; i < getNumAttributes(); i++) {
989 const Attribute *attr = getAttribute(i);
990 if (attributeType == attr->getType() && attributeOwner == attr->getOwner()) {
991 result = attr;
992 break;
997 if (result)
998 return result;
1000 if (inherit && parent) {
1001 const AttributeMapEntry *entry = getAttributeMapEntry(attributeMapAll, attributeType);
1002 assert(entry);
1003 // TODO: Take into account special inheritance cases, for example:
1004 // inline elements which have been changed to be block using
1005 // "/Placement/Block" have slightly different rules.
1006 if (entry->inherit)
1007 return parent->findAttribute(attributeType, inherit, attributeOwner);
1010 return NULL;
1013 GooString* StructElement::appendSubTreeText(GooString *string, GBool recursive) const
1015 if (isContent() && !isObjectRef()) {
1016 MarkedContentOutputDev mcdev(getMCID());
1017 const TextSpanArray& spans(getTextSpansInternal(mcdev));
1019 if (!string)
1020 string = new GooString();
1022 for (TextSpanArray::const_iterator i = spans.begin(); i != spans.end(); ++i)
1023 string->append(i->getText());
1025 return string;
1028 if (!recursive)
1029 return NULL;
1031 // Do a depth-first traversal, to get elements in logical order
1032 if (!string)
1033 string = new GooString();
1035 for (unsigned i = 0; i < getNumChildren(); i++)
1036 getChild(i)->appendSubTreeText(string, recursive);
1038 return string;
1041 const TextSpanArray& StructElement::getTextSpansInternal(MarkedContentOutputDev& mcdev) const
1043 assert(isContent());
1045 int startPage = 0, endPage = 0;
1047 Ref ref;
1048 if (getPageRef(ref)) {
1049 startPage = endPage = treeRoot->getDoc()->findPage(ref.num, ref.gen);
1052 if (!(startPage && endPage)) {
1053 startPage = 1;
1054 endPage = treeRoot->getDoc()->getNumPages();
1057 treeRoot->getDoc()->displayPages(&mcdev, startPage, endPage, 72.0, 72.0, 0, gTrue, gFalse, gFalse);
1058 return mcdev.getTextSpans();
1061 static StructElement::Type roleMapResolve(Dict *roleMap, const char *name, const char *curName, Object *resolved)
1063 // Circular reference
1064 if (curName && !strcmp(name, curName))
1065 return StructElement::Unknown;
1067 if (roleMap->lookup(curName ? curName : name, resolved)->isName()) {
1068 StructElement::Type type = nameToType(resolved->getName());
1069 return type == StructElement::Unknown
1070 ? roleMapResolve(roleMap, name, resolved->getName(), resolved)
1071 : type;
1074 if (!resolved->isNull())
1075 error(errSyntaxWarning, -1, "RoleMap entry is wrong type ({0:s})", resolved->getTypeName());
1076 return StructElement::Unknown;
1079 void StructElement::parse(Dict *element)
1081 Object obj;
1083 // Type is optional, but if present must be StructElem
1084 if (!element->lookup("Type", &obj)->isNull() && !obj.isName("StructElem")) {
1085 error(errSyntaxError, -1, "Type of StructElem object is wrong");
1086 obj.free();
1087 return;
1089 obj.free();
1091 // Parent object reference (required).
1092 if (!element->lookupNF("P", &s->parentRef)->isRef()) {
1093 error(errSyntaxError, -1, "P object is wrong type ({0:s})", obj.getTypeName());
1094 return;
1097 // Check whether the S-type is valid for the top level
1098 // element and create a node of the appropriate type.
1099 if (!element->lookup("S", &obj)->isName()) {
1100 error(errSyntaxError, -1, "S object is wrong type ({0:s})", obj.getTypeName());
1101 obj.free();
1102 return;
1105 // Type name may not be standard, resolve through RoleMap first.
1106 if (treeRoot->getRoleMap()) {
1107 Object resolvedName;
1108 type = roleMapResolve(treeRoot->getRoleMap(), obj.getName(), NULL, &resolvedName);
1111 // Resolving through RoleMap may leave type as Unknown, e.g. for types
1112 // which are not present in it, yet they are standard element types.
1113 if (type == Unknown)
1114 type = nameToType(obj.getName());
1116 // At this point either the type name must have been resolved.
1117 if (type == Unknown) {
1118 error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
1119 obj.free();
1120 return;
1122 obj.free();
1124 // Object ID (optional), to be looked at the IDTree in the tree root.
1125 if (element->lookup("ID", &obj)->isString()) {
1126 s->id = obj.takeString();
1128 obj.free();
1130 // Page reference (optional) in which at least one of the child items
1131 // is to be rendered in. Note: each element stores only the /Pg value
1132 // contained by it, and StructElement::getPageRef() may look in parent
1133 // elements to find the page where an element belongs.
1134 element->lookupNF("Pg", &pageRef);
1136 // Revision number (optional).
1137 if (element->lookup("R", &obj)->isInt()) {
1138 s->revision = obj.getInt();
1140 obj.free();
1142 // Element title (optional).
1143 if (element->lookup("T", &obj)->isString()) {
1144 s->title = obj.takeString();
1146 obj.free();
1148 // Language (optional).
1149 if (element->lookup("Lang", &obj)->isString()) {
1150 s->language = obj.takeString();
1152 obj.free();
1154 // Alternative text (optional).
1155 if (element->lookup("Alt", &obj)->isString()) {
1156 s->altText = obj.takeString();
1158 obj.free();
1160 // Expanded form of an abbreviation (optional).
1161 if (element->lookup("E", &obj)->isString()) {
1162 s->expandedAbbr = obj.takeString();
1164 obj.free();
1166 // Actual text (optional).
1167 if (element->lookup("ActualText", &obj)->isString()) {
1168 s->actualText = obj.takeString();
1170 obj.free();
1172 // Attributes directly attached to the element (optional).
1173 if (element->lookup("A", &obj)->isDict()) {
1174 parseAttributes(obj.getDict());
1175 } else if (obj.isArray()) {
1176 Object iobj;
1177 unsigned attrIndex = getNumAttributes();
1178 for (int i = 0; i < obj.arrayGetLength(); i++) {
1179 if (obj.arrayGet(i, &iobj)->isDict()) {
1180 attrIndex = getNumAttributes();
1181 parseAttributes(iobj.getDict());
1182 } else if (iobj.isInt()) {
1183 const int revision = iobj.getInt();
1184 // Set revision numbers for the elements previously created.
1185 for (unsigned j = attrIndex; j < getNumAttributes(); j++)
1186 getAttribute(j)->setRevision(revision);
1187 } else {
1188 error(errSyntaxWarning, -1, "A item is wrong type ({0:s})", iobj.getTypeName());
1190 iobj.free();
1192 } else if (!obj.isNull()) {
1193 error(errSyntaxWarning, -1, "A is wrong type ({0:s})", obj.getTypeName());
1195 obj.free();
1197 // Attributes referenced indirectly through the ClassMap (optional).
1198 if (treeRoot->getClassMap()) {
1199 Object classes;
1200 if (element->lookup("C", &classes)->isName()) {
1201 Object attr;
1202 if (treeRoot->getClassMap()->lookup(classes.getName(), &attr)->isDict()) {
1203 parseAttributes(attr.getDict(), gTrue);
1204 } else if (attr.isArray()) {
1205 for (int i = 0; i < attr.arrayGetLength(); i++) {
1206 Object iobj;
1207 unsigned attrIndex = getNumAttributes();
1208 if (attr.arrayGet(i, &iobj)->isDict()) {
1209 attrIndex = getNumAttributes();
1210 parseAttributes(iobj.getDict(), gTrue);
1211 } else if (iobj.isInt()) {
1212 // Set revision numbers for the elements previously created.
1213 const int revision = iobj.getInt();
1214 for (unsigned j = attrIndex; j < getNumAttributes(); j++)
1215 getAttribute(j)->setRevision(revision);
1216 } else {
1217 error(errSyntaxWarning, -1, "C item is wrong type ({0:s})", iobj.getTypeName());
1219 iobj.free();
1221 } else if (!attr.isNull()) {
1222 error(errSyntaxWarning, -1, "C object is wrong type ({0:s})", classes.getTypeName());
1224 classes.free();
1225 attr.free();
1230 StructElement *StructElement::parseChild(Object *ref,
1231 Object *childObj,
1232 std::set<int> &seen)
1234 assert(childObj);
1235 assert(ref);
1237 StructElement *child = NULL;
1239 if (childObj->isInt()) {
1240 child = new StructElement(childObj->getInt(), treeRoot, this);
1241 } else if (childObj->isDict("MCR")) {
1243 * TODO: The optional Stm/StwOwn attributes are not handled, so all the
1244 * page will be always scanned when calling StructElement::getText().
1246 Object mcidObj;
1247 Object pageRefObj;
1249 if (!childObj->dictLookup("MCID", &mcidObj)->isInt()) {
1250 error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
1251 mcidObj.free();
1252 return NULL;
1255 child = new StructElement(mcidObj.getInt(), treeRoot, this);
1256 mcidObj.free();
1258 if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) {
1259 child->pageRef = pageRefObj;
1260 } else {
1261 pageRefObj.free();
1263 } else if (childObj->isDict("OBJR")) {
1264 Object refObj;
1266 if (childObj->dictLookupNF("Obj", &refObj)->isRef()) {
1267 Object pageRefObj;
1269 child = new StructElement(refObj.getRef(), treeRoot, this);
1271 if (childObj->dictLookupNF("Pg", &pageRefObj)->isRef()) {
1272 child->pageRef = pageRefObj;
1273 } else {
1274 pageRefObj.free();
1276 } else {
1277 error(errSyntaxError, -1, "Obj object is wrong type ({0:s})", refObj.getTypeName());
1279 refObj.free();
1280 } else if (childObj->isDict()) {
1281 if (!ref->isRef()) {
1282 error(errSyntaxError, -1,
1283 "Structure element dictionary is not an indirect reference ({0:s})",
1284 ref->getTypeName());
1285 } else if (seen.find(ref->getRefNum()) == seen.end()) {
1286 seen.insert(ref->getRefNum());
1287 child = new StructElement(childObj->getDict(), treeRoot, this, seen);
1288 } else {
1289 error(errSyntaxWarning, -1,
1290 "Loop detected in structure tree, skipping subtree at object {0:d}:{1:d}",
1291 ref->getRefNum(), ref->getRefGen());
1293 } else {
1294 error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", childObj->getTypeName());
1297 if (child) {
1298 if (child->isOk()) {
1299 appendChild(child);
1300 if (ref->isRef())
1301 treeRoot->parentTreeAdd(ref->getRef(), child);
1302 } else {
1303 delete child;
1304 child = NULL;
1308 return child;
1311 void StructElement::parseChildren(Dict *element, std::set<int> &seen)
1313 Object kids;
1315 if (element->lookup("K", &kids)->isArray()) {
1316 for (int i = 0; i < kids.arrayGetLength(); i++) {
1317 Object obj, ref;
1318 parseChild(kids.arrayGetNF(i, &ref), kids.arrayGet(i, &obj), seen);
1319 obj.free();
1320 ref.free();
1322 } else if (kids.isDict() || kids.isInt()) {
1323 Object ref;
1324 parseChild(element->lookupNF("K", &ref), &kids, seen);
1325 ref.free();
1328 kids.free();
1331 void StructElement::parseAttributes(Dict *attributes, GBool keepExisting)
1333 Object owner;
1334 if (attributes->lookup("O", &owner)->isName("UserProperties")) {
1335 // In this case /P is an array of UserProperty dictionaries
1336 Object userProperties;
1337 if (attributes->lookup("P", &userProperties)->isArray()) {
1338 for (int i = 0; i < userProperties.arrayGetLength(); i++) {
1339 Object property;
1340 if (userProperties.arrayGet(i, &property)->isDict()) {
1341 Attribute *attribute = Attribute::parseUserProperty(property.getDict());
1342 if (attribute && attribute->isOk()) {
1343 appendAttribute(attribute);
1344 } else {
1345 error(errSyntaxWarning, -1, "Item in P is invalid");
1346 delete attribute;
1348 } else {
1349 error(errSyntaxWarning, -1, "Item in P is wrong type ({0:s})", property.getTypeName());
1351 property.free();
1354 userProperties.free();
1355 } else if (owner.isName()) {
1356 // In this case /P contains standard attributes.
1357 // Check first if the owner is a valid standard one.
1358 Attribute::Owner ownerValue = nameToOwner(owner.getName());
1359 if (ownerValue != Attribute::UnknownOwner) {
1360 // Iterate over the entries of the "attributes" dictionary.
1361 // The /O entry (owner) is skipped.
1362 for (int i = 0; i < attributes->getLength(); i++) {
1363 const char *key = attributes->getKey(i);
1364 if (strcmp(key, "O") != 0) {
1365 Attribute::Type type = Attribute::getTypeForName(key, this);
1367 // Check if the attribute is already defined.
1368 if (keepExisting) {
1369 GBool exists = gFalse;
1370 for (unsigned j = 0; j < getNumAttributes(); j++) {
1371 if (getAttribute(j)->getType() == type) {
1372 exists = gTrue;
1373 break;
1376 if (exists)
1377 continue;
1380 if (type != Attribute::Unknown) {
1381 Object value;
1382 GBool typeCheckOk = gTrue;
1383 Attribute *attribute = new Attribute(type, attributes->getVal(i, &value));
1384 value.free();
1386 if (attribute->isOk() && (typeCheckOk = attribute->checkType(this))) {
1387 appendAttribute(attribute);
1388 } else {
1389 // It is not needed to free "value", the Attribute instance
1390 // owns the contents, so deleting "attribute" is enough.
1391 if (!typeCheckOk) {
1392 error(errSyntaxWarning, -1, "Attribute {0:s} value is of wrong type ({1:s})",
1393 attribute->getTypeName(), attribute->getValue()->getTypeName());
1395 delete attribute;
1397 } else {
1398 error(errSyntaxWarning, -1, "Wrong Attribute '{0:s}' in element {1:s}", key, getTypeName());
1402 } else {
1403 error(errSyntaxWarning, -1, "O object is invalid value ({0:s})", owner.getName());
1405 } else if (!owner.isNull()) {
1406 error(errSyntaxWarning, -1, "O is wrong type ({0:s})", owner.getTypeName());
1408 owner.free();