1 //========================================================================
5 // This file is licensed under the GPLv2 or later
7 // Copyright 2013, 2014 Igalia S.L.
8 // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com>
9 // Copyright 2014 Albert Astals Cid <aacid@kde.org>
10 // Copyright 2015 Dmytro Morgun <lztoad@gmail.com>
12 //========================================================================
14 #ifdef USE_GCC_PRAGMAS
18 #include "StructElement.h"
19 #include "StructTreeRoot.h"
20 #include "GlobalParams.h"
21 #include "UnicodeMap.h"
30 static GBool
isPlacementName(Object
*value
)
32 return value
->isName("Block")
33 || value
->isName("Inline")
34 || value
->isName("Before")
35 || value
->isName("Start")
36 || value
->isName("End");
39 static GBool
isWritingModeName(Object
*value
)
41 return value
->isName("LrTb")
42 || value
->isName("RlTb")
43 || value
->isName("TbRl");
46 static GBool
isBorderStyleName(Object
*value
)
48 return value
->isName("None")
49 || value
->isName("Hidden")
50 || value
->isName("Dotted")
51 || value
->isName("Dashed")
52 || value
->isName("Solid")
53 || value
->isName("Double")
54 || value
->isName("Groove")
55 || value
->isName("Ridge")
56 || value
->isName("Inset")
57 || value
->isName("Outset");
60 static GBool
isTextAlignName(Object
*value
)
62 return value
->isName("Start")
63 || value
->isName("End")
64 || value
->isName("Center")
65 || value
->isName("Justify");
68 static GBool
isBlockAlignName(Object
*value
)
70 return value
->isName("Before")
71 || value
->isName("Middle")
72 || value
->isName("After")
73 || value
->isName("Justify");
76 static GBool
isInlineAlignName(Object
*value
)
78 return value
->isName("Start")
79 || value
->isName("End")
80 || value
->isName("Center");
83 static GBool
isNumber(Object
*value
)
85 return value
->isNum();
// Validator for the LineHeight attribute (see attributeMapCommonInline):
// the visible part accepts the names "Normal" and "Auto".
// NOTE(review): the return expression has no terminating ';' in this view,
// so at least one more alternative may follow the "Auto" test -- confirm
// against the complete file before treating this list as exhaustive.
88 static GBool
isLineHeight(Object
*value
)
90 return value
->isName("Normal")
91 || value
->isName("Auto")
95 static GBool
isTextDecorationName(Object
*value
)
97 return value
->isName("None")
98 || value
->isName("Underline")
99 || value
->isName("Overline")
100 || value
->isName("LineThrough");
103 static GBool
isRubyAlignName(Object
*value
)
105 return value
->isName("Start")
106 || value
->isName("End")
107 || value
->isName("Center")
108 || value
->isName("Justify")
109 || value
->isName("Distribute");
112 static GBool
isRubyPositionName(Object
*value
)
114 return value
->isName("Before")
115 || value
->isName("After")
116 || value
->isName("Warichu")
117 || value
->isName("Inline");
120 static GBool
isGlyphOrientationName(Object
*value
)
122 return value
->isName("Auto")
123 || value
->isName("90")
124 || value
->isName("180")
125 || value
->isName("270")
126 || value
->isName("360")
127 || value
->isName("-90")
128 || value
->isName("-180");
131 static GBool
isListNumberingName(Object
*value
)
133 return value
->isName("None")
134 || value
->isName("Disc")
135 || value
->isName("Circle")
136 || value
->isName("Square")
137 || value
->isName("Decimal")
138 || value
->isName("UpperRoman")
139 || value
->isName("LowerRoman")
140 || value
->isName("UpperAlpha")
141 || value
->isName("LowerAlpha");
144 static GBool
isFieldRoleName(Object
*value
)
146 return value
->isName("rb")
147 || value
->isName("cb")
148 || value
->isName("pb")
149 || value
->isName("tv");
152 static GBool
isFieldCheckedName(Object
*value
)
154 return value
->isName("on")
155 || value
->isName("off")
156 || value
->isName("neutral");
159 static GBool
isTableScopeName(Object
*value
)
161 return value
->isName("Row")
162 || value
->isName("Column")
163 || value
->isName("Both");
// Validator for colour attributes (BackgroundColor, Color and
// TextDecorationColor in the attribute tables): the value has to be an
// array of exactly three items, and the visible checks require each item to
// be a number inside the closed range [0.0, 1.0].
// NOTE(review): the statements executed when a check fails, and the final
// return, are not visible in this view.
166 static GBool
isRGBColor(Object
*value
)
// Anything that is not a three-element array is rejected up front.
168 if (!(value
->isArray() && value
->arrayGetLength() == 3))
172 for (int i
= 0; i
< 3; i
++) {
// Every component has to be numeric...
174 if (!value
->arrayGet(i
, &obj
)->isNum()) {
// ...and must not fall outside [0.0, 1.0].
179 if (obj
.getNum() < 0.0 || obj
.getNum() > 1.0) {
190 static GBool
isNatural(Object
*value
)
192 return (value
->isInt() && value
->getInt() > 0)
193 || (value
->isInt64() && value
->getInt64() > 0);
196 static GBool
isPositive(Object
*value
)
198 return value
->isNum() && value
->getNum() >= 0.0;
201 static GBool
isNumberOrAuto(Object
*value
)
203 return isNumber(value
) || value
->isName("Auto");
206 static GBool
isTextString(Object
*value
)
208 // XXX: Shall isName() also be checked?
209 return value
->isString();
// ARRAY_CHECKER(name, checkItem, length, allowSingle, allowNulls) defines a
// static validator called `name`:
//   - a non-array value is passed to checkItem() when allowSingle is true
//     and rejected otherwise;
//   - when `length` is non-zero the array length is compared against it;
//   - every array item is then tested with checkItem().
// NOTE(review): in the item test, `!checkItem(&obj)` is evaluated even when
// allowNulls is true, so a null item is only accepted if checkItem() itself
// accepts null objects -- confirm that this is the intended meaning of
// allowNulls.
// NOTE(review): several continuation lines of the macro (the early return
// for a length mismatch, the item declaration, the failure handling and the
// final return) are not visible in this view.
213 #define ARRAY_CHECKER(name, checkItem, length, allowSingle, allowNulls) \
214 static GBool name(Object *value) { \
215 if (!value->isArray()) \
216 return allowSingle ? checkItem(value) : gFalse; \
218 if (length && value->arrayGetLength() != length) \
221 GBool okay = gTrue; \
222 for (int i = 0; i < value->arrayGetLength(); i++) { \
224 value->arrayGet(i, &obj); \
225 if ((!allowNulls && obj.isNull()) || !checkItem(&obj)) { \
235 ARRAY_CHECKER(isRGBColorOrOptionalArray4
, isRGBColor
, 4, gTrue
, gTrue
);
236 ARRAY_CHECKER(isPositiveOrOptionalArray4
, isPositive
, 4, gTrue
, gTrue
);
237 ARRAY_CHECKER(isPositiveOrArray4
, isPositive
, 4, gTrue
, gFalse
);
238 ARRAY_CHECKER(isBorderStyle
, isBorderStyleName
, 4, gTrue
, gTrue
);
239 ARRAY_CHECKER(isNumberArray4
, isNumber
, 4, gFalse
, gFalse
);
240 ARRAY_CHECKER(isNumberOrArrayN
, isNumber
, 0, gTrue
, gFalse
);
241 ARRAY_CHECKER(isTableHeaders
, isTextString
, 0, gFalse
, gFalse
);
244 // Type of functions used to do type-checking on attribute values
245 typedef GBool (*AttributeCheckFunc
)(Object
*);
247 // Maps attributes to their names and whether the attribute can be inherited.
// One row of an attribute table. The visible members are the attribute type,
// a pointer to its default value object (NULL in ATTR_LIST_END) and the
// validator used for type-checking.
// NOTE(review): ATTR_LIST_END initialises five fields, so two members are
// missing from this view (other code reads `entry->name`); their exact
// declarations should be confirmed against the complete file.
248 struct AttributeMapEntry
{
249 Attribute::Type type
;
251 const Object
*defval
;
253 AttributeCheckFunc check
;
// Holder for the Object instances used as attribute default values. The
// constructor fills each visible member with the name it is called after.
// NOTE(review): the member declarations, the initialisation of the other
// defaults referenced by the tables (Zero, Nat1, off) and the destructor
// body are not visible in this view.
256 struct AttributeDefaults
{
269 AttributeDefaults() {
270 Inline
.initName("Inline");
271 LrTb
.initName("LrTb");
272 Normal
.initName("Normal");
273 Distribute
.initName("Distribute");
277 Auto
.initName("Auto");
278 Start
.initName("Start");
279 None
.initName("None");
280 Before
.initName("Before");
284 ~AttributeDefaults() {
299 static const AttributeDefaults attributeDefaults
;
// Helper macros used to write the AttributeMapEntry tables:
//   ATTR_LIST_END      - sentinel row whose type is Attribute::Unknown; the
//                        lookup loops stop when they reach it.
//   ATTR_WITH_DEFAULT  - row with a default value taken from the matching
//                        member of attributeDefaults.
//   ATTR               - row declared without a defval argument.
// NOTE(review): most lines of the ATTR_WITH_DEFAULT and ATTR expansions are
// not visible in this view.
302 #define ATTR_LIST_END \
303 { Attribute::Unknown, NULL, NULL, gFalse, NULL }
305 #define ATTR_WITH_DEFAULT(name, inherit, check, defval) \
308 &attributeDefaults.defval, \
312 #define ATTR(name, inherit, check) \
319 static const AttributeMapEntry attributeMapCommonShared
[] =
321 ATTR_WITH_DEFAULT(Placement
, gFalse
, isPlacementName
, Inline
),
322 ATTR_WITH_DEFAULT(WritingMode
, gTrue
, isWritingModeName
, LrTb
),
323 ATTR (BackgroundColor
, gFalse
, isRGBColor
),
324 ATTR (BorderColor
, gTrue
, isRGBColorOrOptionalArray4
),
325 ATTR_WITH_DEFAULT(BorderStyle
, gFalse
, isBorderStyle
, None
),
326 ATTR (BorderThickness
, gTrue
, isPositiveOrOptionalArray4
),
327 ATTR_WITH_DEFAULT(Padding
, gFalse
, isPositiveOrArray4
, Zero
),
328 ATTR (Color
, gTrue
, isRGBColor
),
332 static const AttributeMapEntry attributeMapCommonBlock
[] =
334 ATTR_WITH_DEFAULT(SpaceBefore
, gFalse
, isPositive
, Zero
),
335 ATTR_WITH_DEFAULT(SpaceAfter
, gFalse
, isPositive
, Zero
),
336 ATTR_WITH_DEFAULT(StartIndent
, gTrue
, isNumber
, Zero
),
337 ATTR_WITH_DEFAULT(EndIndent
, gTrue
, isNumber
, Zero
),
338 ATTR_WITH_DEFAULT(TextIndent
, gTrue
, isNumber
, Zero
),
339 ATTR_WITH_DEFAULT(TextAlign
, gTrue
, isTextAlignName
, Start
),
340 ATTR (BBox
, gFalse
, isNumberArray4
),
341 ATTR_WITH_DEFAULT(Width
, gFalse
, isNumberOrAuto
, Auto
),
342 ATTR_WITH_DEFAULT(Height
, gFalse
, isNumberOrAuto
, Auto
),
343 ATTR_WITH_DEFAULT(BlockAlign
, gTrue
, isBlockAlignName
, Before
),
344 ATTR_WITH_DEFAULT(InlineAlign
, gTrue
, isInlineAlignName
, Start
),
348 static const AttributeMapEntry attributeMapCommonInline
[] =
350 ATTR_WITH_DEFAULT(BaselineShift
, gFalse
, isNumber
, Zero
),
351 ATTR_WITH_DEFAULT(LineHeight
, gTrue
, isLineHeight
, Normal
),
352 ATTR (TextDecorationColor
, gTrue
, isRGBColor
),
353 ATTR (TextDecorationThickness
, gTrue
, isPositive
),
354 ATTR_WITH_DEFAULT(TextDecorationType
, gFalse
, isTextDecorationName
, None
),
355 ATTR_WITH_DEFAULT(GlyphOrientationVertical
, gTrue
, isGlyphOrientationName
, Auto
),
359 static const AttributeMapEntry attributeMapCommonRubyText
[] =
361 ATTR_WITH_DEFAULT(RubyPosition
, gTrue
, isRubyPositionName
, Before
),
362 ATTR_WITH_DEFAULT(RubyAlign
, gTrue
, isRubyAlignName
, Distribute
),
366 static const AttributeMapEntry attributeMapCommonColumns
[] =
368 ATTR_WITH_DEFAULT(ColumnCount
, gFalse
, isNatural
, Nat1
),
369 ATTR (ColumnGap
, gFalse
, isNumberOrArrayN
),
370 ATTR (ColumnWidths
, gFalse
, isNumberOrArrayN
),
374 static const AttributeMapEntry attributeMapCommonList
[] = {
375 ATTR_WITH_DEFAULT(ListNumbering
, gTrue
, isListNumberingName
, None
),
379 static const AttributeMapEntry attributeMapCommonPrintField
[] =
381 ATTR (Role
, gFalse
, isFieldRoleName
),
382 ATTR_WITH_DEFAULT(checked
, gFalse
, isFieldCheckedName
, off
),
383 ATTR (Desc
, gFalse
, isTextString
),
387 static const AttributeMapEntry attributeMapCommonTable
[] =
389 ATTR(Headers
, gFalse
, isTableHeaders
),
390 ATTR(Scope
, gFalse
, isTableScopeName
),
391 ATTR(Summary
, gFalse
, isTextString
),
395 static const AttributeMapEntry attributeMapCommonTableCell
[] =
397 ATTR_WITH_DEFAULT(RowSpan
, gFalse
, isNatural
, Nat1
),
398 ATTR_WITH_DEFAULT(ColSpan
, gFalse
, isNatural
, Nat1
),
399 ATTR_WITH_DEFAULT(TBorderStyle
, gTrue
, isBorderStyle
, None
),
400 ATTR_WITH_DEFAULT(TPadding
, gTrue
, isPositiveOrArray4
, Zero
),
404 #undef ATTR_WITH_DEFAULT
// List of all nine attributeMapCommon* tables. It is the list searched when
// no element-specific list applies (Attribute::getTypeName,
// Attribute::getDefaultValue, the starting value in
// Attribute::getTypeForName, and StructElement::findAttribute).
// NOTE(review): the end of the initialiser is not visible in this view.
408 static const AttributeMapEntry
*attributeMapAll
[] = {
409 attributeMapCommonShared
,
410 attributeMapCommonBlock
,
411 attributeMapCommonInline
,
412 attributeMapCommonRubyText
,
413 attributeMapCommonColumns
,
414 attributeMapCommonList
,
415 attributeMapCommonPrintField
,
416 attributeMapCommonTable
,
417 attributeMapCommonTableCell
,
// Attribute list for element types that only take the shared attributes
// (typeMap uses it for Document, Part, TR and others).
// NOTE(review): the end of the initialiser is not visible in this view.
421 static const AttributeMapEntry
*attributeMapShared
[] = {
422 attributeMapCommonShared
,
// Attribute list for block-level element types (typeMap uses it for P, the
// headings and the list item types): shared rows plus block layout rows.
// NOTE(review): the end of the initialiser is not visible in this view.
426 static const AttributeMapEntry
*attributeMapBlock
[] = {
427 attributeMapCommonShared
,
428 attributeMapCommonBlock
,
// Attribute list combining the shared rows with the inline layout rows
// (typeMap uses it for Span, Quote, Link and others).
// NOTE(review): the end of the initialiser is not visible in this view.
432 static const AttributeMapEntry
*attributeMapInline
[] = {
433 attributeMapCommonShared
,
434 attributeMapCommonInline
,
// Attribute list for table cells (TH and TD in typeMap): shared, block,
// table and table-cell rows.
// NOTE(review): the end of the initialiser is not visible in this view.
438 static const AttributeMapEntry
*attributeMapTableCell
[] = {
439 attributeMapCommonShared
,
440 attributeMapCommonBlock
,
441 attributeMapCommonTable
,
442 attributeMapCommonTableCell
,
// Attribute list for ruby-related element types (Ruby, RB, RT and Warichu
// in typeMap): shared, inline and ruby rows.
// NOTE(review): the end of the initialiser is not visible in this view.
446 static const AttributeMapEntry
*attributeMapRubyText
[] = {
447 attributeMapCommonShared
,
448 attributeMapCommonInline
,
449 attributeMapCommonRubyText
,
// Attribute list for element types that may carry column layout (Art, Sect
// and Div in typeMap): shared, inline and column rows.
// NOTE(review): the end of the initialiser is not visible in this view.
453 static const AttributeMapEntry
*attributeMapColumns
[] = {
454 attributeMapCommonShared
,
455 attributeMapCommonInline
,
456 attributeMapCommonColumns
,
// Attribute list for the list element type (L in typeMap): shared rows plus
// the list numbering row.
// NOTE(review): the end of the initialiser is not visible in this view.
460 static const AttributeMapEntry
*attributeMapList
[] = {
461 attributeMapCommonShared
,
462 attributeMapCommonList
,
// Attribute list for the Table element type: shared, block and table rows.
// NOTE(review): the end of the initialiser is not visible in this view.
466 static const AttributeMapEntry
*attributeMapTable
[] = {
467 attributeMapCommonShared
,
468 attributeMapCommonBlock
,
469 attributeMapCommonTable
,
// Attribute list for illustration element types (Figure, Formula and Form
// in typeMap): shared, block and inline rows.
// NOTE(review): the end of the initialiser is not visible in this view.
473 static const AttributeMapEntry
*attributeMapIllustration
[] = {
474 // XXX: Illustrations may have some attributes from the "shared", "inline",
475 // the "block" sets. This is a loose specification; making it better
476 // means duplicating entries from the sets. This seems good enough...
477 attributeMapCommonShared
,
478 attributeMapCommonBlock
,
479 attributeMapCommonInline
,
483 // Table mapping owners of attributes to their names.
484 static const struct OwnerMapEntry
{
485 Attribute::Owner owner
;
488 // XXX: Those are sorted in the owner priority resolution order. If the
489 // same attribute is defined with two owners, the order in the table
490 // can be used to know which one has more priority.
491 { Attribute::XML_1_00
, "XML-1.00" },
492 { Attribute::HTML_3_20
, "HTML-3.20" },
493 { Attribute::HTML_4_01
, "HTML-4.01" },
494 { Attribute::OEB_1_00
, "OEB-1.00" },
495 { Attribute::RTF_1_05
, "RTF-1.05" },
496 { Attribute::CSS_1_00
, "CSS-1.00" },
497 { Attribute::CSS_2_00
, "CSS-2.00" },
498 { Attribute::Layout
, "Layout" },
499 { Attribute::PrintField
, "PrintField" },
500 { Attribute::Table
, "Table" },
501 { Attribute::List
, "List" },
502 { Attribute::UserProperties
, "UserProperties" },
506 static GBool
ownerHasMorePriority(Attribute::Owner a
, Attribute::Owner b
)
508 unsigned aIndex
, bIndex
;
510 for (unsigned i
= aIndex
= bIndex
= 0; i
< sizeof(ownerMap
) / sizeof(ownerMap
[0]); i
++) {
511 if (ownerMap
[i
].owner
== a
)
513 if (ownerMap
[i
].owner
== b
)
517 return aIndex
< bIndex
;
521 // Maps element types to their names and also serves as lookup table
522 // for additional element type attributes.
// NOTE(review): only one enumerator of the ElementType enumeration is
// visible here; typeMap also uses elementTypeInline, elementTypeBlock and
// elementTypeGrouping, whose declarations are outside this view.
525 elementTypeUndefined
,
531 static const struct TypeMapEntry
{
532 StructElement::Type type
;
534 ElementType elementType
;
535 const AttributeMapEntry
**attributes
;
537 { StructElement::Document
, "Document", elementTypeGrouping
, attributeMapShared
},
538 { StructElement::Part
, "Part", elementTypeGrouping
, attributeMapShared
},
539 { StructElement::Art
, "Art", elementTypeGrouping
, attributeMapColumns
},
540 { StructElement::Sect
, "Sect", elementTypeGrouping
, attributeMapColumns
},
541 { StructElement::Div
, "Div", elementTypeGrouping
, attributeMapColumns
},
542 { StructElement::BlockQuote
, "BlockQuote", elementTypeGrouping
, attributeMapInline
},
543 { StructElement::Caption
, "Caption", elementTypeGrouping
, attributeMapInline
},
544 { StructElement::NonStruct
, "NonStruct", elementTypeGrouping
, attributeMapInline
},
545 { StructElement::Index
, "Index", elementTypeGrouping
, attributeMapInline
},
546 { StructElement::Private
, "Private", elementTypeGrouping
, attributeMapInline
},
547 { StructElement::Span
, "Span", elementTypeInline
, attributeMapInline
},
548 { StructElement::Quote
, "Quote", elementTypeInline
, attributeMapInline
},
549 { StructElement::Note
, "Note", elementTypeInline
, attributeMapInline
},
550 { StructElement::Reference
, "Reference", elementTypeInline
, attributeMapInline
},
551 { StructElement::BibEntry
, "BibEntry", elementTypeInline
, attributeMapInline
},
552 { StructElement::Code
, "Code", elementTypeInline
, attributeMapInline
},
553 { StructElement::Link
, "Link", elementTypeInline
, attributeMapInline
},
554 { StructElement::Annot
, "Annot", elementTypeInline
, attributeMapInline
},
555 { StructElement::Ruby
, "Ruby", elementTypeInline
, attributeMapRubyText
},
556 { StructElement::RB
, "RB", elementTypeUndefined
, attributeMapRubyText
},
557 { StructElement::RT
, "RT", elementTypeUndefined
, attributeMapRubyText
},
558 { StructElement::RP
, "RP", elementTypeUndefined
, attributeMapShared
},
559 { StructElement::Warichu
, "Warichu", elementTypeInline
, attributeMapRubyText
},
560 { StructElement::WT
, "WT", elementTypeUndefined
, attributeMapShared
},
561 { StructElement::WP
, "WP", elementTypeUndefined
, attributeMapShared
},
562 { StructElement::P
, "P", elementTypeBlock
, attributeMapBlock
},
563 { StructElement::H
, "H", elementTypeBlock
, attributeMapBlock
},
564 { StructElement::H1
, "H1", elementTypeBlock
, attributeMapBlock
},
565 { StructElement::H2
, "H2", elementTypeBlock
, attributeMapBlock
},
566 { StructElement::H3
, "H3", elementTypeBlock
, attributeMapBlock
},
567 { StructElement::H4
, "H4", elementTypeBlock
, attributeMapBlock
},
568 { StructElement::H5
, "H5", elementTypeBlock
, attributeMapBlock
},
569 { StructElement::H6
, "H6", elementTypeBlock
, attributeMapBlock
},
570 { StructElement::L
, "L", elementTypeBlock
, attributeMapList
},
571 { StructElement::LI
, "LI", elementTypeBlock
, attributeMapBlock
},
572 { StructElement::Lbl
, "Lbl", elementTypeBlock
, attributeMapBlock
},
573 { StructElement::LBody
, "LBody", elementTypeBlock
, attributeMapBlock
},
574 { StructElement::Table
, "Table", elementTypeBlock
, attributeMapTable
},
575 { StructElement::TR
, "TR", elementTypeUndefined
, attributeMapShared
},
576 { StructElement::TH
, "TH", elementTypeUndefined
, attributeMapTableCell
},
577 { StructElement::TD
, "TD", elementTypeUndefined
, attributeMapTableCell
},
578 { StructElement::THead
, "THead", elementTypeUndefined
, attributeMapShared
},
579 { StructElement::TFoot
, "TFoot", elementTypeUndefined
, attributeMapShared
},
580 { StructElement::TBody
, "TBody", elementTypeUndefined
, attributeMapShared
},
581 { StructElement::Figure
, "Figure", elementTypeUndefined
, attributeMapIllustration
},
582 { StructElement::Formula
, "Formula", elementTypeUndefined
, attributeMapIllustration
},
583 { StructElement::Form
, "Form", elementTypeUndefined
, attributeMapIllustration
},
584 { StructElement::TOC
, "TOC", elementTypeGrouping
, attributeMapShared
},
585 { StructElement::TOCI
, "TOCI", elementTypeGrouping
, attributeMapShared
},
589 //------------------------------------------------------------------------
590 // Helpers for the attribute and structure type tables
591 //------------------------------------------------------------------------
// Looks for the row describing attribute `type` in the tables referenced by
// `entryList`. Each table is walked until its sentinel row (type
// Attribute::Unknown) is reached.
// NOTE(review): the loop advancing through `entryList`, the statement run
// on a match and the fall-through return are not visible in this view;
// callers test the result against NULL.
593 static inline const AttributeMapEntry
*
594 getAttributeMapEntry(const AttributeMapEntry
**entryList
, Attribute::Type type
)
598 const AttributeMapEntry
*entry
= *entryList
;
599 while (entry
->type
!= Attribute::Unknown
) {
601 if (type
== entry
->type
)
// Overload that looks a row up by attribute name: rows are compared with
// strcmp(), so the match is exact and case-sensitive.
// NOTE(review): the loop advancing through `entryList`, the statement run
// on a match and the fall-through return are not visible in this view;
// callers test the result against NULL.
610 static inline const AttributeMapEntry
*
611 getAttributeMapEntry(const AttributeMapEntry
**entryList
, const char *name
)
615 const AttributeMapEntry
*entry
= *entryList
;
616 while (entry
->type
!= Attribute::Unknown
) {
618 if (strcmp(name
, entry
->name
) == 0)
627 static inline const OwnerMapEntry
*getOwnerMapEntry(Attribute::Owner owner
)
629 for (unsigned i
= 0; i
< sizeof(ownerMap
) / sizeof(ownerMap
[0]); i
++) {
630 if (owner
== ownerMap
[i
].owner
)
636 static inline const OwnerMapEntry
*getOwnerMapEntry(const char *name
)
638 for (unsigned i
= 0; i
< sizeof(ownerMap
) / sizeof(ownerMap
[0]); i
++) {
639 if (strcmp(name
, ownerMap
[i
].name
) == 0)
645 static const char *ownerToName(Attribute::Owner owner
)
647 const OwnerMapEntry
*entry
= getOwnerMapEntry(owner
);
648 return entry
? entry
->name
: "UnknownOwner";
651 static Attribute::Owner
nameToOwner(const char *name
)
653 const OwnerMapEntry
*entry
= getOwnerMapEntry(name
);
654 return entry
? entry
->owner
: Attribute::UnknownOwner
;
657 static inline const TypeMapEntry
*getTypeMapEntry(StructElement::Type type
)
659 for (unsigned i
= 0; i
< sizeof(typeMap
) / sizeof(typeMap
[0]); i
++) {
660 if (type
== typeMap
[i
].type
)
666 static inline const TypeMapEntry
*getTypeMapEntry(const char *name
)
668 for (unsigned i
= 0; i
< sizeof(typeMap
) / sizeof(typeMap
[0]); i
++) {
669 if (strcmp(name
, typeMap
[i
].name
) == 0)
675 static const char *typeToName(StructElement::Type type
)
677 if (type
== StructElement::MCID
)
678 return "MarkedContent";
679 if (type
== StructElement::OBJR
)
680 return "ObjectReference";
682 const TypeMapEntry
*entry
= getTypeMapEntry(type
);
683 return entry
? entry
->name
: "Unknown";
686 static StructElement::Type
nameToType(const char *name
)
688 const TypeMapEntry
*entry
= getTypeMapEntry(name
);
689 return entry
? entry
->type
: StructElement::Unknown
;
693 //------------------------------------------------------------------------
695 //------------------------------------------------------------------------
// Builds a user property attribute from its name (`nameA` / `nameLenA`) and
// a value. The owner is set to UserProperties and `valueA` is copied into
// the attribute's own `value` member.
// NOTE(review): the remaining member initialisers and any checks in the body
// are not visible in this view.
697 Attribute::Attribute(const char *nameA
, int nameLenA
, Object
*valueA
):
699 owner(UserProperties
),
701 name(nameA
, nameLenA
),
707 valueA
->copy(&value
);
// Builds a standard attribute of the given `type`; `valueA` is copied into
// the attribute's own `value` member. As the TODO below says, the owner is
// always initialised to UserProperties here rather than derived from the
// type.
// NOTE(review): the remaining member initialisers and any checks in the body
// are not visible in this view.
710 Attribute::Attribute(Type type
, Object
*valueA
):
712 owner(UserProperties
), // TODO: Determine corresponding owner from Type
721 valueA
->copy(&value
);
// Destructor.
// NOTE(review): the body is not visible in this view.
727 Attribute::~Attribute()
// Gives the name of this attribute. User properties carry their own name;
// for every other type the row is looked up in attributeMapAll.
// NOTE(review): how the looked-up row (or a failed lookup) is turned into
// the returned string is not visible in this view.
733 const char *Attribute::getTypeName() const
735 if (type
== UserProperty
)
736 return name
.getCString();
738 const AttributeMapEntry
*entry
= getAttributeMapEntry(attributeMapAll
, type
);
745 const char *Attribute::getOwnerName() const
747 return ownerToName(owner
);
750 Object
*Attribute::getDefaultValue(Attribute::Type type
)
752 const AttributeMapEntry
*entry
= getAttributeMapEntry(attributeMapAll
, type
);
753 return entry
? const_cast<Object
*>(entry
->defval
) : NULL
;
// Stores the formatted representation of the attribute value. The visible
// statements either overwrite the existing `formatted` string or allocate a
// new GooString holding `formattedA`.
// NOTE(review): the conditions selecting between the two statements, and any
// handling of a NULL `formattedA`, are not visible in this view.
756 void Attribute::setFormattedValue(const char *formattedA
)
760 formatted
->Set(formattedA
);
762 formatted
= new GooString(formattedA
);
// Type-checks the attribute value. When the type of `element` has an
// attribute list in typeMap, the attribute is looked up in that list and,
// if the row has a validator, the validator is run on `value`.
// NOTE(review): the guards around the lookups (for a NULL `element` or a
// missing row) and all return statements are not visible in this view.
769 GBool
Attribute::checkType(StructElement
*element
)
771 // If an element is passed, tighter type-checking can be done.
775 const TypeMapEntry
*elementTypeEntry
= getTypeMapEntry(element
->getType());
776 if (elementTypeEntry
&& elementTypeEntry
->attributes
) {
777 const AttributeMapEntry
*entry
= getAttributeMapEntry(elementTypeEntry
->attributes
, type
);
// A row without a validator accepts any value.
779 if (entry
->check
&& !((*entry
->check
)(&value
))) {
783 // No entry: the attribute is not valid for the containing element.
// Translates an attribute name into its type constant. The search starts
// with attributeMapAll and is narrowed to the element type's own attribute
// list when typeMap provides one; Unknown is returned when no row matches.
// NOTE(review): whether the `element` lookup is guarded against a NULL
// `element` is not visible in this view.
791 Attribute::Type
Attribute::getTypeForName(const char *name
, StructElement
*element
)
793 const AttributeMapEntry
**attributes
= attributeMapAll
;
795 const TypeMapEntry
*elementTypeEntry
= getTypeMapEntry(element
->getType());
796 if (elementTypeEntry
&& elementTypeEntry
->attributes
) {
797 attributes
= elementTypeEntry
->attributes
;
801 const AttributeMapEntry
*entry
= getAttributeMapEntry(attributes
, name
);
802 return entry
? entry
->type
: Unknown
;
// Builds an Attribute from a user property dictionary, reading these keys:
//   N - property name; a string (its length is taken from the string) or a
//       name object. Any other type is reported as a syntax error.
//   V - property value; a null value is reported as a syntax error.
//   F - optional formatted value; must be a string when present.
//   H - optional hidden flag; must be a boolean when present.
// A wrong type for F or H only produces a warning.
// NOTE(review): the cleanup and return statements on the error paths, and
// the final return, are not visible in this view.
805 Attribute
*Attribute::parseUserProperty(Dict
*property
)
808 const char *name
= NULL
;
809 int nameLen
= GooString::CALC_STRING_LEN
;
811 if (property
->lookup("N", &obj
)->isString()) {
812 GooString
*s
= obj
.getString();
813 name
= s
->getCString();
814 nameLen
= s
->getLength();
815 } else if (obj
.isName())
816 name
= obj
.getName();
818 error(errSyntaxError
, -1, "N object is wrong type ({0:s})", obj
.getTypeName());
823 if (property
->lookup("V", &value
)->isNull()) {
824 error(errSyntaxError
, -1, "V object is wrong type ({0:s})", value
.getTypeName());
830 Attribute
*attribute
= new Attribute(name
, nameLen
, &value
);
834 if (property
->lookup("F", &obj
)->isString()) {
835 attribute
->setFormattedValue(obj
.getString()->getCString());
836 } else if (!obj
.isNull()) {
837 error(errSyntaxWarning
, -1, "F object is wrong type ({0:s})", obj
.getTypeName());
841 if (property
->lookup("H", &obj
)->isBool()) {
842 attribute
->setHidden(obj
.getBool());
843 } else if (!obj
.isNull()) {
844 error(errSyntaxWarning
, -1, "H object is wrong type ({0:s})", obj
.getTypeName());
852 //------------------------------------------------------------------------
854 //------------------------------------------------------------------------
// Constructor of the data kept for non-content structure elements.
// NOTE(review): the member initialisers and the body are not visible in
// this view.
856 StructElement::StructData::StructData():
// Destructor: deletes every child element and every attribute stored in the
// two pointer arrays.
// NOTE(review): statements preceding the two loops are not visible in this
// view.
867 StructElement::StructData::~StructData()
875 for (ElemPtrArray::iterator i
= elements
.begin(); i
!= elements
.end(); ++i
) delete *i
;
876 for (AttrPtrArray::iterator i
= attributes
.begin(); i
!= attributes
.end(); ++i
) delete *i
;
// Builds a structure element from its dictionary and then parses its
// children. `seen` is the set passed on to parseChildren(), where
// parseChild() uses it to detect loops in the tree.
// NOTE(review): the member initialisers and the statements preceding
// parseChildren() are not visible in this view.
880 StructElement::StructElement(Dict
*element
,
881 StructTreeRoot
*treeRootA
,
882 StructElement
*parentA
,
883 std::set
<int> &seen
):
893 parseChildren(element
, seen
);
// Builds a content element that refers to marked content by its identifier
// `mcid`; the identifier is stored in a newly allocated ContentData.
// NOTE(review): the other member initialisers and the body are not visible
// in this view.
896 StructElement::StructElement(int mcid
, StructTreeRoot
*treeRootA
, StructElement
*parentA
):
900 c(new ContentData(mcid
))
// Builds a content element that refers to a PDF object through `ref`; the
// reference is stored in a newly allocated ContentData.
// NOTE(review): the other member initialisers and the body are not visible
// in this view.
906 StructElement::StructElement(const Ref
& ref
, StructTreeRoot
*treeRootA
, StructElement
*parentA
):
910 c(new ContentData(ref
))
// Destructor.
// NOTE(review): the body is not visible in this view.
916 StructElement::~StructElement()
925 GBool
StructElement::isBlock() const
927 const TypeMapEntry
*entry
= getTypeMapEntry(type
);
928 return entry
? (entry
->elementType
== elementTypeBlock
) : gFalse
;
931 GBool
StructElement::isInline() const
933 const TypeMapEntry
*entry
= getTypeMapEntry(type
);
934 return entry
? (entry
->elementType
== elementTypeInline
) : gFalse
;
937 GBool
StructElement::isGrouping() const
939 const TypeMapEntry
*entry
= getTypeMapEntry(type
);
940 return entry
? (entry
->elementType
== elementTypeGrouping
) : gFalse
;
943 GBool
StructElement::hasPageRef() const
945 return pageRef
.isRef() || (parent
&& parent
->hasPageRef());
// Retrieves the page reference that applies to this element: its own
// `pageRef` when that is a reference, otherwise whatever the parent reports.
// NOTE(review): the return after the assignment, the guard on `parent` and
// the fall-through return are not visible in this view.
948 bool StructElement::getPageRef(Ref
& ref
) const
950 if (pageRef
.isRef()) {
951 ref
= pageRef
.getRef();
956 return parent
->getPageRef(ref
);
961 const char *StructElement::getTypeName() const
963 return typeToName(type
);
// Searches this element for an attribute of type `attributeType`.
//   - With attributeOwner == Attribute::UnknownOwner every owner is
//     considered, and ownerHasMorePriority() decides between several
//     matches.
//   - Otherwise only attributes of exactly that owner match.
//   - When `inherit` is set and there is a parent, the search may continue
//     in the parent element.
// NOTE(review): the condition guarding the first delegation to the parent,
// the statements recording a match, and the condition under which
// inheritance is followed are not visible in this view.
966 const Attribute
*StructElement::findAttribute(Attribute::Type attributeType
, GBool inherit
,
967 Attribute::Owner attributeOwner
) const
970 return parent
->findAttribute(attributeType
, inherit
, attributeOwner
);
// Unknown and UserProperty are special-cased before the search.
972 if (attributeType
== Attribute::Unknown
|| attributeType
== Attribute::UserProperty
)
975 const Attribute
*result
= NULL
;
977 if (attributeOwner
== Attribute::UnknownOwner
) {
978 // Search for the attribute, no matter who the owner is
979 for (unsigned i
= 0; i
< getNumAttributes(); i
++) {
980 const Attribute
*attr
= getAttribute(i
);
981 if (attributeType
== attr
->getType()) {
982 if (!result
|| ownerHasMorePriority(attr
->getOwner(), result
->getOwner()))
987 // Search for the attribute, with a specific owner
988 for (unsigned i
= 0; i
< getNumAttributes(); i
++) {
989 const Attribute
*attr
= getAttribute(i
);
990 if (attributeType
== attr
->getType() && attributeOwner
== attr
->getOwner()) {
1000 if (inherit
&& parent
) {
1001 const AttributeMapEntry
*entry
= getAttributeMapEntry(attributeMapAll
, attributeType
);
1003 // TODO: Take into account special inheritance cases, for example:
1004 // inline elements which have been changed to be block using
1005 // "/Placement/Block" have slightly different rules.
1007 return parent
->findAttribute(attributeType
, inherit
, attributeOwner
);
// Collects the text of this element into `string`.
//   - For a marked content item (content element that is not an object
//     reference) the text spans are obtained through a
//     MarkedContentOutputDev and appended one by one.
//   - Otherwise the children are visited in order and asked to append their
//     own text.
// A new GooString is allocated in both branches.
// NOTE(review): the conditions guarding the two allocations, the use of
// `recursive` and the return statements are not visible in this view.
1013 GooString
* StructElement::appendSubTreeText(GooString
*string
, GBool recursive
) const
1015 if (isContent() && !isObjectRef()) {
1016 MarkedContentOutputDev
mcdev(getMCID());
1017 const TextSpanArray
& spans(getTextSpansInternal(mcdev
));
1020 string
= new GooString();
1022 for (TextSpanArray::const_iterator i
= spans
.begin(); i
!= spans
.end(); ++i
)
1023 string
->append(i
->getText());
1031 // Do a depth-first traversal, to get elements in logical order
1033 string
= new GooString();
1035 for (unsigned i
= 0; i
< getNumChildren(); i
++)
1036 getChild(i
)->appendSubTreeText(string
, recursive
);
// Renders pages through `mcdev` and returns the text spans it collected.
// Must only be called on content elements (asserted). When a page reference
// is available, only the page found for it is rendered; if no page number
// was obtained, the end of the range is set to the last page of the
// document.
// NOTE(review): the declaration of `ref` and the assignment of `startPage`
// in the fallback branch are not visible in this view.
1041 const TextSpanArray
& StructElement::getTextSpansInternal(MarkedContentOutputDev
& mcdev
) const
1043 assert(isContent());
1045 int startPage
= 0, endPage
= 0;
1048 if (getPageRef(ref
)) {
1049 startPage
= endPage
= treeRoot
->getDoc()->findPage(ref
.num
, ref
.gen
);
// Zero means no page was found for the reference.
1052 if (!(startPage
&& endPage
)) {
1054 endPage
= treeRoot
->getDoc()->getNumPages();
1057 treeRoot
->getDoc()->displayPages(&mcdev
, startPage
, endPage
, 72.0, 72.0, 0, gTrue
, gFalse
, gFalse
);
1058 return mcdev
.getTextSpans();
// Resolves a non-standard element type name through the RoleMap dictionary.
// `name` is the name being resolved and `curName` the intermediate name
// reached so far (NULL on the first call). The looked-up object is stored in
// `resolved`; when it is a name that is still not a standard type, the
// function recurses with that name as the new `curName`.
// A RoleMap entry that is neither a name nor null produces a warning.
// NOTE(review): the circular reference test only compares `curName` with
// the starting `name`, so a cycle among intermediate names that never
// returns to `name` does not appear to be caught here -- confirm whether
// recursion is bounded elsewhere.
// NOTE(review): the non-recursive branch of the conditional expression is
// not visible in this view.
1061 static StructElement::Type
roleMapResolve(Dict
*roleMap
, const char *name
, const char *curName
, Object
*resolved
)
1063 // Circular reference
1064 if (curName
&& !strcmp(name
, curName
))
1065 return StructElement::Unknown
;
1067 if (roleMap
->lookup(curName
? curName
: name
, resolved
)->isName()) {
1068 StructElement::Type type
= nameToType(resolved
->getName());
1069 return type
== StructElement::Unknown
1070 ? roleMapResolve(roleMap
, name
, resolved
->getName(), resolved
)
1074 if (!resolved
->isNull())
1075 error(errSyntaxWarning
, -1, "RoleMap entry is wrong type ({0:s})", resolved
->getTypeName());
1076 return StructElement::Unknown
;
// Fills this element from its structure element dictionary. In order:
//   - checks the optional Type entry and the required parent reference P;
//   - reads the structure type S, resolving it through the RoleMap first and
//     then as a standard type name;
//   - reads the optional ID, Pg, R, T, Lang, Alt, E and ActualText entries;
//   - parses attributes given directly in A (a dictionary, or an array of
//     dictionaries and revision numbers);
//   - parses attributes referenced through the ClassMap by the C entry.
// NOTE(review): the cleanup and return statements on the error paths are not
// visible in this view.
1079 void StructElement::parse(Dict
*element
)
1083 // Type is optional, but if present must be StructElem
1084 if (!element
->lookup("Type", &obj
)->isNull() && !obj
.isName("StructElem")) {
1085 error(errSyntaxError
, -1, "Type of StructElem object is wrong");
1091 // Parent object reference (required).
1092 if (!element
->lookupNF("P", &s
->parentRef
)->isRef()) {
// NOTE(review): the message prints the type of `obj` (last filled by the
// "Type" lookup), not of `s->parentRef`, which is the object just tested.
1093 error(errSyntaxError
, -1, "P object is wrong type ({0:s})", obj
.getTypeName());
1097 // Check whether the S-type is valid for the top level
1098 // element and create a node of the appropriate type.
1099 if (!element
->lookup("S", &obj
)->isName()) {
1100 error(errSyntaxError
, -1, "S object is wrong type ({0:s})", obj
.getTypeName());
1105 // Type name may not be standard, resolve through RoleMap first.
1106 if (treeRoot
->getRoleMap()) {
1107 Object resolvedName
;
1108 type
= roleMapResolve(treeRoot
->getRoleMap(), obj
.getName(), NULL
, &resolvedName
);
1111 // Resolving through RoleMap may leave type as Unknown, e.g. for types
1112 // which are not present in it, yet they are standard element types.
1113 if (type
== Unknown
)
1114 type
= nameToType(obj
.getName());
1116 // At this point the type name must have been resolved.
1117 if (type
== Unknown
) {
1118 error(errSyntaxError
, -1, "StructElem object is wrong type ({0:s})", obj
.getName());
1124 // Object ID (optional), to be looked at the IDTree in the tree root.
1125 if (element
->lookup("ID", &obj
)->isString()) {
1126 s
->id
= obj
.takeString();
1130 // Page reference (optional) in which at least one of the child items
1131 // is to be rendered in. Note: each element stores only the /Pg value
1132 // contained by it, and StructElement::getPageRef() may look in parent
1133 // elements to find the page where an element belongs.
1134 element
->lookupNF("Pg", &pageRef
);
1136 // Revision number (optional).
1137 if (element
->lookup("R", &obj
)->isInt()) {
1138 s
->revision
= obj
.getInt();
1142 // Element title (optional).
1143 if (element
->lookup("T", &obj
)->isString()) {
1144 s
->title
= obj
.takeString();
1148 // Language (optional).
1149 if (element
->lookup("Lang", &obj
)->isString()) {
1150 s
->language
= obj
.takeString();
1154 // Alternative text (optional).
1155 if (element
->lookup("Alt", &obj
)->isString()) {
1156 s
->altText
= obj
.takeString();
1160 // Expanded form of an abbreviation (optional).
1161 if (element
->lookup("E", &obj
)->isString()) {
1162 s
->expandedAbbr
= obj
.takeString();
1166 // Actual text (optional).
1167 if (element
->lookup("ActualText", &obj
)->isString()) {
1168 s
->actualText
= obj
.takeString();
1172 // Attributes directly attached to the element (optional).
1173 if (element
->lookup("A", &obj
)->isDict()) {
1174 parseAttributes(obj
.getDict());
1175 } else if (obj
.isArray()) {
// attrIndex remembers where the attributes of the most recent dictionary
// start, so that a following integer item can be applied to them as their
// revision number.
1177 unsigned attrIndex
= getNumAttributes();
1178 for (int i
= 0; i
< obj
.arrayGetLength(); i
++) {
1179 if (obj
.arrayGet(i
, &iobj
)->isDict()) {
1180 attrIndex
= getNumAttributes();
1181 parseAttributes(iobj
.getDict());
1182 } else if (iobj
.isInt()) {
1183 const int revision
= iobj
.getInt();
1184 // Set revision numbers for the elements previously created.
1185 for (unsigned j
= attrIndex
; j
< getNumAttributes(); j
++)
1186 getAttribute(j
)->setRevision(revision
);
1188 error(errSyntaxWarning
, -1, "A item is wrong type ({0:s})", iobj
.getTypeName());
1192 } else if (!obj
.isNull()) {
1193 error(errSyntaxWarning
, -1, "A is wrong type ({0:s})", obj
.getTypeName());
1197 // Attributes referenced indirectly through the ClassMap (optional).
1198 if (treeRoot
->getClassMap()) {
1200 if (element
->lookup("C", &classes
)->isName()) {
1202 if (treeRoot
->getClassMap()->lookup(classes
.getName(), &attr
)->isDict()) {
1203 parseAttributes(attr
.getDict(), gTrue
);
1204 } else if (attr
.isArray()) {
1205 for (int i
= 0; i
< attr
.arrayGetLength(); i
++) {
// NOTE(review): unlike the /A handling, attrIndex is declared inside the
// loop here, so it is reset to getNumAttributes() on every iteration and the
// revision loop in the integer branch appears to have nothing to iterate
// over -- confirm whether the declaration belongs before the loop.
1207 unsigned attrIndex
= getNumAttributes();
1208 if (attr
.arrayGet(i
, &iobj
)->isDict()) {
1209 attrIndex
= getNumAttributes();
1210 parseAttributes(iobj
.getDict(), gTrue
);
1211 } else if (iobj
.isInt()) {
1212 // Set revision numbers for the elements previously created.
1213 const int revision
= iobj
.getInt();
1214 for (unsigned j
= attrIndex
; j
< getNumAttributes(); j
++)
1215 getAttribute(j
)->setRevision(revision
);
1217 error(errSyntaxWarning
, -1, "C item is wrong type ({0:s})", iobj
.getTypeName());
1221 } else if (!attr
.isNull()) {
// NOTE(review): the branch tests `attr`, but the message prints the type of
// `classes`, which is known to be a name at this point.
1222 error(errSyntaxWarning
, -1, "C object is wrong type ({0:s})", classes
.getTypeName());
// Creates the child element described by one item of the K entry. `ref` is
// the unresolved item and `childObj` the resolved object:
//   - an integer is a marked content identifier;
//   - an MCR dictionary carries the identifier in MCID and may carry its own
//     page reference in Pg;
//   - an OBJR dictionary carries an object reference in Obj and may carry
//     its own page reference in Pg;
//   - any other dictionary is a nested structure element. It has to be given
//     as an indirect reference, and its object number is recorded in `seen`
//     so that a loop in the tree is detected and skipped with a warning.
// Anything else is reported as a child of the wrong type.
// NOTE(review): the declaration of the `childObj` parameter, the error path
// returns and the code that finally stores or discards `child` are not
// visible in this view.
1230 StructElement
*StructElement::parseChild(Object
*ref
,
1232 std::set
<int> &seen
)
1237 StructElement
*child
= NULL
;
1239 if (childObj
->isInt()) {
1240 child
= new StructElement(childObj
->getInt(), treeRoot
, this);
1241 } else if (childObj
->isDict("MCR")) {
1243 * TODO: The optional Stm/StwOwn attributes are not handled, so all the
1244 * page will be always scanned when calling StructElement::getText().
1249 if (!childObj
->dictLookup("MCID", &mcidObj
)->isInt()) {
1250 error(errSyntaxError
, -1, "MCID object is wrong type ({0:s})", mcidObj
.getTypeName());
1255 child
= new StructElement(mcidObj
.getInt(), treeRoot
, this);
1258 if (childObj
->dictLookupNF("Pg", &pageRefObj
)->isRef()) {
1259 child
->pageRef
= pageRefObj
;
1263 } else if (childObj
->isDict("OBJR")) {
1266 if (childObj
->dictLookupNF("Obj", &refObj
)->isRef()) {
1269 child
= new StructElement(refObj
.getRef(), treeRoot
, this);
1271 if (childObj
->dictLookupNF("Pg", &pageRefObj
)->isRef()) {
1272 child
->pageRef
= pageRefObj
;
1277 error(errSyntaxError
, -1, "Obj object is wrong type ({0:s})", refObj
.getTypeName());
1280 } else if (childObj
->isDict()) {
1281 if (!ref
->isRef()) {
1282 error(errSyntaxError
, -1,
1283 "Structure element dictionary is not an indirect reference ({0:s})",
1284 ref
->getTypeName());
1285 } else if (seen
.find(ref
->getRefNum()) == seen
.end()) {
1286 seen
.insert(ref
->getRefNum());
1287 child
= new StructElement(childObj
->getDict(), treeRoot
, this, seen
);
1289 error(errSyntaxWarning
, -1,
1290 "Loop detected in structure tree, skipping subtree at object {0:d}:{1:d}",
1291 ref
->getRefNum(), ref
->getRefGen());
1294 error(errSyntaxWarning
, -1, "K has a child of wrong type ({0:s})", childObj
->getTypeName());
1298 if (child
->isOk()) {
1301 treeRoot
->parentTreeAdd(ref
->getRef(), child
);
// Parses the K entry of "element" and hands every kid to parseChild().
//
// element: dictionary whose "K" entry is looked up.
// seen:    forwarded unchanged to each parseChild() call.
//
// NOTE(review): the declarations of kids, obj and ref, and any cleanup of
// them, are not visible in this listing.
1311 void StructElement::parseChildren(Dict
*element
, std::set
<int> &seen
)
// K is an array: every item is passed to parseChild() in two forms, one
// obtained with arrayGetNF() and one with arrayGet() (presumably the
// unresolved reference and the fetched object -- confirm).
1315 if (element
->lookup("K", &kids
)->isArray()) {
1316 for (int i
= 0; i
< kids
.arrayGetLength(); i
++) {
1318 parseChild(kids
.arrayGetNF(i
, &ref
), kids
.arrayGet(i
, &obj
), seen
);
// K is a single dictionary or integer: K itself is the only kid. The value
// already looked up is passed along with the result of lookupNF("K").
1322 } else if (kids
.isDict() || kids
.isInt()) {
1324 parseChild(element
->lookupNF("K", &ref
), &kids
, seen
);
// Parses one attribute dictionary and appends the resulting Attribute
// objects to this element via appendAttribute().
//
// attributes:   dictionary to parse; its "O" (owner) entry selects between
//               user properties and standard attributes.
// keepExisting: not referenced in any line visible in this listing;
//               presumably it decides whether an attribute of a type that is
//               already present is kept -- confirm against the full file.
//
// NOTE(review): several lines of this function (declarations, else lines,
// the body of the duplicate check, the end of the function) are not visible
// in this listing; the comments below cover only the visible statements.
1331 void StructElement::parseAttributes(Dict
*attributes
, GBool keepExisting
)
1334 if (attributes
->lookup("O", &owner
)->isName("UserProperties")) {
1335 // In this case /P is an array of UserProperty dictionaries
1336 Object userProperties
;
1337 if (attributes
->lookup("P", &userProperties
)->isArray()) {
1338 for (int i
= 0; i
< userProperties
.arrayGetLength(); i
++) {
// Each dictionary item is converted with Attribute::parseUserProperty() and
// appended only when the result is non-null and reports isOk().
1340 if (userProperties
.arrayGet(i
, &property
)->isDict()) {
1341 Attribute
*attribute
= Attribute::parseUserProperty(property
.getDict());
1342 if (attribute
&& attribute
->isOk()) {
1343 appendAttribute(attribute
);
// Presumably the else branch of the validity check above.
1345 error(errSyntaxWarning
, -1, "Item in P is invalid");
// Presumably the else branch for an item that is not a dictionary.
1349 error(errSyntaxWarning
, -1, "Item in P is wrong type ({0:s})", property
.getTypeName());
1354 userProperties
.free();
1355 } else if (owner
.isName()) {
1356 // In this case /P contains standard attributes.
1357 // Check first if the owner is a valid standard one.
1358 Attribute::Owner ownerValue
= nameToOwner(owner
.getName());
1359 if (ownerValue
!= Attribute::UnknownOwner
) {
1360 // Iterate over the entries of the "attributes" dictionary.
1361 // The /O entry (owner) is skipped.
1362 for (int i
= 0; i
< attributes
->getLength(); i
++) {
1363 const char *key
= attributes
->getKey(i
);
1364 if (strcmp(key
, "O") != 0) {
// The key is mapped to an attribute type; "this" is passed to the lookup.
1365 Attribute::Type type
= Attribute::getTypeForName(key
, this);
1367 // Check if the attribute is already defined.
1369 GBool exists
= gFalse
;
// Scan the attributes appended so far for one of the same type. What happens
// on a match is in lines not visible in this listing.
1370 for (unsigned j
= 0; j
< getNumAttributes(); j
++) {
1371 if (getAttribute(j
)->getType() == type
) {
// Only keys that map to a known attribute type are instantiated.
1380 if (type
!= Attribute::Unknown
) {
1382 GBool typeCheckOk
= gTrue
;
1383 Attribute
*attribute
= new Attribute(type
, attributes
->getVal(i
, &value
));
// Append only when the attribute reports isOk() and its value passes
// checkType(); typeCheckOk records the outcome of checkType() when that
// call is reached.
1386 if (attribute
->isOk() && (typeCheckOk
= attribute
->checkType(this))) {
1387 appendAttribute(attribute
);
1389 // It is not needed to free "value", the Attribute instance
1390 // owns the contents, so deleting "attribute" is enough.
// Warning about a value whose type does not fit the attribute; the guarding
// condition is in lines not visible in this listing.
1392 error(errSyntaxWarning
, -1, "Attribute {0:s} value is of wrong type ({1:s})",
1393 attribute
->getTypeName(), attribute
->getValue()->getTypeName());
// Presumably the else branch of the Attribute::Unknown check above.
1398 error(errSyntaxWarning
, -1, "Wrong Attribute '{0:s}' in element {1:s}", key
, getTypeName());
// Presumably the else branch of the Attribute::UnknownOwner check above.
1403 error(errSyntaxWarning
, -1, "O object is invalid value ({0:s})", owner
.getName());
// O is present (not null) but is not a name.
1405 } else if (!owner
.isNull()) {
1406 error(errSyntaxWarning
, -1, "O is wrong type ({0:s})", owner
.getTypeName());