1 //========================================================================
5 // This file is licensed under the GPLv2 or later
7 // Copyright 2013, 2014 Igalia S.L.
8 // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com>
9 // Copyright 2014 Albert Astals Cid <aacid@kde.org>
11 //========================================================================
13 #ifndef STRUCTELEMENT_H
14 #define STRUCTELEMENT_H
16 #ifdef USE_GCC_PRAGMAS
20 #include "goo/gtypes.h"
21 #include "goo/GooString.h"
22 #include "MarkedContentOutputDev.h"
36 Unknown
= 0, // Uninitialized, parsing error, etc.
37 UserProperty
, // User defined attribute (i.e. non-standard)
39 // Common standard attributes
40 Placement
, WritingMode
, BackgroundColor
, BorderColor
, BorderStyle
,
41 BorderThickness
, Color
, Padding
,
43 // Block element standard attributes
44 SpaceBefore
, SpaceAfter
, StartIndent
, EndIndent
, TextIndent
, TextAlign
,
45 BBox
, Width
, Height
, BlockAlign
, InlineAlign
, TBorderStyle
, TPadding
,
47 // Inline element standard attributes
48 BaselineShift
, LineHeight
, TextDecorationColor
, TextDecorationThickness
,
49 TextDecorationType
, RubyAlign
, RubyPosition
, GlyphOrientationVertical
,
51 // Column-only standard attributes
52 ColumnCount
, ColumnGap
, ColumnWidths
,
54 // List-only standard attributes
57 // PrintField-only standard attributes
60 // Table-only standard attributes
61 RowSpan
, ColSpan
, Headers
, Scope
, Summary
,
66 // User-defined attributes
68 // Standard attributes
69 Layout
, List
, PrintField
, Table
,
70 // Translation to other formats
71 XML_1_00
, HTML_3_20
, HTML_4_01
, OEB_1_00
, RTF_1_05
, CSS_1_00
, CSS_2_00
,
74 // Creates a standard attribute. The name is predefined, and the
75 // value is type-checked to conform to the PDF specification.
76 Attribute(Type type
, Object
*value
);
78 // Creates an UserProperty attribute, with an arbitrary name and value.
79 Attribute(const char *name
, int nameLen
, Object
*value
);
81 GBool
isOk() const { return type
!= Unknown
; }
83 // Name, type and value can be set only on construction.
84 Type
getType() const { return type
; }
85 Owner
getOwner() const { return owner
; }
86 const char *getTypeName() const;
87 const char *getOwnerName() const;
88 Object
*getValue() const { return &value
; }
89 static Object
*getDefaultValue(Type type
);
91 // The caller gets the ownership of the return GooString and is responsible of deleting it
92 GooString
*getName() const { return type
== UserProperty
? name
.copy() : new GooString(getTypeName()); }
94 // The revision is optional, and defaults to zero.
95 Guint
getRevision() const { return revision
; }
96 void setRevision(Guint revisionA
) { revision
= revisionA
; }
98 // Hidden elements should not be displayed by the user agent
99 GBool
isHidden() const { return hidden
; }
100 void setHidden(GBool hiddenA
) { hidden
= hiddenA
; }
102 // The formatted value may be in the PDF, or be left undefined (NULL).
103 // In the later case the user agent should provide a default representation.
104 const char *getFormattedValue() const { return formatted
? formatted
->getCString() : NULL
; }
// Sets the formatted representation of the value.
// NOTE(review): whether NULL clears the current representation is decided
// in the implementation, which is not visible here.
void setFormattedValue(const char *formattedA);
113 mutable GooString name
;
114 mutable Object value
;
116 GooString
*formatted
;
118 GBool
checkType(StructElement
*element
= NULL
);
119 static Type
getTypeForName(const char *name
, StructElement
*element
= NULL
);
120 static Attribute
*parseUserProperty(Dict
*property
);
122 friend class StructElement
;
126 class StructElement
{
130 MCID
, // MCID reference, used internally
131 OBJR
, // Object reference, used internally
133 Document
, Part
, Art
, Sect
, Div
, // Structural elements
135 Span
, Quote
, Note
, Reference
, BibEntry
, // Inline elements
137 BlockQuote
, Caption
, NonStruct
,
138 TOC
, TOCI
, Index
, Private
,
140 P
, H
, H1
, H2
, H3
, H4
, H5
, H6
, // Paragraph-like
142 L
, LI
, Lbl
, LBody
, // List elements
144 Table
, TR
, TH
, TD
, THead
, TFoot
, TBody
, // Table elements
146 Ruby
, RB
, RT
, RP
, // Ruby text elements
149 Figure
, Formula
, Form
, // Illustration-like elements
152 static const Ref InvalidRef
;
154 const char *getTypeName() const;
155 Type
getType() const { return type
; }
156 GBool
isOk() const { return type
!= Unknown
; }
157 GBool
isBlock() const;
158 GBool
isInline() const;
159 GBool
isGrouping() const;
161 inline GBool
isContent() const { return (type
== MCID
) || isObjectRef(); }
162 inline GBool
isObjectRef() const { return (type
== OBJR
&& c
->ref
.num
!= -1 && c
->ref
.gen
!= -1); }
164 int getMCID() const { return c
->mcid
; }
165 Ref
getObjectRef() const { return c
->ref
; }
166 Ref
getParentRef() { return isContent() ? parent
->getParentRef() : s
->parentRef
.getRef(); }
167 GBool
hasPageRef() const;
168 GBool
getPageRef(Ref
& ref
) const;
169 StructTreeRoot
*getStructTreeRoot() { return treeRoot
; }
171 // Optional element identifier.
172 const GooString
*getID() const { return isContent() ? NULL
: s
->id
; }
173 GooString
*getID() { return isContent() ? NULL
: s
->id
; }
175 // Optional ISO language name, e.g. en_US
176 GooString
*getLanguage() {
177 if (!isContent() && s
->language
) return s
->language
;
178 return parent
? parent
->getLanguage() : NULL
;
180 const GooString
*getLanguage() const {
181 if (!isContent() && s
->language
) return s
->language
;
182 return parent
? parent
->getLanguage() : NULL
;
185 // Optional revision number, defaults to zero.
186 Guint
getRevision() const { return isContent() ? 0 : s
->revision
; }
187 void setRevision(Guint revision
) { if (isContent()) s
->revision
= revision
; }
189 // Optional element title, in human-readable form.
190 const GooString
*getTitle() const { return isContent() ? NULL
: s
->title
; }
191 GooString
*getTitle() { return isContent() ? NULL
: s
->title
; }
193 // Optional element expanded abbreviation text.
194 const GooString
*getExpandedAbbr() const { return isContent() ? NULL
: s
->expandedAbbr
; }
195 GooString
*getExpandedAbbr() { return isContent() ? NULL
: s
->expandedAbbr
; }
197 unsigned getNumChildren() const { return isContent() ? 0 : s
->elements
.size(); }
198 const StructElement
*getChild(int i
) const { return isContent() ? NULL
: s
->elements
.at(i
); }
199 StructElement
*getChild(int i
) { return isContent() ? NULL
: s
->elements
.at(i
); }
201 void appendChild(StructElement
*element
) {
202 if (!isContent() && element
&& element
->isOk()) {
203 s
->elements
.push_back(element
);
207 unsigned getNumAttributes() const { return isContent() ? 0 : s
->attributes
.size(); }
208 const Attribute
*getAttribute(int i
) const { return isContent() ? NULL
: s
->attributes
.at(i
); }
209 Attribute
*getAttribute(int i
) { return isContent() ? NULL
: s
->attributes
.at(i
); }
211 void appendAttribute(Attribute
*attribute
) {
212 if (!isContent() && attribute
) {
213 s
->attributes
.push_back(attribute
);
217 const Attribute
* findAttribute(Attribute::Type attributeType
, GBool inherit
= gFalse
,
218 Attribute::Owner owner
= Attribute::UnknownOwner
) const;
220 const GooString
*getAltText() const { return isContent() ? NULL
: s
->altText
; }
221 GooString
*getAltText() { return isContent() ? NULL
: s
->altText
; }
223 const GooString
*getActualText() const { return isContent() ? NULL
: s
->actualText
; }
224 GooString
*getActualText() { return isContent() ? NULL
: s
->actualText
; }
226 // Content text referenced by the element:
228 // - For MCID reference elements, this is just the text of the
229 // corresponding marked content object in the page stream, regardless
230 // of the setting of the "recursive" flag.
231 // - For other elements, if the "recursive" flag is set, the text
232 // enclosed by *all* the child MCID reference elements of the subtree
233 // is returned. The text is assembled by traversing the leaf MCID
234 // reference elements in logical order.
235 // - In any other case, the function returns NULL.
237 // A new string is returned, and the ownership passed to the caller.
239 GooString
*getText(GBool recursive
= gTrue
) const {
240 return appendSubTreeText(NULL
, recursive
);
243 const TextSpanArray
getTextSpans() const {
245 return TextSpanArray();
246 MarkedContentOutputDev
mcdev(getMCID());
247 return getTextSpansInternal(mcdev
);
253 GooString
* appendSubTreeText(GooString
*string
, GBool recursive
) const;
254 const TextSpanArray
& getTextSpansInternal(MarkedContentOutputDev
& mcdev
) const;
256 typedef std::vector
<Attribute
*> AttrPtrArray
;
257 typedef std::vector
<StructElement
*> ElemPtrArray
;
262 GooString
*actualText
;
265 GooString
*expandedAbbr
;
268 ElemPtrArray elements
;
269 AttrPtrArray attributes
;
275 // Data in content elements (MCID, MCR)
282 ContentData(int mcidA
): mcid(mcidA
) {}
283 ContentData(const Ref
& r
) { ref
.num
= r
.num
; ref
.gen
= r
.gen
; }
288 StructTreeRoot
*treeRoot
;
289 StructElement
*parent
;
290 mutable Object pageRef
;
297 StructElement(Dict
*elementDict
, StructTreeRoot
*treeRootA
, StructElement
*parentA
, std::set
<int> &seen
);
298 StructElement(int mcid
, StructTreeRoot
*treeRootA
, StructElement
*parentA
);
299 StructElement(const Ref
&ref
, StructTreeRoot
*treeRootA
, StructElement
*parentA
);
301 void parse(Dict
* elementDict
);
302 StructElement
* parseChild(Object
*ref
, Object
* childObj
, std::set
<int> &seen
);
303 void parseChildren(Dict
* element
, std::set
<int> &seen
);
304 void parseAttributes(Dict
*element
, GBool keepExisting
= gFalse
);
306 friend class StructTreeRoot
;