beta-0.89.2
[luatex.git] / source / libs / poppler / poppler-src / poppler / StructElement.h
blobcd89a970ddb59382d80e5255e47c2d6b39f53941
1 //========================================================================
2 //
3 // StructElement.h
4 //
5 // This file is licensed under the GPLv2 or later
6 //
7 // Copyright 2013, 2014 Igalia S.L.
8 // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com>
9 // Copyright 2014 Albert Astals Cid <aacid@kde.org>
11 //========================================================================
13 #ifndef STRUCTELEMENT_H
14 #define STRUCTELEMENT_H
16 #ifdef USE_GCC_PRAGMAS
17 #pragma interface
18 #endif
20 #include "goo/gtypes.h"
21 #include "goo/GooString.h"
22 #include "MarkedContentOutputDev.h"
23 #include "Object.h"
24 #include <vector>
25 #include <set>
27 class GooString;
28 class Dict;
29 class StructElement;
30 class StructTreeRoot;
33 class Attribute {
34 public:
35 enum Type {
36 Unknown = 0, // Uninitialized, parsing error, etc.
37 UserProperty, // User defined attribute (i.e. non-standard)
39 // Common standard attributes
40 Placement, WritingMode, BackgroundColor, BorderColor, BorderStyle,
41 BorderThickness, Color, Padding,
43 // Block element standard attributes
44 SpaceBefore, SpaceAfter, StartIndent, EndIndent, TextIndent, TextAlign,
45 BBox, Width, Height, BlockAlign, InlineAlign, TBorderStyle, TPadding,
47 // Inline element standard attributes
48 BaselineShift, LineHeight, TextDecorationColor, TextDecorationThickness,
49 TextDecorationType, RubyAlign, RubyPosition, GlyphOrientationVertical,
51 // Column-only standard attributes
52 ColumnCount, ColumnGap, ColumnWidths,
54 // List-only standard attributes
55 ListNumbering,
57 // PrintField-only standard attributes
58 Role, checked, Desc,
60 // Table-only standard attributes
61 RowSpan, ColSpan, Headers, Scope, Summary,
64 enum Owner {
65 UnknownOwner = 0,
66 // User-defined attributes
67 UserProperties,
68 // Standard attributes
69 Layout, List, PrintField, Table,
70 // Translation to other formats
71 XML_1_00, HTML_3_20, HTML_4_01, OEB_1_00, RTF_1_05, CSS_1_00, CSS_2_00,
74 // Creates a standard attribute. The name is predefined, and the
75 // value is type-checked to conform to the PDF specification.
76 Attribute(Type type, Object *value);
78 // Creates an UserProperty attribute, with an arbitrary name and value.
79 Attribute(const char *name, int nameLen, Object *value);
81 GBool isOk() const { return type != Unknown; }
83 // Name, type and value can be set only on construction.
84 Type getType() const { return type; }
85 Owner getOwner() const { return owner; }
86 const char *getTypeName() const;
87 const char *getOwnerName() const;
88 Object *getValue() const { return &value; }
89 static Object *getDefaultValue(Type type);
91 // The caller gets the ownership of the return GooString and is responsible of deleting it
92 GooString *getName() const { return type == UserProperty ? name.copy() : new GooString(getTypeName()); }
94 // The revision is optional, and defaults to zero.
95 Guint getRevision() const { return revision; }
96 void setRevision(Guint revisionA) { revision = revisionA; }
98 // Hidden elements should not be displayed by the user agent
99 GBool isHidden() const { return hidden; }
100 void setHidden(GBool hiddenA) { hidden = hiddenA; }
102 // The formatted value may be in the PDF, or be left undefined (NULL).
103 // In the later case the user agent should provide a default representation.
104 const char *getFormattedValue() const { return formatted ? formatted->getCString() : NULL; }
105 void setFormattedValue(const char *formattedA);
107 ~Attribute();
109 private:
110 Type type;
111 Owner owner;
112 Guint revision;
113 mutable GooString name;
114 mutable Object value;
115 GBool hidden;
116 GooString *formatted;
118 GBool checkType(StructElement *element = NULL);
119 static Type getTypeForName(const char *name, StructElement *element = NULL);
120 static Attribute *parseUserProperty(Dict *property);
122 friend class StructElement;
126 class StructElement {
127 public:
128 enum Type {
129 Unknown = 0,
130 MCID, // MCID reference, used internally
131 OBJR, // Object reference, used internally
133 Document, Part, Art, Sect, Div, // Structural elements
135 Span, Quote, Note, Reference, BibEntry, // Inline elements
136 Code, Link, Annot,
137 BlockQuote, Caption, NonStruct,
138 TOC, TOCI, Index, Private,
140 P, H, H1, H2, H3, H4, H5, H6, // Paragraph-like
142 L, LI, Lbl, LBody, // List elements
144 Table, TR, TH, TD, THead, TFoot, TBody, // Table elements
146 Ruby, RB, RT, RP, // Ruby text elements
147 Warichu, WT, WP,
149 Figure, Formula, Form, // Illustration-like elements
152 static const Ref InvalidRef;
154 const char *getTypeName() const;
155 Type getType() const { return type; }
156 GBool isOk() const { return type != Unknown; }
157 GBool isBlock() const;
158 GBool isInline() const;
159 GBool isGrouping() const;
161 inline GBool isContent() const { return (type == MCID) || isObjectRef(); }
162 inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); }
164 int getMCID() const { return c->mcid; }
165 Ref getObjectRef() const { return c->ref; }
166 Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef.getRef(); }
167 GBool hasPageRef() const;
168 GBool getPageRef(Ref& ref) const;
169 StructTreeRoot *getStructTreeRoot() { return treeRoot; }
171 // Optional element identifier.
172 const GooString *getID() const { return isContent() ? NULL : s->id; }
173 GooString *getID() { return isContent() ? NULL : s->id; }
175 // Optional ISO language name, e.g. en_US
176 GooString *getLanguage() {
177 if (!isContent() && s->language) return s->language;
178 return parent ? parent->getLanguage() : NULL;
180 const GooString *getLanguage() const {
181 if (!isContent() && s->language) return s->language;
182 return parent ? parent->getLanguage() : NULL;
185 // Optional revision number, defaults to zero.
186 Guint getRevision() const { return isContent() ? 0 : s->revision; }
187 void setRevision(Guint revision) { if (isContent()) s->revision = revision; }
189 // Optional element title, in human-readable form.
190 const GooString *getTitle() const { return isContent() ? NULL : s->title; }
191 GooString *getTitle() { return isContent() ? NULL : s->title; }
193 // Optional element expanded abbreviation text.
194 const GooString *getExpandedAbbr() const { return isContent() ? NULL : s->expandedAbbr; }
195 GooString *getExpandedAbbr() { return isContent() ? NULL : s->expandedAbbr; }
197 unsigned getNumChildren() const { return isContent() ? 0 : s->elements.size(); }
198 const StructElement *getChild(int i) const { return isContent() ? NULL : s->elements.at(i); }
199 StructElement *getChild(int i) { return isContent() ? NULL : s->elements.at(i); }
201 void appendChild(StructElement *element) {
202 if (!isContent() && element && element->isOk()) {
203 s->elements.push_back(element);
207 unsigned getNumAttributes() const { return isContent() ? 0 : s->attributes.size(); }
208 const Attribute *getAttribute(int i) const { return isContent() ? NULL : s->attributes.at(i); }
209 Attribute *getAttribute(int i) { return isContent() ? NULL : s->attributes.at(i); }
211 void appendAttribute(Attribute *attribute) {
212 if (!isContent() && attribute) {
213 s->attributes.push_back(attribute);
217 const Attribute* findAttribute(Attribute::Type attributeType, GBool inherit = gFalse,
218 Attribute::Owner owner = Attribute::UnknownOwner) const;
220 const GooString *getAltText() const { return isContent() ? NULL : s->altText; }
221 GooString *getAltText() { return isContent() ? NULL : s->altText; }
223 const GooString *getActualText() const { return isContent() ? NULL : s->actualText; }
224 GooString *getActualText() { return isContent() ? NULL : s->actualText; }
226 // Content text referenced by the element:
228 // - For MCID reference elements, this is just the text of the
229 // corresponding marked content object in the page stream, regardless
230 // of the setting of the "recursive" flag.
231 // - For other elements, if the "recursive" flag is set, the text
232 // enclosed by *all* the child MCID reference elements of the subtree
233 // is returned. The text is assembled by traversing the leaf MCID
234 // reference elements in logical order.
235 // - In any other case, the function returns NULL.
237 // A new string is returned, and the ownership passed to the caller.
239 GooString *getText(GBool recursive = gTrue) const {
240 return appendSubTreeText(NULL, recursive);
243 const TextSpanArray getTextSpans() const {
244 if (!isContent())
245 return TextSpanArray();
246 MarkedContentOutputDev mcdev(getMCID());
247 return getTextSpansInternal(mcdev);
250 ~StructElement();
252 private:
253 GooString* appendSubTreeText(GooString *string, GBool recursive) const;
254 const TextSpanArray& getTextSpansInternal(MarkedContentOutputDev& mcdev) const;
256 typedef std::vector<Attribute*> AttrPtrArray;
257 typedef std::vector<StructElement*> ElemPtrArray;
259 struct StructData {
260 Object parentRef;
261 GooString *altText;
262 GooString *actualText;
263 GooString *id;
264 GooString *title;
265 GooString *expandedAbbr;
266 GooString *language;
267 Guint revision;
268 ElemPtrArray elements;
269 AttrPtrArray attributes;
271 StructData();
272 ~StructData();
275 // Data in content elements (MCID, MCR)
276 struct ContentData {
277 union {
278 int mcid;
279 Ref ref;
282 ContentData(int mcidA): mcid(mcidA) {}
283 ContentData(const Ref& r) { ref.num = r.num; ref.gen = r.gen; }
286 // Common data
287 Type type;
288 StructTreeRoot *treeRoot;
289 StructElement *parent;
290 mutable Object pageRef;
292 union {
293 StructData *s;
294 ContentData *c;
297 StructElement(Dict *elementDict, StructTreeRoot *treeRootA, StructElement *parentA, std::set<int> &seen);
298 StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA);
299 StructElement(const Ref &ref, StructTreeRoot *treeRootA, StructElement *parentA);
301 void parse(Dict* elementDict);
302 StructElement* parseChild(Object *ref, Object* childObj, std::set<int> &seen);
303 void parseChildren(Dict* element, std::set<int> &seen);
304 void parseAttributes(Dict *element, GBool keepExisting = gFalse);
306 friend class StructTreeRoot;
309 #endif