2008-11-04 Anders Carlsson <andersca@apple.com>
[webkit/qt.git] / WebCore / editing / TextIterator.h
blob11a835422dd46e6543b952674227a51a8f04fe4f
1 /*
2 * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #ifndef TextIterator_h
27 #define TextIterator_h
29 #include "InlineTextBox.h"
30 #include "Range.h"
31 #include <wtf/Vector.h>
33 namespace WebCore {
35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
37 inline bool isCollapsibleWhitespace(UChar c)
39 switch (c) {
40 case ' ':
41 case '\n':
42 return true;
43 default:
44 return false;
// Free-function helpers operating on a Range. Only the declarations are visible
// in this header; the notes below are inferred from names and signatures.
// NOTE(review): confirm each against the definitions before relying on them.

// Presumably returns the text covered by the given range as a String.
48 String plainText(const Range*);
// Presumably the same text as plainText(), but returned as a raw UChar buffer
// whose length is written to bufferLength. The name suggests the buffer is
// malloc-allocated and must be released by the caller -- TODO confirm.
49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength);
// Presumably searches the text of the given range for the String, in the
// direction selected by `forward` and honouring `caseSensitive`, and returns
// the matching sub-range -- result when nothing matches is not visible here.
50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive);
52 // Iterates through the DOM range, returning all the text, and 0-length boundaries
53 // at points where replaced elements break up the text flow. The text comes back in
54 // chunks so as to optimize for performance of the iteration.
56 class TextIterator {
57 public:
58 TextIterator();
59 explicit TextIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
61 bool atEnd() const { return !m_positionNode; }
62 void advance();
64 int length() const { return m_textLength; }
65 const UChar* characters() const { return m_textCharacters; }
67 PassRefPtr<Range> range() const;
68 Node* node() const;
70 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
71 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
72 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
74 private:
75 void exitNode();
76 bool shouldRepresentNodeOffsetZero();
77 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
78 void representNodeOffsetZero();
79 bool handleTextNode();
80 bool handleReplacedElement();
81 bool handleNonTextNode();
82 void handleTextBox();
83 void emitCharacter(UChar, Node *textNode, Node *offsetBaseNode, int textStartOffset, int textEndOffset);
84 void emitText(Node *textNode, int textStartOffset, int textEndOffset);
86 // Current position, not necessarily of the text being returned, but position
87 // as we walk through the DOM tree.
88 Node *m_node;
89 int m_offset;
90 bool m_handledNode;
91 bool m_handledChildren;
92 bool m_inShadowContent;
94 // The range.
95 Node *m_startContainer;
96 int m_startOffset;
97 Node *m_endContainer;
98 int m_endOffset;
99 Node *m_pastEndNode;
101 // The current text and its position, in the form to be returned from the iterator.
102 Node *m_positionNode;
103 mutable Node *m_positionOffsetBaseNode;
104 mutable int m_positionStartOffset;
105 mutable int m_positionEndOffset;
106 const UChar* m_textCharacters;
107 int m_textLength;
109 // Used when there is still some pending text from the current node; when these
110 // are false and 0, we go back to normal iterating.
111 bool m_needAnotherNewline;
112 InlineTextBox *m_textBox;
114 // Used to do the whitespace collapsing logic.
115 Node *m_lastTextNode;
116 bool m_lastTextNodeEndedWithCollapsedSpace;
117 UChar m_lastCharacter;
119 // Used for whitespace characters that aren't in the DOM, so we can point at them.
120 UChar m_singleCharacterBuffer;
122 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
123 Vector<InlineTextBox*> m_sortedTextBoxes;
124 size_t m_sortedTextBoxesPosition;
126 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
127 bool m_haveEmitted;
129 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
130 // in the Range used to create the TextIterator.
131 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
132 // moveParagraphs to not clone/destroy moved content.
133 bool m_emitCharactersBetweenAllVisiblePositions;
134 bool m_enterTextControls;
137 // Iterates through the DOM range, returning all the text, and 0-length boundaries
138 // at points where replaced elements break up the text flow. The text comes back in
139 // chunks so as to optimize for performance of the iteration.
140 class SimplifiedBackwardsTextIterator {
141 public:
142 SimplifiedBackwardsTextIterator();
143 explicit SimplifiedBackwardsTextIterator(const Range *);
145 bool atEnd() const { return !m_positionNode; }
146 void advance();
148 int length() const { return m_textLength; }
149 const UChar* characters() const { return m_textCharacters; }
151 PassRefPtr<Range> range() const;
153 private:
154 void exitNode();
155 bool handleTextNode();
156 bool handleReplacedElement();
157 bool handleNonTextNode();
158 void emitCharacter(UChar, Node *Node, int startOffset, int endOffset);
160 // Current position, not necessarily of the text being returned, but position
161 // as we walk through the DOM tree.
162 Node* m_node;
163 int m_offset;
164 bool m_handledNode;
165 bool m_handledChildren;
167 // End of the range.
168 Node* m_startNode;
169 int m_startOffset;
170 // Start of the range.
171 Node* m_endNode;
172 int m_endOffset;
174 // The current text and its position, in the form to be returned from the iterator.
175 Node* m_positionNode;
176 int m_positionStartOffset;
177 int m_positionEndOffset;
178 const UChar* m_textCharacters;
179 int m_textLength;
181 // Used to do the whitespace logic.
182 Node* m_lastTextNode;
183 UChar m_lastCharacter;
185 // Used for whitespace characters that aren't in the DOM, so we can point at them.
186 UChar m_singleCharacterBuffer;
188 // The node after the last node this iterator should process.
189 Node* m_pastStartNode;
192 // Builds on the text iterator, adding a character position so we can walk one
193 // character at a time, or faster, as needed. Useful for searching.
194 class CharacterIterator {
195 public:
196 CharacterIterator();
197 explicit CharacterIterator(const Range* r, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
199 void advance(int numCharacters);
201 bool atBreak() const { return m_atBreak; }
202 bool atEnd() const { return m_textIterator.atEnd(); }
204 int length() const { return m_textIterator.length() - m_runOffset; }
205 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
206 String string(int numChars);
208 int characterOffset() const { return m_offset; }
209 PassRefPtr<Range> range() const;
211 private:
212 int m_offset;
213 int m_runOffset;
214 bool m_atBreak;
216 TextIterator m_textIterator;
219 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
220 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching.
221 class WordAwareIterator {
222 public:
223 WordAwareIterator();
224 explicit WordAwareIterator(const Range *r);
226 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
227 void advance();
229 int length() const;
230 const UChar* characters() const;
232 // Range of the text we're currently returning
233 PassRefPtr<Range> range() const { return m_range; }
235 private:
236 // text from the previous chunk from the textIterator
237 const UChar* m_previousText;
238 int m_previousLength;
240 // many chunks from textIterator concatenated
241 Vector<UChar> m_buffer;
243 // Did we have to look ahead in the textIterator to confirm the current chunk?
244 bool m_didLookAhead;
246 RefPtr<Range> m_range;
248 TextIterator m_textIterator;
253 #endif