1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is Novell code.
17 * The Initial Developer of the Original Code is Novell Corporation.
18 * Portions created by the Initial Developer are Copyright (C) 2006
19 * the Initial Developer. All Rights Reserved.
22 * robert@ocallahan.org
23 * Ehsan Akhgari <ehsan.akhgari@gmail.com>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsTextFrameUtils.h"
41 #include "nsContentUtils.h"
42 #include "nsIWordBreaker.h"
44 #include "nsUnicharUtils.h"
45 #include "nsBidiUtils.h"
47 // XXX TODO implement transform of backslash to yen that nsTextTransform does
48 // when requested by PresContext->LanguageSpecificTransformType(). Do it with
49 // a new factory type that just munges the input stream. But first, check
50 // that we really still need this, it's only enabled via a hidden pref
51 // which defaults false...
53 #define UNICODE_ZWSP 0x200B
// Discardability test for a 16-bit (PRUnichar) character; the result is a
// PRBool. aFlags points at a flag word that this function may OR bits into.
// NOTE(review): this copy of the file is missing lines from this definition
// (braces, the condition guarding the flag update, the early returns), so
// the comments below describe only the statements that are visible.
55 static PRBool
IsDiscardable(PRUnichar ch
, PRUint32
* aFlags
)
57 // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun
58 // and discarding it would force us to copy text in many cases of preformatted
59 // text containing \r\n.
// Sets TEXT_HAS_SHY in the caller's flag word (presumably when ch is a soft
// hyphen; the guarding condition is not visible here -- TODO confirm).
61 *aFlags
|= nsTextFrameUtils::TEXT_HAS_SHY
;
// The mask keeps only the high byte of ch, so this test is true for every
// code unit outside the range 0x2000-0x20FF.
64 if ((ch
& 0xFF00) != 0x2000) {
65 // Not a Bidi control character
// Remaining candidates are classified by the IS_BIDI_CONTROL_CHAR macro.
68 return IS_BIDI_CONTROL_CHAR(ch
);
// 8-bit (PRUint8) overload of the discardability test. The only statement
// visible in this copy ORs TEXT_HAS_SHY into *aFlags.
// NOTE(review): the condition guarding that update and the return statements
// are not present in this copy of the file; confirm against the full source.
71 static PRBool
IsDiscardable(PRUint8 ch
, PRUint32
* aFlags
)
74 *aFlags
|= nsTextFrameUtils::TEXT_HAS_SHY
;
// Walks aLength 16-bit characters starting at aText and records kept and
// skipped characters in aSkipChars through KeepChar()/SkipChar().
//   aCompression   - COMPRESS_NONE selects the first loop; any other mode
//                    selects the second loop, which also tracks whitespace
//                    state (inWhitespace / nowInWhitespace).
//   aIncomingFlags - in/out state. INCOMING_WHITESPACE is read to seed
//                    inWhitespace; INCOMING_WHITESPACE and
//                    INCOMING_ARABICCHAR are rewritten after each loop.
//   aAnalysisFlags - receives the accumulated `flags` value (TEXT_HAS_TAB,
//                    TEXT_WAS_TRANSFORMED, and whatever IsDiscardable() sets).
// NOTE(review): this copy of the file is missing lines from this definition
// (return type, the aOutput parameter, the declarations of `flags` and `i`,
// braces, else branches, and any writes through aOutput). The comments here
// cover only the visible statements.
81 nsTextFrameUtils::TransformText(const PRUnichar
* aText
, PRUint32 aLength
,
83 CompressionMode aCompression
,
84 PRUint8
* aIncomingFlags
,
85 gfxSkipCharsBuilder
* aSkipChars
,
86 PRUint32
* aAnalysisFlags
)
// Remember where the output began; it is compared with aOutput at the end.
89 PRUnichar
* outputStart
= aOutput
;
91 PRBool lastCharArabic
= PR_FALSE
;
// Uncompressed mode: each character is tested with IsDiscardable().
93 if (aCompression
== COMPRESS_NONE
) {
96 for (i
= 0; i
< aLength
; ++i
) {
97 PRUnichar ch
= *aText
++;
98 if (IsDiscardable(ch
, &flags
)) {
99 aSkipChars
->SkipChar();
101 aSkipChars
->KeepChar();
103 flags
|= TEXT_HAS_TAB
;
104 } else if (ch
!= ' ' && ch
!= '\n') {
105 // we already know it's not a tab from the previous check
106 lastCharArabic
= IS_ARABIC_CHAR(ch
);
// Publish lastCharArabic into the caller's state: set the bit when true,
// clear it otherwise.
111 if (lastCharArabic
) {
112 *aIncomingFlags
|= INCOMING_ARABICCHAR
;
114 *aIncomingFlags
&= ~INCOMING_ARABICCHAR
;
// Clears the whitespace bit at the end of this path.
116 *aIncomingFlags
&= ~INCOMING_WHITESPACE
;
// Compressing modes: seed the whitespace state from the caller-supplied
// INCOMING_WHITESPACE bit.
118 PRBool inWhitespace
= (*aIncomingFlags
& INCOMING_WHITESPACE
) != 0;
120 for (i
= 0; i
< aLength
; ++i
) {
121 PRUnichar ch
= *aText
++;
122 PRBool nowInWhitespace
;
// aText already points at the character after ch, so the helper receives the
// remaining aLength - (i + 1) characters.
125 !IsSpaceCombiningSequenceTail(aText
, aLength
- (i
+ 1)))) {
126 nowInWhitespace
= PR_TRUE
;
127 } else if (ch
== '\n' && aCompression
== COMPRESS_WHITESPACE_NEWLINE
) {
// NOTE(review): ch was read with *aText++, so aText is one past ch here.
// aText[-1] is therefore ch itself ('\n' in this branch) and aText[1] is the
// character at index i + 2, which the `i + 1 < aLength` guard does not bound.
// If the intent is "previous and next character", the indices would be
// aText[-2] and aText[0] -- confirm against the full source and fix.
128 if (i
> 0 && IS_CJ_CHAR(aText
[-1]) &&
129 i
+ 1 < aLength
&& IS_CJ_CHAR(aText
[1])) {
130 // Discard newlines between CJK chars.
131 // XXX this really requires more context to get right!
132 aSkipChars
->SkipChar();
135 nowInWhitespace
= PR_TRUE
;
137 nowInWhitespace
= ch
== '\t';
140 if (!nowInWhitespace
) {
141 if (IsDiscardable(ch
, &flags
)) {
142 aSkipChars
->SkipChar();
// A skipped discardable character leaves the whitespace state unchanged:
// carry the previous value forward into the assignment at the loop's end.
143 nowInWhitespace
= inWhitespace
;
146 aSkipChars
->KeepChar();
147 lastCharArabic
= IS_ARABIC_CHAR(ch
);
151 aSkipChars
->SkipChar();
154 flags
|= TEXT_WAS_TRANSFORMED
;
157 aSkipChars
->KeepChar();
160 inWhitespace
= nowInWhitespace
;
// Publish the final Arabic and whitespace state into the caller's flags.
162 if (lastCharArabic
) {
163 *aIncomingFlags
|= INCOMING_ARABICCHAR
;
165 *aIncomingFlags
&= ~INCOMING_ARABICCHAR
;
168 *aIncomingFlags
|= INCOMING_WHITESPACE
;
170 *aIncomingFlags
&= ~INCOMING_WHITESPACE
;
// If the output pointer did not advance by exactly aLength, mark the text as
// transformed.
174 if (outputStart
+ aLength
!= aOutput
) {
175 flags
|= TEXT_WAS_TRANSFORMED
;
177 *aAnalysisFlags
= flags
;
// 8-bit (PRUint8) overload: walks aLength characters starting at aText and
// records kept and skipped characters in aSkipChars through
// KeepChar()/SkipChar().
//   aCompression   - COMPRESS_NONE selects the first loop; any other mode
//                    selects the second loop, which treats ' ', '\t' and
//                    (only for COMPRESS_WHITESPACE_NEWLINE) '\n' as
//                    whitespace.
//   aIncomingFlags - in/out state. INCOMING_WHITESPACE is read to seed
//                    inWhitespace; INCOMING_ARABICCHAR is only ever cleared
//                    by the visible statements of this overload.
//   aAnalysisFlags - receives the accumulated `flags` value.
// NOTE(review): this copy of the file is missing lines from this definition
// (return type, the aOutput parameter, the declarations of `flags` and `i`,
// braces, else branches, and any writes through aOutput). The comments here
// cover only the visible statements.
182 nsTextFrameUtils::TransformText(const PRUint8
* aText
, PRUint32 aLength
,
184 CompressionMode aCompression
,
185 PRUint8
* aIncomingFlags
,
186 gfxSkipCharsBuilder
* aSkipChars
,
187 PRUint32
* aAnalysisFlags
)
// Remember where the output began; it is compared with aOutput at the end.
190 PRUint8
* outputStart
= aOutput
;
192 if (aCompression
== COMPRESS_NONE
) {
193 // Skip discardables.
195 for (i
= 0; i
< aLength
; ++i
) {
196 PRUint8 ch
= *aText
++;
197 if (IsDiscardable(ch
, &flags
)) {
198 aSkipChars
->SkipChar();
200 aSkipChars
->KeepChar();
202 flags
|= TEXT_HAS_TAB
;
// Clears both state bits in one step at the end of this path.
207 *aIncomingFlags
&= ~(INCOMING_ARABICCHAR
| INCOMING_WHITESPACE
);
// Compressing modes: seed the whitespace state from the caller-supplied
// INCOMING_WHITESPACE bit.
209 PRBool inWhitespace
= (*aIncomingFlags
& INCOMING_WHITESPACE
) != 0;
211 for (i
= 0; i
< aLength
; ++i
) {
212 PRUint8 ch
= *aText
++;
// Space and tab always count as whitespace; newline counts only when
// newlines are being compressed.
213 PRBool nowInWhitespace
= ch
== ' ' || ch
== '\t' ||
214 (ch
== '\n' && aCompression
== COMPRESS_WHITESPACE_NEWLINE
);
215 if (!nowInWhitespace
) {
216 if (IsDiscardable(ch
, &flags
)) {
217 aSkipChars
->SkipChar();
// A skipped discardable character leaves the whitespace state unchanged:
// carry the previous value forward into the assignment at the loop's end.
218 nowInWhitespace
= inWhitespace
;
221 aSkipChars
->KeepChar();
225 aSkipChars
->SkipChar();
228 flags
|= TEXT_WAS_TRANSFORMED
;
231 aSkipChars
->KeepChar();
234 inWhitespace
= nowInWhitespace
;
// Publish the final state into the caller's flags.
236 *aIncomingFlags
&= ~INCOMING_ARABICCHAR
;
238 *aIncomingFlags
|= INCOMING_WHITESPACE
;
240 *aIncomingFlags
&= ~INCOMING_WHITESPACE
;
// If the output pointer did not advance by exactly aLength, mark the text as
// transformed.
244 if (outputStart
+ aLength
!= aOutput
) {
245 flags
|= TEXT_WAS_TRANSFORMED
;
247 *aAnalysisFlags
= flags
;
// Moves this run iterator on to its next run. The visible statements:
//   1. advance mIterator by the previous run's length (mRunLength);
//   2. subtract that length from mRemainingLength when the run was not
//      skipped, or when mLengthIncludesSkipped is set;
//   3. test mRemainingLength for zero (the action taken is not visible);
//   4. ask mIterator whether the current original character is skipped,
//      receiving a length in `length`, and clamp it to mRemainingLength;
//   5. repeat while the run is skipped and mVisitSkipped is false.
// NOTE(review): this copy of the file is missing lines from this definition
// (the loop head, the guard around step 1, the declaration of `length`, the
// early-return value and the function's tail); confirm against the full
// source.
251 PRBool
nsSkipCharsRunIterator::NextRun() {
254 mIterator
.AdvanceOriginal(mRunLength
);
255 NS_ASSERTION(mRunLength
> 0, "No characters in run (initial length too large?)");
// Skipped runs consume the remaining budget only when the caller's length
// was expressed including skipped characters.
256 if (!mSkipped
|| mLengthIncludesSkipped
) {
257 mRemainingLength
-= mRunLength
;
260 if (!mRemainingLength
)
263 mSkipped
= mIterator
.IsOriginalCharSkipped(&length
);
// Never report a run longer than what is left to visit.
264 mRunLength
= PR_MIN(length
, mRemainingLength
);
265 } while (!mVisitSkipped
&& mSkipped
);