1 // Scintilla source code edit control
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
30 #include "SplitVector.h"
31 #include "Partitioning.h"
32 #include "RunStyles.h"
33 #include "CellBuffer.h"
35 #include "CharClassify.h"
36 #include "Decoration.h"
37 #include "CaseFolder.h"
40 #include "UniConversion.h"
41 #include "UnicodeFromUTF8.h"
44 using namespace Scintilla
;
47 static inline bool IsPunctuation(char ch
) {
48 return IsASCII(ch
) && ispunct(ch
);
51 void LexInterface::Colourise(int start
, int end
) {
52 if (pdoc
&& instance
&& !performingStyle
) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle
= true;
58 int lengthDoc
= pdoc
->Length();
61 int len
= end
- start
;
63 PLATFORM_ASSERT(len
>= 0);
64 PLATFORM_ASSERT(start
+ len
<= lengthDoc
);
68 styleStart
= pdoc
->StyleAt(start
- 1);
71 instance
->Lex(start
, len
, styleStart
, pdoc
);
72 instance
->Fold(start
, len
, styleStart
, pdoc
);
75 performingStyle
= false;
79 int LexInterface::LineEndTypesSupported() {
81 int interfaceVersion
= instance
->Version();
82 if (interfaceVersion
>= lvSubStyles
) {
83 ILexerWithSubStyles
*ssinstance
= static_cast<ILexerWithSubStyles
*>(instance
);
84 return ssinstance
->LineEndTypesSupported();
90 Document::Document() {
94 eolMode
= SC_EOL_CRLF
;
99 lineEndBitSet
= SC_LINE_END_TYPE_DEFAULT
;
102 enteredModification
= 0;
104 enteredReadOnlyCount
= 0;
105 insertionSet
= false;
108 actualIndentInChars
= 8;
111 backspaceUnindents
= false;
112 durationStyleOneLine
= 0.00001;
114 matchesValid
= false;
117 UTF8BytesOfLeadInitialise();
119 perLineData
[ldMarkers
] = new LineMarkers();
120 perLineData
[ldLevels
] = new LineLevels();
121 perLineData
[ldState
] = new LineState();
122 perLineData
[ldMargin
] = new LineAnnotation();
123 perLineData
[ldAnnotation
] = new LineAnnotation();
130 Document::~Document() {
131 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
132 it
->watcher
->NotifyDeleted(this, it
->userData
);
134 for (int j
=0; j
<ldSize
; j
++) {
135 delete perLineData
[j
];
146 void Document::Init() {
147 for (int j
=0; j
<ldSize
; j
++) {
149 perLineData
[j
]->Init();
153 int Document::LineEndTypesSupported() const {
154 if ((SC_CP_UTF8
== dbcsCodePage
) && pli
)
155 return pli
->LineEndTypesSupported();
160 bool Document::SetDBCSCodePage(int dbcsCodePage_
) {
161 if (dbcsCodePage
!= dbcsCodePage_
) {
162 dbcsCodePage
= dbcsCodePage_
;
164 cb
.SetLineEndTypes(lineEndBitSet
& LineEndTypesSupported());
171 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_
) {
172 if (lineEndBitSet
!= lineEndBitSet_
) {
173 lineEndBitSet
= lineEndBitSet_
;
174 int lineEndBitSetActive
= lineEndBitSet
& LineEndTypesSupported();
175 if (lineEndBitSetActive
!= cb
.GetLineEndTypes()) {
177 cb
.SetLineEndTypes(lineEndBitSetActive
);
187 void Document::InsertLine(int line
) {
188 for (int j
=0; j
<ldSize
; j
++) {
190 perLineData
[j
]->InsertLine(line
);
194 void Document::RemoveLine(int line
) {
195 for (int j
=0; j
<ldSize
; j
++) {
197 perLineData
[j
]->RemoveLine(line
);
201 // Increase reference count and return its previous value.
202 int Document::AddRef() {
206 // Decrease reference count and return its previous value.
207 // Delete the document if reference count reaches zero.
208 int SCI_METHOD
Document::Release() {
209 int curRefCount
= --refCount
;
210 if (curRefCount
== 0)
215 void Document::SetSavePoint() {
217 NotifySavePoint(true);
220 void Document::TentativeUndo() {
221 if (!TentativeActive())
224 if (enteredModification
== 0) {
225 enteredModification
++;
226 if (!cb
.IsReadOnly()) {
227 bool startSavePoint
= cb
.IsSavePoint();
228 bool multiLine
= false;
229 int steps
= cb
.TentativeSteps();
230 //Platform::DebugPrintf("Steps=%d\n", steps);
231 for (int step
= 0; step
< steps
; step
++) {
232 const int prevLinesTotal
= LinesTotal();
233 const Action
&action
= cb
.GetUndoStep();
234 if (action
.at
== removeAction
) {
235 NotifyModified(DocModification(
236 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
237 } else if (action
.at
== containerAction
) {
238 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
239 dm
.token
= action
.position
;
242 NotifyModified(DocModification(
243 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
245 cb
.PerformUndoStep();
246 if (action
.at
!= containerAction
) {
247 ModifiedAt(action
.position
);
250 int modFlags
= SC_PERFORMED_UNDO
;
251 // With undo, an insertion action becomes a deletion notification
252 if (action
.at
== removeAction
) {
253 modFlags
|= SC_MOD_INSERTTEXT
;
254 } else if (action
.at
== insertAction
) {
255 modFlags
|= SC_MOD_DELETETEXT
;
258 modFlags
|= SC_MULTISTEPUNDOREDO
;
259 const int linesAdded
= LinesTotal() - prevLinesTotal
;
262 if (step
== steps
- 1) {
263 modFlags
|= SC_LASTSTEPINUNDOREDO
;
265 modFlags
|= SC_MULTILINEUNDOREDO
;
267 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
268 linesAdded
, action
.data
));
271 bool endSavePoint
= cb
.IsSavePoint();
272 if (startSavePoint
!= endSavePoint
)
273 NotifySavePoint(endSavePoint
);
275 cb
.TentativeCommit();
277 enteredModification
--;
281 int Document::GetMark(int line
) {
282 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkValue(line
);
285 int Document::MarkerNext(int lineStart
, int mask
) const {
286 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkerNext(lineStart
, mask
);
289 int Document::AddMark(int line
, int markerNum
) {
290 if (line
>= 0 && line
<= LinesTotal()) {
291 int prev
= static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
292 AddMark(line
, markerNum
, LinesTotal());
293 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
301 void Document::AddMarkSet(int line
, int valueSet
) {
302 if (line
< 0 || line
> LinesTotal()) {
305 unsigned int m
= valueSet
;
306 for (int i
= 0; m
; i
++, m
>>= 1)
308 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
309 AddMark(line
, i
, LinesTotal());
310 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
314 void Document::DeleteMark(int line
, int markerNum
) {
315 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, false);
316 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
320 void Document::DeleteMarkFromHandle(int markerHandle
) {
321 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMarkFromHandle(markerHandle
);
322 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
327 void Document::DeleteAllMarks(int markerNum
) {
328 bool someChanges
= false;
329 for (int line
= 0; line
< LinesTotal(); line
++) {
330 if (static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, true))
334 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
340 int Document::LineFromHandle(int markerHandle
) {
341 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->LineFromHandle(markerHandle
);
344 Sci_Position SCI_METHOD
Document::LineStart(Sci_Position line
) const {
345 return cb
.LineStart(line
);
348 bool Document::IsLineStartPosition(int position
) const {
349 return LineStart(LineFromPosition(position
)) == position
;
352 Sci_Position SCI_METHOD
Document::LineEnd(Sci_Position line
) const {
353 if (line
>= LinesTotal() - 1) {
354 return LineStart(line
+ 1);
356 int position
= LineStart(line
+ 1);
357 if (SC_CP_UTF8
== dbcsCodePage
) {
358 unsigned char bytes
[] = {
359 static_cast<unsigned char>(cb
.CharAt(position
-3)),
360 static_cast<unsigned char>(cb
.CharAt(position
-2)),
361 static_cast<unsigned char>(cb
.CharAt(position
-1)),
363 if (UTF8IsSeparator(bytes
)) {
364 return position
- UTF8SeparatorLength
;
366 if (UTF8IsNEL(bytes
+1)) {
367 return position
- UTF8NELLength
;
370 position
--; // Back over CR or LF
371 // When line terminator is CR+LF, may need to go back one more
372 if ((position
> LineStart(line
)) && (cb
.CharAt(position
- 1) == '\r')) {
379 void SCI_METHOD
Document::SetErrorStatus(int status
) {
380 // Tell the watchers an error has occurred.
381 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
382 it
->watcher
->NotifyErrorOccurred(this, it
->userData
, status
);
386 Sci_Position SCI_METHOD
Document::LineFromPosition(Sci_Position pos
) const {
387 return cb
.LineFromPosition(pos
);
390 int Document::LineEndPosition(int position
) const {
391 return LineEnd(LineFromPosition(position
));
394 bool Document::IsLineEndPosition(int position
) const {
395 return LineEnd(LineFromPosition(position
)) == position
;
398 bool Document::IsPositionInLineEnd(int position
) const {
399 return position
>= LineEnd(LineFromPosition(position
));
402 int Document::VCHomePosition(int position
) const {
403 int line
= LineFromPosition(position
);
404 int startPosition
= LineStart(line
);
405 int endLine
= LineEnd(line
);
406 int startText
= startPosition
;
407 while (startText
< endLine
&& (cb
.CharAt(startText
) == ' ' || cb
.CharAt(startText
) == '\t'))
409 if (position
== startText
)
410 return startPosition
;
415 int SCI_METHOD
Document::SetLevel(Sci_Position line
, int level
) {
416 int prev
= static_cast<LineLevels
*>(perLineData
[ldLevels
])->SetLevel(line
, level
, LinesTotal());
418 DocModification
mh(SC_MOD_CHANGEFOLD
| SC_MOD_CHANGEMARKER
,
419 LineStart(line
), 0, 0, 0, line
);
420 mh
.foldLevelNow
= level
;
421 mh
.foldLevelPrev
= prev
;
427 int SCI_METHOD
Document::GetLevel(Sci_Position line
) const {
428 return static_cast<LineLevels
*>(perLineData
[ldLevels
])->GetLevel(line
);
431 void Document::ClearLevels() {
432 static_cast<LineLevels
*>(perLineData
[ldLevels
])->ClearLevels();
435 static bool IsSubordinate(int levelStart
, int levelTry
) {
436 if (levelTry
& SC_FOLDLEVELWHITEFLAG
)
439 return (levelStart
& SC_FOLDLEVELNUMBERMASK
) < (levelTry
& SC_FOLDLEVELNUMBERMASK
);
442 int Document::GetLastChild(int lineParent
, int level
, int lastLine
) {
444 level
= GetLevel(lineParent
) & SC_FOLDLEVELNUMBERMASK
;
445 int maxLine
= LinesTotal();
446 int lookLastLine
= (lastLine
!= -1) ? Platform::Minimum(LinesTotal() - 1, lastLine
) : -1;
447 int lineMaxSubord
= lineParent
;
448 while (lineMaxSubord
< maxLine
- 1) {
449 EnsureStyledTo(LineStart(lineMaxSubord
+ 2));
450 if (!IsSubordinate(level
, GetLevel(lineMaxSubord
+ 1)))
452 if ((lookLastLine
!= -1) && (lineMaxSubord
>= lookLastLine
) && !(GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
))
456 if (lineMaxSubord
> lineParent
) {
457 if (level
> (GetLevel(lineMaxSubord
+ 1) & SC_FOLDLEVELNUMBERMASK
)) {
458 // Have chewed up some whitespace that belongs to a parent so seek back
459 if (GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
) {
464 return lineMaxSubord
;
467 int Document::GetFoldParent(int line
) const {
468 int level
= GetLevel(line
) & SC_FOLDLEVELNUMBERMASK
;
469 int lineLook
= line
- 1;
470 while ((lineLook
> 0) && (
471 (!(GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
)) ||
472 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) >= level
))
476 if ((GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
) &&
477 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) < level
)) {
484 void Document::GetHighlightDelimiters(HighlightDelimiter
&highlightDelimiter
, int line
, int lastLine
) {
485 int level
= GetLevel(line
);
486 int lookLastLine
= Platform::Maximum(line
, lastLine
) + 1;
489 int lookLineLevel
= level
;
490 int lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
491 while ((lookLine
> 0) && ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) ||
492 ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
>= (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))))) {
493 lookLineLevel
= GetLevel(--lookLine
);
494 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
497 int beginFoldBlock
= (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) ? lookLine
: GetFoldParent(lookLine
);
498 if (beginFoldBlock
== -1) {
499 highlightDelimiter
.Clear();
503 int endFoldBlock
= GetLastChild(beginFoldBlock
, -1, lookLastLine
);
504 int firstChangeableLineBefore
= -1;
505 if (endFoldBlock
< line
) {
506 lookLine
= beginFoldBlock
- 1;
507 lookLineLevel
= GetLevel(lookLine
);
508 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
509 while ((lookLine
>= 0) && (lookLineLevelNum
>= SC_FOLDLEVELBASE
)) {
510 if (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) {
511 if (GetLastChild(lookLine
, -1, lookLastLine
) == line
) {
512 beginFoldBlock
= lookLine
;
514 firstChangeableLineBefore
= line
- 1;
517 if ((lookLine
> 0) && (lookLineLevelNum
== SC_FOLDLEVELBASE
) && ((GetLevel(lookLine
- 1) & SC_FOLDLEVELNUMBERMASK
) > lookLineLevelNum
))
519 lookLineLevel
= GetLevel(--lookLine
);
520 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
523 if (firstChangeableLineBefore
== -1) {
524 for (lookLine
= line
- 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
525 lookLine
>= beginFoldBlock
;
526 lookLineLevel
= GetLevel(--lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
527 if ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) || (lookLineLevelNum
> (level
& SC_FOLDLEVELNUMBERMASK
))) {
528 firstChangeableLineBefore
= lookLine
;
533 if (firstChangeableLineBefore
== -1)
534 firstChangeableLineBefore
= beginFoldBlock
- 1;
536 int firstChangeableLineAfter
= -1;
537 for (lookLine
= line
+ 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
538 lookLine
<= endFoldBlock
;
539 lookLineLevel
= GetLevel(++lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
540 if ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
< (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))) {
541 firstChangeableLineAfter
= lookLine
;
545 if (firstChangeableLineAfter
== -1)
546 firstChangeableLineAfter
= endFoldBlock
+ 1;
548 highlightDelimiter
.beginFoldBlock
= beginFoldBlock
;
549 highlightDelimiter
.endFoldBlock
= endFoldBlock
;
550 highlightDelimiter
.firstChangeableLineBefore
= firstChangeableLineBefore
;
551 highlightDelimiter
.firstChangeableLineAfter
= firstChangeableLineAfter
;
554 int Document::ClampPositionIntoDocument(int pos
) const {
555 return Platform::Clamp(pos
, 0, Length());
558 bool Document::IsCrLf(int pos
) const {
561 if (pos
>= (Length() - 1))
563 return (cb
.CharAt(pos
) == '\r') && (cb
.CharAt(pos
+ 1) == '\n');
566 int Document::LenChar(int pos
) {
569 } else if (IsCrLf(pos
)) {
571 } else if (SC_CP_UTF8
== dbcsCodePage
) {
572 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
573 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
574 int lengthDoc
= Length();
575 if ((pos
+ widthCharBytes
) > lengthDoc
)
576 return lengthDoc
- pos
;
578 return widthCharBytes
;
579 } else if (dbcsCodePage
) {
580 return IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
586 bool Document::InGoodUTF8(int pos
, int &start
, int &end
) const {
588 while ((trail
>0) && (pos
-trail
< UTF8MaxBytes
) && UTF8IsTrailByte(static_cast<unsigned char>(cb
.CharAt(trail
-1))))
590 start
= (trail
> 0) ? trail
-1 : trail
;
592 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(start
));
593 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
594 if (widthCharBytes
== 1) {
597 int trailBytes
= widthCharBytes
- 1;
598 int len
= pos
- start
;
599 if (len
> trailBytes
)
600 // pos too far from lead
602 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
603 for (int b
=1; b
<widthCharBytes
&& ((start
+b
) < Length()); b
++)
604 charBytes
[b
] = cb
.CharAt(static_cast<int>(start
+b
));
605 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
606 if (utf8status
& UTF8MaskInvalid
)
608 end
= start
+ widthCharBytes
;
613 // Normalise a position so that it is not halfway through a two byte character.
614 // This can occur in two situations -
615 // When lines are terminated with \r\n pairs which should be treated as one character.
616 // When displaying DBCS text such as Japanese.
617 // If moving, move the position in the indicated direction.
618 int Document::MovePositionOutsideChar(int pos
, int moveDir
, bool checkLineEnd
) const {
619 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
620 // If out of range, just return minimum/maximum value.
626 // PLATFORM_ASSERT(pos > 0 && pos < Length());
627 if (checkLineEnd
&& IsCrLf(pos
- 1)) {
635 if (SC_CP_UTF8
== dbcsCodePage
) {
636 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
637 // If ch is not a trail byte then pos is valid intercharacter position
638 if (UTF8IsTrailByte(ch
)) {
641 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
642 // ch is a trail byte within a UTF-8 character
648 // Else invalid UTF-8 so return position of isolated trail byte
651 // Anchor DBCS calculations at start of line because start of line can
652 // not be a DBCS trail byte.
653 int posStartLine
= LineStart(LineFromPosition(pos
));
654 if (pos
== posStartLine
)
657 // Step back until a non-lead-byte is found.
659 while ((posCheck
> posStartLine
) && IsDBCSLeadByte(cb
.CharAt(posCheck
-1)))
662 // Check from known start of character.
663 while (posCheck
< pos
) {
664 int mbsize
= IsDBCSLeadByte(cb
.CharAt(posCheck
)) ? 2 : 1;
665 if (posCheck
+ mbsize
== pos
) {
667 } else if (posCheck
+ mbsize
> pos
) {
669 return posCheck
+ mbsize
;
682 // NextPosition moves between valid positions - it can not handle a position in the middle of a
683 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
684 // A \r\n pair is treated as two characters.
685 int Document::NextPosition(int pos
, int moveDir
) const {
686 // If out of range, just return minimum/maximum value.
687 int increment
= (moveDir
> 0) ? 1 : -1;
688 if (pos
+ increment
<= 0)
690 if (pos
+ increment
>= Length())
694 if (SC_CP_UTF8
== dbcsCodePage
) {
695 if (increment
== 1) {
696 // Simple forward movement case so can avoid some checks
697 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
698 if (UTF8IsAscii(leadByte
)) {
699 // Single byte character or invalid
702 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
703 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
704 for (int b
=1; b
<widthCharBytes
; b
++)
705 charBytes
[b
] = cb
.CharAt(static_cast<int>(pos
+b
));
706 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
707 if (utf8status
& UTF8MaskInvalid
)
710 pos
+= utf8status
& UTF8MaskWidth
;
713 // Examine byte before position
715 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
716 // If ch is not a trail byte then pos is valid intercharacter position
717 if (UTF8IsTrailByte(ch
)) {
718 // If ch is a trail byte in a valid UTF-8 character then return start of character
721 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
724 // Else invalid UTF-8 so return position of isolated trail byte
729 int mbsize
= IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
734 // Anchor DBCS calculations at start of line because start of line can
735 // not be a DBCS trail byte.
736 int posStartLine
= LineStart(LineFromPosition(pos
));
737 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
738 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
739 if ((pos
- 1) <= posStartLine
) {
741 } else if (IsDBCSLeadByte(cb
.CharAt(pos
- 1))) {
742 // Must actually be trail byte
745 // Otherwise, step back until a non-lead-byte is found.
746 int posTemp
= pos
- 1;
747 while (posStartLine
<= --posTemp
&& IsDBCSLeadByte(cb
.CharAt(posTemp
)))
749 // Now posTemp+1 must point to the beginning of a character,
750 // so figure out whether we went back an even or an odd
751 // number of bytes and go back 1 or 2 bytes, respectively.
752 return (pos
- 1 - ((pos
- posTemp
) & 1));
763 bool Document::NextCharacter(int &pos
, int moveDir
) const {
764 // Returns true if pos changed
765 int posNext
= NextPosition(pos
, moveDir
);
766 if (posNext
== pos
) {
774 // Return -1 on out-of-bounds
775 Sci_Position SCI_METHOD
Document::GetRelativePosition(Sci_Position positionStart
, Sci_Position characterOffset
) const {
776 int pos
= positionStart
;
778 const int increment
= (characterOffset
> 0) ? 1 : -1;
779 while (characterOffset
!= 0) {
780 const int posNext
= NextPosition(pos
, increment
);
782 return INVALID_POSITION
;
784 characterOffset
-= increment
;
787 pos
= positionStart
+ characterOffset
;
788 if ((pos
< 0) || (pos
> Length()))
789 return INVALID_POSITION
;
794 int Document::GetRelativePositionUTF16(int positionStart
, int characterOffset
) const {
795 int pos
= positionStart
;
797 const int increment
= (characterOffset
> 0) ? 1 : -1;
798 while (characterOffset
!= 0) {
799 const int posNext
= NextPosition(pos
, increment
);
801 return INVALID_POSITION
;
802 if (abs(pos
-posNext
) > 3) // 4 byte character = 2*UTF16.
803 characterOffset
-= increment
;
805 characterOffset
-= increment
;
808 pos
= positionStart
+ characterOffset
;
809 if ((pos
< 0) || (pos
> Length()))
810 return INVALID_POSITION
;
815 int SCI_METHOD
Document::GetCharacterAndWidth(Sci_Position position
, Sci_Position
*pWidth
) const {
817 int bytesInCharacter
= 1;
819 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(position
));
820 if (SC_CP_UTF8
== dbcsCodePage
) {
821 if (UTF8IsAscii(leadByte
)) {
822 // Single byte character or invalid
823 character
= leadByte
;
825 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
826 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
827 for (int b
=1; b
<widthCharBytes
; b
++)
828 charBytes
[b
] = static_cast<unsigned char>(cb
.CharAt(position
+b
));
829 int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
830 if (utf8status
& UTF8MaskInvalid
) {
831 // Report as singleton surrogate values which are invalid Unicode
832 character
= 0xDC80 + leadByte
;
834 bytesInCharacter
= utf8status
& UTF8MaskWidth
;
835 character
= UnicodeFromUTF8(charBytes
);
839 if (IsDBCSLeadByte(leadByte
)) {
840 bytesInCharacter
= 2;
841 character
= (leadByte
<< 8) | static_cast<unsigned char>(cb
.CharAt(position
+1));
843 character
= leadByte
;
847 character
= cb
.CharAt(position
);
850 *pWidth
= bytesInCharacter
;
855 int SCI_METHOD
Document::CodePage() const {
859 bool SCI_METHOD
Document::IsDBCSLeadByte(char ch
) const {
860 // Byte ranges found in Wikipedia articles with relevant search strings in each case
861 unsigned char uch
= static_cast<unsigned char>(ch
);
862 switch (dbcsCodePage
) {
865 return ((uch
>= 0x81) && (uch
<= 0x9F)) ||
866 ((uch
>= 0xE0) && (uch
<= 0xFC));
867 // Lead bytes F0 to FC may be a Microsoft addition.
870 return (uch
>= 0x81) && (uch
<= 0xFE);
872 // Korean Wansung KS C-5601-1987
873 return (uch
>= 0x81) && (uch
<= 0xFE);
876 return (uch
>= 0x81) && (uch
<= 0xFE);
878 // Korean Johab KS C-5601-1992
880 ((uch
>= 0x84) && (uch
<= 0xD3)) ||
881 ((uch
>= 0xD8) && (uch
<= 0xDE)) ||
882 ((uch
>= 0xE0) && (uch
<= 0xF9));
// True only for the two horizontal whitespace characters: space and tab.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
891 // Need to break text into segments near lengthSegment but taking into
892 // account the encoding to not break inside a UTF-8 or DBCS character
893 // and also trying to avoid breaking inside a pair of combining characters.
894 // The segment length must always be long enough (more than 4 bytes)
895 // so that there will be at least one whole character to make a segment.
896 // For UTF-8, text must consist only of valid whole characters.
897 // In preference order from best to worst:
898 // 1) Break after space
899 // 2) Break before punctuation
900 // 3) Break after whole character
902 int Document::SafeSegment(const char *text
, int length
, int lengthSegment
) const {
903 if (length
<= lengthSegment
)
905 int lastSpaceBreak
= -1;
906 int lastPunctuationBreak
= -1;
907 int lastEncodingAllowedBreak
= 0;
908 for (int j
=0; j
< lengthSegment
;) {
909 unsigned char ch
= static_cast<unsigned char>(text
[j
]);
911 if (IsSpaceOrTab(text
[j
- 1]) && !IsSpaceOrTab(text
[j
])) {
915 lastPunctuationBreak
= j
;
918 lastEncodingAllowedBreak
= j
;
920 if (dbcsCodePage
== SC_CP_UTF8
) {
921 j
+= UTF8BytesOfLead
[ch
];
922 } else if (dbcsCodePage
) {
923 j
+= IsDBCSLeadByte(ch
) ? 2 : 1;
928 if (lastSpaceBreak
>= 0) {
929 return lastSpaceBreak
;
930 } else if (lastPunctuationBreak
>= 0) {
931 return lastPunctuationBreak
;
933 return lastEncodingAllowedBreak
;
936 EncodingFamily
Document::CodePageFamily() const {
937 if (SC_CP_UTF8
== dbcsCodePage
)
939 else if (dbcsCodePage
)
945 void Document::ModifiedAt(int pos
) {
950 void Document::CheckReadOnly() {
951 if (cb
.IsReadOnly() && enteredReadOnlyCount
== 0) {
952 enteredReadOnlyCount
++;
953 NotifyModifyAttempt();
954 enteredReadOnlyCount
--;
958 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
959 // SetStyleAt does not change the persistent state of a document
961 bool Document::DeleteChars(int pos
, int len
) {
966 if ((pos
+ len
) > Length())
969 if (enteredModification
!= 0) {
972 enteredModification
++;
973 if (!cb
.IsReadOnly()) {
976 SC_MOD_BEFOREDELETE
| SC_PERFORMED_USER
,
979 int prevLinesTotal
= LinesTotal();
980 bool startSavePoint
= cb
.IsSavePoint();
981 bool startSequence
= false;
982 const char *text
= cb
.DeleteChars(pos
, len
, startSequence
);
983 if (startSavePoint
&& cb
.IsCollectingUndo())
984 NotifySavePoint(!startSavePoint
);
985 if ((pos
< Length()) || (pos
== 0))
991 SC_MOD_DELETETEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
993 LinesTotal() - prevLinesTotal
, text
));
995 enteredModification
--;
997 return !cb
.IsReadOnly();
1001 * Insert a string with a length.
1003 int Document::InsertString(int position
, const char *s
, int insertLength
) {
1004 if (insertLength
<= 0) {
1007 CheckReadOnly(); // Application may change read only state here
1008 if (cb
.IsReadOnly()) {
1011 if (enteredModification
!= 0) {
1014 enteredModification
++;
1015 insertionSet
= false;
1020 position
, insertLength
,
1023 s
= insertion
.c_str();
1024 insertLength
= static_cast<int>(insertion
.length());
1028 SC_MOD_BEFOREINSERT
| SC_PERFORMED_USER
,
1029 position
, insertLength
,
1031 int prevLinesTotal
= LinesTotal();
1032 bool startSavePoint
= cb
.IsSavePoint();
1033 bool startSequence
= false;
1034 const char *text
= cb
.InsertString(position
, s
, insertLength
, startSequence
);
1035 if (startSavePoint
&& cb
.IsCollectingUndo())
1036 NotifySavePoint(!startSavePoint
);
1037 ModifiedAt(position
);
1040 SC_MOD_INSERTTEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
1041 position
, insertLength
,
1042 LinesTotal() - prevLinesTotal
, text
));
1043 if (insertionSet
) { // Free memory as could be large
1044 std::string().swap(insertion
);
1046 enteredModification
--;
1047 return insertLength
;
1050 void Document::ChangeInsertion(const char *s
, int length
) {
1051 insertionSet
= true;
1052 insertion
.assign(s
, length
);
1055 int SCI_METHOD
Document::AddData(char *data
, Sci_Position length
) {
1057 int position
= Length();
1058 InsertString(position
, data
, length
);
1059 } catch (std::bad_alloc
&) {
1060 return SC_STATUS_BADALLOC
;
1062 return SC_STATUS_FAILURE
;
1067 void * SCI_METHOD
Document::ConvertToDocument() {
1071 int Document::Undo() {
1074 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1075 enteredModification
++;
1076 if (!cb
.IsReadOnly()) {
1077 bool startSavePoint
= cb
.IsSavePoint();
1078 bool multiLine
= false;
1079 int steps
= cb
.StartUndo();
1080 //Platform::DebugPrintf("Steps=%d\n", steps);
1081 int coalescedRemovePos
= -1;
1082 int coalescedRemoveLen
= 0;
1083 int prevRemoveActionPos
= -1;
1084 int prevRemoveActionLen
= 0;
1085 for (int step
= 0; step
< steps
; step
++) {
1086 const int prevLinesTotal
= LinesTotal();
1087 const Action
&action
= cb
.GetUndoStep();
1088 if (action
.at
== removeAction
) {
1089 NotifyModified(DocModification(
1090 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
1091 } else if (action
.at
== containerAction
) {
1092 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
1093 dm
.token
= action
.position
;
1095 if (!action
.mayCoalesce
) {
1096 coalescedRemovePos
= -1;
1097 coalescedRemoveLen
= 0;
1098 prevRemoveActionPos
= -1;
1099 prevRemoveActionLen
= 0;
1102 NotifyModified(DocModification(
1103 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
1105 cb
.PerformUndoStep();
1106 if (action
.at
!= containerAction
) {
1107 ModifiedAt(action
.position
);
1108 newPos
= action
.position
;
1111 int modFlags
= SC_PERFORMED_UNDO
;
1112 // With undo, an insertion action becomes a deletion notification
1113 if (action
.at
== removeAction
) {
1114 newPos
+= action
.lenData
;
1115 modFlags
|= SC_MOD_INSERTTEXT
;
1116 if ((coalescedRemoveLen
> 0) &&
1117 (action
.position
== prevRemoveActionPos
|| action
.position
== (prevRemoveActionPos
+ prevRemoveActionLen
))) {
1118 coalescedRemoveLen
+= action
.lenData
;
1119 newPos
= coalescedRemovePos
+ coalescedRemoveLen
;
1121 coalescedRemovePos
= action
.position
;
1122 coalescedRemoveLen
= action
.lenData
;
1124 prevRemoveActionPos
= action
.position
;
1125 prevRemoveActionLen
= action
.lenData
;
1126 } else if (action
.at
== insertAction
) {
1127 modFlags
|= SC_MOD_DELETETEXT
;
1128 coalescedRemovePos
= -1;
1129 coalescedRemoveLen
= 0;
1130 prevRemoveActionPos
= -1;
1131 prevRemoveActionLen
= 0;
1134 modFlags
|= SC_MULTISTEPUNDOREDO
;
1135 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1136 if (linesAdded
!= 0)
1138 if (step
== steps
- 1) {
1139 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1141 modFlags
|= SC_MULTILINEUNDOREDO
;
1143 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
1144 linesAdded
, action
.data
));
1147 bool endSavePoint
= cb
.IsSavePoint();
1148 if (startSavePoint
!= endSavePoint
)
1149 NotifySavePoint(endSavePoint
);
1151 enteredModification
--;
// Re-apply the next group of redo steps held by the cell buffer, sending a
// "before" and an "after" notification for every step.
// Nothing is done unless no modification is in progress (enteredModification
// == 0) and the cell buffer is collecting undo information.
// NOTE(review): several lines of this function (including its return
// statement) are not visible in this listing; newPos is presumably the value
// returned - confirm.
1156 int Document::Redo() {
1159 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1160 enteredModification
++;
1161 if (!cb
.IsReadOnly()) {
// Remember the save point state so a change can be reported afterwards.
1162 bool startSavePoint
= cb
.IsSavePoint();
1163 bool multiLine
= false;
1164 int steps
= cb
.StartRedo();
1165 for (int step
= 0; step
< steps
; step
++) {
1166 const int prevLinesTotal
= LinesTotal();
1167 const Action
&action
= cb
.GetRedoStep();
// Announce the step before it is performed; the flag depends on the action type.
1168 if (action
.at
== insertAction
) {
1169 NotifyModified(DocModification(
1170 SC_MOD_BEFOREINSERT
| SC_PERFORMED_REDO
, action
));
1171 } else if (action
.at
== containerAction
) {
1172 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_REDO
);
1173 dm
.token
= action
.position
;
1176 NotifyModified(DocModification(
1177 SC_MOD_BEFOREDELETE
| SC_PERFORMED_REDO
, action
));
1179 cb
.PerformRedoStep();
1180 if (action
.at
!= containerAction
) {
1181 ModifiedAt(action
.position
);
1182 newPos
= action
.position
;
// Build the flags for the notification sent after the step.
1185 int modFlags
= SC_PERFORMED_REDO
;
1186 if (action
.at
== insertAction
) {
// After an insertion the new position is moved past the inserted text.
1187 newPos
+= action
.lenData
;
1188 modFlags
|= SC_MOD_INSERTTEXT
;
1189 } else if (action
.at
== removeAction
) {
1190 modFlags
|= SC_MOD_DELETETEXT
;
1193 modFlags
|= SC_MULTISTEPUNDOREDO
;
1194 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1195 if (linesAdded
!= 0)
1197 if (step
== steps
- 1) {
1198 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1200 modFlags
|= SC_MULTILINEUNDOREDO
;
1203 DocModification(modFlags
, action
.position
, action
.lenData
,
1204 linesAdded
, action
.data
));
// Tell watchers when the redo moved the document onto or off a save point.
1207 bool endSavePoint
= cb
.IsSavePoint();
1208 if (startSavePoint
!= endSavePoint
)
1209 NotifySavePoint(endSavePoint
);
1211 enteredModification
--;
1216 void Document::DelChar(int pos
) {
1217 DeleteChars(pos
, LenChar(pos
));
// Delete the character that ends just before pos.
// Visible branches: a CR+LF pair before pos is removed as two bytes; when a
// DBCS code page is set the start of the previous character is found with
// NextPosition(pos, -1) and the whole character is removed; otherwise a
// single byte is removed.
// NOTE(review): the first branch of this if-chain is not visible in this listing.
1220 void Document::DelCharBack(int pos
) {
1223 } else if (IsCrLf(pos
- 2)) {
1224 DeleteChars(pos
- 2, 2);
1225 } else if (dbcsCodePage
) {
1226 int startChar
= NextPosition(pos
, -1);
1227 DeleteChars(startChar
, pos
- startChar
);
1229 DeleteChars(pos
- 1, 1);
// Return the next tab stop after column pos, where tab stops are the
// multiples of tabSize. A pos already on a stop moves to the following stop.
static int NextTab(int pos, int tabSize) {
	const int stopsPassed = pos / tabSize;
	return (stopsPassed + 1) * tabSize;
}
// Build the white space string used to indent a line by `indent` columns.
// When insertSpaces is false, a tab character is appended while at least
// tabSize columns remain; a second loop then runs while indent > 0.
// NOTE(review): the loop bodies that reduce `indent` and fill the remainder
// are not visible in this listing.
1237 static std::string
CreateIndentation(int indent
, int tabSize
, bool insertSpaces
) {
1238 std::string indentation
;
1239 if (!insertSpaces
) {
1240 while (indent
>= tabSize
) {
1241 indentation
+= '\t';
1245 while (indent
> 0) {
// Measure the indentation of a line in columns.
// Only lines in [0, LinesTotal()) are scanned; the scan starts at the line
// start and a tab advances the running indent to the next tab stop using
// tabInChars.
// NOTE(review): the handling of other characters, the loop exit and the
// return statement are not visible in this listing.
1252 int SCI_METHOD
Document::GetLineIndentation(Sci_Position line
) {
1254 if ((line
>= 0) && (line
< LinesTotal())) {
1255 int lineStart
= LineStart(line
);
1256 int length
= Length();
1257 for (int i
= lineStart
; i
< length
; i
++) {
1258 char ch
= cb
.CharAt(i
);
1261 else if (ch
== '\t')
1262 indent
= NextTab(indent
, tabInChars
);
// Change the indentation of a line to `indent` columns.
// When the requested indent differs from the current one, the existing
// leading white space (line start up to GetLineIndentPosition) is deleted and
// a string from CreateIndentation is inserted at the line start; the value
// returned is the line start plus the length reported by InsertString.
// Otherwise the current indent position is returned.
// NOTE(review): some lines between the visible statements are missing from this listing.
1270 int Document::SetLineIndentation(int line
, int indent
) {
1271 int indentOfLine
= GetLineIndentation(line
);
1274 if (indent
!= indentOfLine
) {
// CreateIndentation takes insertSpaces, hence the negated useTabs.
1275 std::string linebuf
= CreateIndentation(indent
, tabInChars
, !useTabs
);
1276 int thisLineStart
= LineStart(line
);
1277 int indentPos
= GetLineIndentPosition(line
);
1279 DeleteChars(thisLineStart
, indentPos
- thisLineStart
);
1280 return thisLineStart
+ InsertString(thisLineStart
, linebuf
.c_str(),
1281 static_cast<int>(linebuf
.length()));
1283 return GetLineIndentPosition(line
);
// Find the position just after the leading spaces and tabs of a line.
// The scan starts at LineStart(line) and continues while the position is
// inside the document and the character there is a space or tab.
// NOTE(review): the guard before the scan, the loop body and the return
// statement are not visible in this listing.
1287 int Document::GetLineIndentPosition(int line
) const {
1290 int pos
= LineStart(line
);
1291 int length
= Length();
1292 while ((pos
< length
) && IsSpaceOrTab(cb
.CharAt(pos
))) {
// Compute the display column of a position, expanding tabs.
// The line containing pos is walked from its start up to pos; the tab branch
// moves the column to the next tab stop (tabInChars), and the position is
// advanced a whole character at a time with NextPosition.
// NOTE(review): the bodies of the '\r', '\n' and end-of-document branches and
// the return statement are not visible in this listing.
1298 int Document::GetColumn(int pos
) {
1300 int line
= LineFromPosition(pos
);
1301 if ((line
>= 0) && (line
< LinesTotal())) {
1302 for (int i
= LineStart(line
); i
< pos
;) {
1303 char ch
= cb
.CharAt(i
);
1305 column
= NextTab(column
, tabInChars
);
1307 } else if (ch
== '\r') {
1309 } else if (ch
== '\n') {
1311 } else if (i
>= Length()) {
1315 i
= NextPosition(i
, 1);
// Count characters between two positions.
// Both ends are first moved outside any character they fall inside
// (start forwards, end backwards), then the range is walked one whole
// character at a time with NextPosition.
// NOTE(review): the counter and the return statement are not visible in this listing.
1322 int Document::CountCharacters(int startPos
, int endPos
) const {
1323 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1324 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1327 while (i
< endPos
) {
1329 i
= NextPosition(i
, 1);
// Count the UTF-16 code units needed for the text between two positions
// (presumably - the counting statements are not visible in this listing).
// Both ends are first moved outside any character they fall inside, then the
// range is walked with NextPosition.
1334 int Document::CountUTF16(int startPos
, int endPos
) const {
1335 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1336 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1339 while (i
< endPos
) {
1341 const int next
= NextPosition(i
, 1);
// Find the position on a line that corresponds to a display column.
// Starting at the line start, characters are consumed while the running
// column is below the target and the position is inside the document; a tab
// jumps the running column to the next tab stop (tabInChars) and other
// characters advance with NextPosition.
// NOTE(review): the branch bodies for overshooting tabs and for line end
// characters, and the return statement, are not visible in this listing.
1349 int Document::FindColumn(int line
, int column
) {
1350 int position
= LineStart(line
);
1351 if ((line
>= 0) && (line
< LinesTotal())) {
1352 int columnCurrent
= 0;
1353 while ((columnCurrent
< column
) && (position
< Length())) {
1354 char ch
= cb
.CharAt(position
);
1356 columnCurrent
= NextTab(columnCurrent
, tabInChars
);
1357 if (columnCurrent
> column
)
1360 } else if (ch
== '\r') {
1362 } else if (ch
== '\n') {
1366 position
= NextPosition(position
, 1);
// Indent or dedent a range of lines by IndentSize() columns.
// Lines are processed from lineBottom down to lineTop. The indenting call is
// only made for lines that contain something before their line end
// (LineStart < LineEnd); the dedenting call subtracts IndentSize().
// NOTE(review): the test on `forwards` that selects between the two calls is
// not visible in this listing.
1373 void Document::Indent(bool forwards
, int lineBottom
, int lineTop
) {
1374 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1375 for (int line
= lineBottom
; line
>= lineTop
; line
--) {
1376 int indentOfLine
= GetLineIndentation(line
);
1378 if (LineStart(line
) < LineEnd(line
)) {
1379 SetLineIndentation(line
, indentOfLine
+ IndentSize());
1382 SetLineIndentation(line
, indentOfLine
- IndentSize());
1387 // Convert line endings for a piece of text to a particular mode.
1388 // Stop at len or when a NUL is found.
// Each '\n' or '\r' in the input is replaced by the line end for
// eolModeWanted (CR, LF, or CR followed by LF); a '\r' immediately followed
// by '\n' is detected as one pair. All other bytes are copied unchanged.
// NOTE(review): the declaration of dest, the statement that skips the second
// byte of a CR+LF pair and the return statement are not visible in this listing.
1389 std::string
Document::TransformLineEnds(const char *s
, size_t len
, int eolModeWanted
) {
1391 for (size_t i
= 0; (i
< len
) && (s
[i
]); i
++) {
1392 if (s
[i
] == '\n' || s
[i
] == '\r') {
1393 if (eolModeWanted
== SC_EOL_CR
) {
1394 dest
.push_back('\r');
1395 } else if (eolModeWanted
== SC_EOL_LF
) {
1396 dest
.push_back('\n');
1397 } else { // eolModeWanted == SC_EOL_CRLF
1398 dest
.push_back('\r');
1399 dest
.push_back('\n');
1401 if ((s
[i
] == '\r') && (i
+1 < len
) && (s
[i
+1] == '\n')) {
1405 dest
.push_back(s
[i
]);
// Rewrite every line end in the document to the mode given by eolModeSet.
// The document is scanned byte by byte. A CR followed by LF, a lone CR and a
// lone LF are handled separately, each using DeleteChars / InsertString to
// reach the wanted form; pos is advanced by the length InsertString reports
// so that inserted bytes are not rescanned.
// NOTE(review): some lines (for example any undo grouping) are not visible in this listing.
1411 void Document::ConvertLineEnds(int eolModeSet
) {
1414 for (int pos
= 0; pos
< Length(); pos
++) {
1415 if (cb
.CharAt(pos
) == '\r') {
1416 if (cb
.CharAt(pos
+ 1) == '\n') {
1418 if (eolModeSet
== SC_EOL_CR
) {
1419 DeleteChars(pos
+ 1, 1); // Delete the LF
1420 } else if (eolModeSet
== SC_EOL_LF
) {
1421 DeleteChars(pos
, 1); // Delete the CR
1427 if (eolModeSet
== SC_EOL_CRLF
) {
1428 pos
+= InsertString(pos
+ 1, "\n", 1); // Insert LF
1429 } else if (eolModeSet
== SC_EOL_LF
) {
1430 pos
+= InsertString(pos
, "\n", 1); // Insert LF
1431 DeleteChars(pos
, 1); // Delete CR
1435 } else if (cb
.CharAt(pos
) == '\n') {
1437 if (eolModeSet
== SC_EOL_CRLF
) {
1438 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1439 } else if (eolModeSet
== SC_EOL_CR
) {
1440 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1441 DeleteChars(pos
, 1); // Delete LF
// Test whether a line holds nothing but spaces and tabs.
// Characters from LineStart(line) up to LineEnd(line) are examined and any
// character other than ' ' or '\t' triggers the inner branch.
// NOTE(review): the return statements and the loop increment are not visible in this listing.
1449 bool Document::IsWhiteLine(int line
) const {
1450 int currentChar
= LineStart(line
);
1451 int endLine
= LineEnd(line
);
1452 while (currentChar
< endLine
) {
1453 if (cb
.CharAt(currentChar
) != ' ' && cb
.CharAt(currentChar
) != '\t') {
// Find the start of the paragraph above pos.
// Working upwards from the line holding pos, white lines are skipped first
// and then non-white lines; the start of the resulting line is returned.
// NOTE(review): the statements that change `line` are not visible in this listing.
1461 int Document::ParaUp(int pos
) const {
1462 int line
= LineFromPosition(pos
);
1464 while (line
>= 0 && IsWhiteLine(line
)) { // skip empty lines
1467 while (line
>= 0 && !IsWhiteLine(line
)) { // skip non-empty lines
1471 return LineStart(line
);
// Find the start of the paragraph below pos.
// Working downwards from the line holding pos, non-white lines are skipped
// first and then white lines. If a line remains, its start is returned;
// otherwise the end of the last line is returned.
// NOTE(review): the statements that advance `line` are not visible in this listing.
1474 int Document::ParaDown(int pos
) const {
1475 int line
= LineFromPosition(pos
);
1476 while (line
< LinesTotal() && !IsWhiteLine(line
)) { // skip non-empty lines
1479 while (line
< LinesTotal() && IsWhiteLine(line
)) { // skip empty lines
1482 if (line
< LinesTotal())
1483 return LineStart(line
);
1484 else // end of a document
1485 return LineEnd(line
-1);
1488 CharClassify::cc
Document::WordCharClass(unsigned char ch
) const {
1489 if ((SC_CP_UTF8
== dbcsCodePage
) && (!UTF8IsAscii(ch
)))
1490 return CharClassify::ccWord
;
1491 return charClass
.GetClass(ch
);
1495 * Used by commands that want to select whole words.
1496 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
// Extend pos to a word boundary: backwards while the character before pos
// has the starting class, or forwards while the character at pos has it.
// The starting class is ccWord unless onlyWordCharacters is false, in which
// case it is taken from the character next to pos in the direction of travel.
// The result is adjusted so it does not fall inside a multi-byte character.
// NOTE(review): the test on delta and the statements that move pos are not
// visible in this listing.
1498 int Document::ExtendWordSelect(int pos
, int delta
, bool onlyWordCharacters
) {
1499 CharClassify::cc ccStart
= CharClassify::ccWord
;
1501 if (!onlyWordCharacters
)
1502 ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1503 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
))
1506 if (!onlyWordCharacters
&& pos
< Length())
1507 ccStart
= WordCharClass(cb
.CharAt(pos
));
1508 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1511 return MovePositionOutsideChar(pos
, delta
, true);
1515 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1517 * This is looking for a transition between character classes although there is also some
1518 * additional movement to transit white space.
1519 * Used by cursor movement by word commands.
// Move pos to the start of the next word in the direction given by delta.
// Backwards: space-class characters before pos are passed first, then the
// run of characters sharing the class of the character before pos.
// Forwards: the run sharing the class of the character at pos is passed
// first, then any space-class characters that follow.
// NOTE(review): the test on delta, the statements that move pos and the
// return statement are not visible in this listing.
1521 int Document::NextWordStart(int pos
, int delta
) {
1523 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
))
1526 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1527 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
)) {
1532 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1533 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1535 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
))
1542 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1544 * This is looking for a transition between character classes although there is also some
1545 * additional movement to transit white space.
1546 * Used by cursor movement by word commands.
// Move pos to the end of the next word in the direction given by delta.
// Backwards: if the character before pos is not space-class, the run sharing
// its class is passed, then any space-class characters before that.
// Forwards: space-class characters at pos are passed first, then the run
// sharing the class of the first character after them.
// NOTE(review): the test on delta, the statements that move pos and the
// return statement are not visible in this listing.
1548 int Document::NextWordEnd(int pos
, int delta
) {
1551 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1552 if (ccStart
!= CharClassify::ccSpace
) {
1553 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
) {
1557 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
) {
1562 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
) {
1565 if (pos
< Length()) {
1566 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1567 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == ccStart
) {
1576 * Check that the character at the given position is a word or punctuation character and that
1577 * the previous character is of a different character class.
// True when the character at pos is a word or punctuation character whose
// class differs from that of the character before it.
// NOTE(review): the guard around this test and the result used when the
// guard fails are not visible in this listing.
1579 bool Document::IsWordStartAt(int pos
) const {
1581 CharClassify::cc ccPos
= WordCharClass(CharAt(pos
));
1582 return (ccPos
== CharClassify::ccWord
|| ccPos
== CharClassify::ccPunctuation
) &&
1583 (ccPos
!= WordCharClass(CharAt(pos
- 1)));
1589 * Check that the character at the given position is a word or punctuation character and that
1590 * the next character is of a different character class.
// For a pos inside the document: true when the character before pos is a
// word or punctuation character whose class differs from that of the
// character at pos.
// NOTE(review): the result for pos >= Length() is not visible in this listing.
1592 bool Document::IsWordEndAt(int pos
) const {
1593 if (pos
< Length()) {
1594 CharClassify::cc ccPrev
= WordCharClass(CharAt(pos
-1));
1595 return (ccPrev
== CharClassify::ccWord
|| ccPrev
== CharClassify::ccPunctuation
) &&
1596 (ccPrev
!= WordCharClass(CharAt(pos
)));
1602 * Check that the given range has transitions between character classes at both
1603 * ends and where the characters on the inside are word or punctuation characters.
1605 bool Document::IsWordAt(int start
, int end
) const {
1606 return (start
< end
) && IsWordStartAt(start
) && IsWordEndAt(end
);
1609 bool Document::MatchesWordOptions(bool word
, bool wordStart
, int pos
, int length
) const {
1610 return (!word
&& !wordStart
) ||
1611 (word
&& IsWordAt(pos
, pos
+ length
)) ||
1612 (wordStart
&& IsWordStartAt(pos
));
// Presumably reports whether a case folder has been set for this document.
// NOTE(review): the body is not visible in this listing - confirm.
1615 bool Document::HasCaseFolder(void) const {
// Presumably installs pcf_ as the case folder used by case-insensitive searches.
// NOTE(review): the body is not visible in this listing, so ownership of
// pcf_ and the fate of any previous folder should be confirmed.
1619 void Document::SetCaseFolder(CaseFolder
*pcf_
) {
// Decode the UTF-8 character starting at position, returning the character
// value together with its width in bytes.
// ASCII lead bytes are returned directly with width 1. Otherwise the bytes
// indicated by the lead byte are gathered and classified; an invalid
// sequence yields unicodeReplacementChar with width 1 so the caller advances
// by a single byte.
1624 Document::CharacterExtracted
Document::ExtractCharacter(int position
) const {
1625 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(position
));
1626 if (UTF8IsAscii(leadByte
)) {
1627 // Common case: ASCII character
1628 return CharacterExtracted(leadByte
, 1);
1630 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1631 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
1632 for (int b
=1; b
<widthCharBytes
; b
++)
1633 charBytes
[b
] = static_cast<unsigned char>(cb
.CharAt(position
+ b
));
1634 int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
1635 if (utf8status
& UTF8MaskInvalid
) {
1636 // Treat as invalid and use up just one byte
1637 return CharacterExtracted(unicodeReplacementChar
, 1);
1639 return CharacterExtracted(UnicodeFromUTF8(charBytes
), utf8status
& UTF8MaskWidth
);
1644 * Find text in document, supporting both forward and backward
1645 * searches (just pass minPos > maxPos to do a backward search)
1646 * Has not been tested with backwards DBCS searches yet.
// Search the document for `search`.
//  minPos, maxPos - range to search; the search runs forwards when
//                   minPos <= maxPos and backwards otherwise.
//  flags          - SCFIND_* bits; MATCHCASE, WHOLEWORD, WORDSTART and REGEXP
//                   are decoded here.
//  length         - on entry the length of `search`; the folded-search
//                   branches store the matched length in bytes through it.
// The regular expression path hands the whole search to the regex object.
// The remaining code has four variants: case sensitive byte comparison,
// case-insensitive UTF-8, case-insensitive DBCS and case-insensitive single
// byte, the last three comparing text folded through pcf.
// NOTE(review): the return statements of the non-regex paths and several
// connecting lines are not visible in this listing.
1648 long Document::FindText(int minPos
, int maxPos
, const char *search
,
1649 int flags
, int *length
) {
1652 const bool caseSensitive
= (flags
& SCFIND_MATCHCASE
) != 0;
1653 const bool word
= (flags
& SCFIND_WHOLEWORD
) != 0;
1654 const bool wordStart
= (flags
& SCFIND_WORDSTART
) != 0;
1655 const bool regExp
= (flags
& SCFIND_REGEXP
) != 0;
1658 regex
= CreateRegexSearch(&charClass
);
1659 return regex
->FindText(this, minPos
, maxPos
, search
, caseSensitive
, word
, wordStart
, flags
, length
);
1662 const bool forward
= minPos
<= maxPos
;
1663 const int increment
= forward
? 1 : -1;
1665 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1666 const int startPos
= MovePositionOutsideChar(minPos
, increment
, false);
1667 const int endPos
= MovePositionOutsideChar(maxPos
, increment
, false);
1669 // Compute actual search ranges needed
1670 const int lengthFind
= *length
;
1672 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1673 const int limitPos
= Platform::Maximum(startPos
, endPos
);
1676 // Back all of a character
1677 pos
= NextPosition(pos
, increment
);
// Variant 1: case sensitive - compare document bytes with the search bytes directly.
1679 if (caseSensitive
) {
1680 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1681 const char charStartSearch
= search
[0];
1682 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1683 if (CharAt(pos
) == charStartSearch
) {
1684 bool found
= (pos
+ lengthFind
) <= limitPos
;
1685 for (int indexSearch
= 1; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1686 found
= CharAt(pos
+ indexSearch
) == search
[indexSearch
];
1688 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1692 if (!NextCharacter(pos
, increment
))
// Variant 2: case-insensitive UTF-8 - fold the search text once, then fold
// each document character and compare the folded bytes.
1695 } else if (SC_CP_UTF8
== dbcsCodePage
) {
1696 const size_t maxFoldingExpansion
= 4;
1697 std::vector
<char> searchThing(lengthFind
* UTF8MaxBytes
* maxFoldingExpansion
+ 1);
1698 const int lenSearch
= static_cast<int>(
1699 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1700 char bytes
[UTF8MaxBytes
+ 1];
1701 char folded
[UTF8MaxBytes
* maxFoldingExpansion
+ 1];
1702 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1703 int widthFirstCharacter
= 0;
1704 int posIndexDocument
= pos
;
1705 int indexSearch
= 0;
1706 bool characterMatches
= true;
1708 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(posIndexDocument
));
1709 bytes
[0] = leadByte
;
1711 if (!UTF8IsAscii(leadByte
)) {
1712 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1713 for (int b
=1; b
<widthCharBytes
; b
++) {
1714 bytes
[b
] = cb
.CharAt(posIndexDocument
+b
);
1716 widthChar
= UTF8Classify(reinterpret_cast<const unsigned char *>(bytes
), widthCharBytes
) & UTF8MaskWidth
;
1718 if (!widthFirstCharacter
)
1719 widthFirstCharacter
= widthChar
;
1720 if ((posIndexDocument
+ widthChar
) > limitPos
)
1722 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1723 folded
[lenFlat
] = 0;
1724 // Does folded match the buffer
1725 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1726 if (!characterMatches
)
1728 posIndexDocument
+= widthChar
;
1729 indexSearch
+= lenFlat
;
1730 if (indexSearch
>= lenSearch
)
1733 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1734 if (MatchesWordOptions(word
, wordStart
, pos
, posIndexDocument
- pos
)) {
1735 *length
= posIndexDocument
- pos
;
1740 pos
+= widthFirstCharacter
;
1742 if (!NextCharacter(pos
, increment
))
// Variant 3: case-insensitive DBCS - characters are one or two bytes wide.
1746 } else if (dbcsCodePage
) {
1747 const size_t maxBytesCharacter
= 2;
1748 const size_t maxFoldingExpansion
= 4;
1749 std::vector
<char> searchThing(lengthFind
* maxBytesCharacter
* maxFoldingExpansion
+ 1);
1750 const int lenSearch
= static_cast<int>(
1751 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1752 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1753 int indexDocument
= 0;
1754 int indexSearch
= 0;
1755 bool characterMatches
= true;
1756 while (characterMatches
&&
1757 ((pos
+ indexDocument
) < limitPos
) &&
1758 (indexSearch
< lenSearch
)) {
1759 char bytes
[maxBytesCharacter
+ 1];
1760 bytes
[0] = cb
.CharAt(pos
+ indexDocument
);
1761 const int widthChar
= IsDBCSLeadByte(bytes
[0]) ? 2 : 1;
1763 bytes
[1] = cb
.CharAt(pos
+ indexDocument
+ 1);
1764 if ((pos
+ indexDocument
+ widthChar
) > limitPos
)
1766 char folded
[maxBytesCharacter
* maxFoldingExpansion
+ 1];
1767 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1768 folded
[lenFlat
] = 0;
1769 // Does folded match the buffer
1770 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1771 indexDocument
+= widthChar
;
1772 indexSearch
+= lenFlat
;
1774 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1775 if (MatchesWordOptions(word
, wordStart
, pos
, indexDocument
)) {
1776 *length
= indexDocument
;
1780 if (!NextCharacter(pos
, increment
))
// Variant 4: case-insensitive single byte - fold one byte at a time.
1784 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1785 std::vector
<char> searchThing(lengthFind
+ 1);
1786 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
1787 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1788 bool found
= (pos
+ lengthFind
) <= limitPos
;
1789 for (int indexSearch
= 0; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1790 char ch
= CharAt(pos
+ indexSearch
);
1792 pcf
->Fold(folded
, sizeof(folded
), &ch
, 1);
1793 found
= folded
[0] == searchThing
[indexSearch
];
1795 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1798 if (!NextCharacter(pos
, increment
))
1803 //Platform::DebugPrintf("Not found\n");
// Forward a replacement-text substitution to the regex object, passing this
// document, the replacement text and its in/out length.
// NOTE(review): the guard around this call and the alternative return value
// are not visible in this listing.
1807 const char *Document::SubstituteByPosition(const char *text
, int *length
) {
1809 return regex
->SubstituteByPosition(this, text
, length
);
// Presumably returns the number of lines in the document.
// NOTE(review): the body is not visible in this listing - confirm.
1814 int Document::LinesTotal() const {
1818 void Document::SetDefaultCharClasses(bool includeWordClass
) {
1819 charClass
.SetDefaultCharClasses(includeWordClass
);
1822 void Document::SetCharClasses(const unsigned char *chars
, CharClassify::cc newCharClass
) {
1823 charClass
.SetCharClasses(chars
, newCharClass
);
1826 int Document::GetCharsOfClass(CharClassify::cc characterClass
, unsigned char *buffer
) {
1827 return charClass
.GetCharsOfClass(characterClass
, buffer
);
1830 void SCI_METHOD
Document::StartStyling(Sci_Position position
, char) {
1831 endStyled
= position
;
// Apply one style to `length` bytes starting at endStyled.
// The cell buffer reports whether any style actually changed; in that case a
// SC_MOD_CHANGESTYLE modification covering the range is prepared. endStyled
// is then advanced past the styled range.
// NOTE(review): the re-entrancy handling around enteredStyling, the
// notification call and the return statements are not visible in this listing.
1834 bool SCI_METHOD
Document::SetStyleFor(Sci_Position length
, char style
) {
1835 if (enteredStyling
!= 0) {
1839 int prevEndStyled
= endStyled
;
1840 if (cb
.SetStyleFor(endStyled
, length
, style
)) {
1841 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1842 prevEndStyled
, length
);
1845 endStyled
+= length
;
// Apply a separate style to each of `length` bytes starting at endStyled,
// advancing endStyled as it goes.
// Positions whose style changed are tracked so that a single
// SC_MOD_CHANGESTYLE modification spanning startMod..endMod can be built.
// NOTE(review): the re-entrancy handling around enteredStyling, the updates
// of didChange/endMod, the notification call and the return statements are
// not visible in this listing.
1851 bool SCI_METHOD
Document::SetStyles(Sci_Position length
, const char *styles
) {
1852 if (enteredStyling
!= 0) {
1856 bool didChange
= false;
1859 for (int iPos
= 0; iPos
< length
; iPos
++, endStyled
++) {
1860 PLATFORM_ASSERT(endStyled
< Length());
1861 if (cb
.SetStyleAt(endStyled
, styles
[iPos
])) {
1863 startMod
= endStyled
;
1870 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1871 startMod
, endMod
- startMod
+ 1);
// Make sure the document is styled at least as far as pos.
// Nothing happens while styling is already in progress or when pos is
// already styled. With a lexer that is not using container lexing, styling
// restarts from the start of the line holding the current end of styling.
// The watchers are asked in turn to style, stopping once the styled range
// reaches pos.
1879 void Document::EnsureStyledTo(int pos
) {
1880 if ((enteredStyling
== 0) && (pos
> GetEndStyled())) {
1881 IncrementStyleClock();
1882 if (pli
&& !pli
->UseContainerLexing()) {
1883 int lineEndStyled
= LineFromPosition(GetEndStyled());
1884 int endStyledTo
= LineStart(lineEndStyled
);
1885 pli
->Colourise(endStyledTo
, pos
);
1887 // Ask the watchers to style, and stop as soon as one responds.
1888 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin();
1889 (pos
> GetEndStyled()) && (it
!= watchers
.end()); ++it
) {
1890 it
->watcher
->NotifyStyleNeeded(this, it
->userData
, pos
);
1896 void Document::StyleToAdjustingLineDuration(int pos
) {
1897 // Place bounds on the duration used to avoid glitches spiking it
1898 // and so causing slow styling or non-responsive scrolling
1899 const double minDurationOneLine
= 0.000001;
1900 const double maxDurationOneLine
= 0.0001;
1902 // Alpha value for exponential smoothing.
1903 // Most recent value contributes 25% to smoothed value.
1904 const double alpha
= 0.25;
1906 const Sci_Position lineFirst
= LineFromPosition(GetEndStyled());
1907 ElapsedTime etStyling
;
1908 EnsureStyledTo(pos
);
1909 const double durationStyling
= etStyling
.Duration();
1910 const Sci_Position lineLast
= LineFromPosition(GetEndStyled());
1911 if (lineLast
>= lineFirst
+ 8) {
1912 // Only adjust for styling multiple lines to avoid instability
1913 const double durationOneLine
= durationStyling
/ (lineLast
- lineFirst
);
1914 durationStyleOneLine
= alpha
* durationOneLine
+ (1.0 - alpha
) * durationStyleOneLine
;
1915 if (durationStyleOneLine
< minDurationOneLine
) {
1916 durationStyleOneLine
= minDurationOneLine
;
1917 } else if (durationStyleOneLine
> maxDurationOneLine
) {
1918 durationStyleOneLine
= maxDurationOneLine
;
1923 void Document::LexerChanged() {
1924 // Tell the watchers the lexer has changed.
1925 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1926 it
->watcher
->NotifyLexerChanged(this, it
->userData
);
// Store a lexer state value for a line and return the value it replaces.
// When the value differs from the previous one a SC_MOD_CHANGELINESTATE
// modification for that line is prepared.
// NOTE(review): the call that sends the modification is not visible in this listing.
1930 int SCI_METHOD
Document::SetLineState(Sci_Position line
, int state
) {
1931 int statePrevious
= static_cast<LineState
*>(perLineData
[ldState
])->SetLineState(line
, state
);
1932 if (state
!= statePrevious
) {
1933 DocModification
mh(SC_MOD_CHANGELINESTATE
, LineStart(line
), 0, 0, 0, line
);
1936 return statePrevious
;
1939 int SCI_METHOD
Document::GetLineState(Sci_Position line
) const {
1940 return static_cast<LineState
*>(perLineData
[ldState
])->GetLineState(line
);
1943 int Document::GetMaxLineState() {
1944 return static_cast<LineState
*>(perLineData
[ldState
])->GetMaxLineState();
// Prepare a SC_MOD_LEXERSTATE modification covering [start, end).
// NOTE(review): the call that sends the modification is not visible in this listing.
1947 void SCI_METHOD
Document::ChangeLexerState(Sci_Position start
, Sci_Position end
) {
1948 DocModification
mh(SC_MOD_LEXERSTATE
, start
, end
-start
, 0, 0, 0);
1952 StyledText
Document::MarginStyledText(int line
) const {
1953 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldMargin
]);
1954 return StyledText(pla
->Length(line
), pla
->Text(line
),
1955 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Set the margin text of a line and prepare a SC_MOD_CHANGEMARGIN
// modification for that line.
// NOTE(review): the call that sends the modification is not visible in this listing.
1958 void Document::MarginSetText(int line
, const char *text
) {
1959 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetText(line
, text
);
1960 DocModification
mh(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
);
1964 void Document::MarginSetStyle(int line
, int style
) {
1965 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyle(line
, style
);
1966 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1969 void Document::MarginSetStyles(int line
, const unsigned char *styles
) {
1970 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyles(line
, styles
);
1971 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1974 void Document::MarginClearAll() {
1975 int maxEditorLine
= LinesTotal();
1976 for (int l
=0; l
<maxEditorLine
; l
++)
1977 MarginSetText(l
, 0);
1978 // Free remaining data
1979 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->ClearAll();
1982 StyledText
Document::AnnotationStyledText(int line
) const {
1983 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
]);
1984 return StyledText(pla
->Length(line
), pla
->Text(line
),
1985 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Set the annotation text of a line; lines outside [0, LinesTotal()) are ignored.
// The number of annotation lines is sampled before and after the change so
// the prepared SC_MOD_CHANGEANNOTATION modification can carry the difference
// in annotationLinesAdded.
// NOTE(review): the call that sends the modification is not visible in this listing.
1988 void Document::AnnotationSetText(int line
, const char *text
) {
1989 if (line
>= 0 && line
< LinesTotal()) {
1990 const int linesBefore
= AnnotationLines(line
);
1991 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetText(line
, text
);
1992 const int linesAfter
= AnnotationLines(line
);
1993 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
1994 mh
.annotationLinesAdded
= linesAfter
- linesBefore
;
// Set the single style used for the annotation of a line and prepare a
// SC_MOD_CHANGEANNOTATION modification for that line.
// NOTE(review): the call that sends the modification is not visible in this listing.
1999 void Document::AnnotationSetStyle(int line
, int style
) {
2000 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyle(line
, style
);
2001 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
2005 void Document::AnnotationSetStyles(int line
, const unsigned char *styles
) {
2006 if (line
>= 0 && line
< LinesTotal()) {
2007 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyles(line
, styles
);
2011 int Document::AnnotationLines(int line
) const {
2012 return static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->Lines(line
);
2015 void Document::AnnotationClearAll() {
2016 int maxEditorLine
= LinesTotal();
2017 for (int l
=0; l
<maxEditorLine
; l
++)
2018 AnnotationSetText(l
, 0);
2019 // Free remaining data
2020 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->ClearAll();
2023 void Document::IncrementStyleClock() {
2024 styleClock
= (styleClock
+ 1) % 0x100000;
// Fill a range of the current decoration with a value. When FillRange
// reports a change, a SC_MOD_CHANGEINDICATOR modification covering the range
// is prepared.
// NOTE(review): the call that sends the modification is not visible in this listing.
2027 void SCI_METHOD
Document::DecorationFillRange(Sci_Position position
, int value
, Sci_Position fillLength
) {
2028 if (decorations
.FillRange(position
, value
, fillLength
)) {
2029 DocModification
mh(SC_MOD_CHANGEINDICATOR
| SC_PERFORMED_USER
,
2030 position
, fillLength
);
// Register a watcher / user data pair.
// The existing list is searched first for an equal pair; the pair is
// appended with push_back.
// NOTE(review): the return statements are not visible in this listing;
// presumably an already-registered pair is rejected - confirm.
2035 bool Document::AddWatcher(DocWatcher
*watcher
, void *userData
) {
2036 WatcherWithUserData
wwud(watcher
, userData
);
2037 std::vector
<WatcherWithUserData
>::iterator it
=
2038 std::find(watchers
.begin(), watchers
.end(), wwud
);
2039 if (it
!= watchers
.end())
2041 watchers
.push_back(wwud
);
// Look up a watcher / user data pair in the registered list.
// NOTE(review): the statements run when the pair is found, and the return
// statements, are not visible in this listing; presumably the pair is erased
// and success reported - confirm.
2045 bool Document::RemoveWatcher(DocWatcher
*watcher
, void *userData
) {
2046 std::vector
<WatcherWithUserData
>::iterator it
=
2047 std::find(watchers
.begin(), watchers
.end(), WatcherWithUserData(watcher
, userData
));
2048 if (it
!= watchers
.end()) {
2055 void Document::NotifyModifyAttempt() {
2056 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2057 it
->watcher
->NotifyModifyAttempt(this, it
->userData
);
2061 void Document::NotifySavePoint(bool atSavePoint
) {
2062 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2063 it
->watcher
->NotifySavePoint(this, it
->userData
, atSavePoint
);
2067 void Document::NotifyModified(DocModification mh
) {
2068 if (mh
.modificationType
& SC_MOD_INSERTTEXT
) {
2069 decorations
.InsertSpace(mh
.position
, mh
.length
);
2070 } else if (mh
.modificationType
& SC_MOD_DELETETEXT
) {
2071 decorations
.DeleteRange(mh
.position
, mh
.length
);
2073 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2074 it
->watcher
->NotifyModified(this, mh
, it
->userData
);
2078 bool Document::IsWordPartSeparator(char ch
) const {
2079 return (WordCharClass(ch
) == CharClassify::ccWord
) && IsPunctuation(ch
);
// Move pos left to the start of the current word part.
// Word part separators are passed first. The character then found selects
// which run is walked backwards: lower case, upper case, digits,
// punctuation, white space or non-ASCII; after each run the character
// reached is tested against the run's class.
// NOTE(review): the statements that move pos, the adjustment made after each
// run and the return statement are not visible in this listing.
2082 int Document::WordPartLeft(int pos
) {
2085 char startChar
= cb
.CharAt(pos
);
2086 if (IsWordPartSeparator(startChar
)) {
2087 while (pos
> 0 && IsWordPartSeparator(cb
.CharAt(pos
))) {
2092 startChar
= cb
.CharAt(pos
);
2094 if (IsLowerCase(startChar
)) {
2095 while (pos
> 0 && IsLowerCase(cb
.CharAt(pos
)))
2097 if (!IsUpperCase(cb
.CharAt(pos
)) && !IsLowerCase(cb
.CharAt(pos
)))
2099 } else if (IsUpperCase(startChar
)) {
2100 while (pos
> 0 && IsUpperCase(cb
.CharAt(pos
)))
2102 if (!IsUpperCase(cb
.CharAt(pos
)))
2104 } else if (IsADigit(startChar
)) {
2105 while (pos
> 0 && IsADigit(cb
.CharAt(pos
)))
2107 if (!IsADigit(cb
.CharAt(pos
)))
2109 } else if (IsPunctuation(startChar
)) {
2110 while (pos
> 0 && IsPunctuation(cb
.CharAt(pos
)))
2112 if (!IsPunctuation(cb
.CharAt(pos
)))
2114 } else if (isspacechar(startChar
)) {
2115 while (pos
> 0 && isspacechar(cb
.CharAt(pos
)))
2117 if (!isspacechar(cb
.CharAt(pos
)))
2119 } else if (!IsASCII(startChar
)) {
2120 while (pos
> 0 && !IsASCII(cb
.CharAt(pos
)))
2122 if (IsASCII(cb
.CharAt(pos
)))
// Move pos right to the end of the current word part.
// Word part separators are passed first. The character then found selects
// which run is walked forwards: non-ASCII, lower case, upper case (with
// separate handling when the next character is lower case), digits,
// punctuation or white space.
// NOTE(review): the statements that move pos and the return statement are
// not visible in this listing.
2132 int Document::WordPartRight(int pos
) {
2133 char startChar
= cb
.CharAt(pos
);
2134 int length
= Length();
2135 if (IsWordPartSeparator(startChar
)) {
2136 while (pos
< length
&& IsWordPartSeparator(cb
.CharAt(pos
)))
2138 startChar
= cb
.CharAt(pos
);
2140 if (!IsASCII(startChar
)) {
2141 while (pos
< length
&& !IsASCII(cb
.CharAt(pos
)))
2143 } else if (IsLowerCase(startChar
)) {
2144 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2146 } else if (IsUpperCase(startChar
)) {
2147 if (IsLowerCase(cb
.CharAt(pos
+ 1))) {
2149 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2152 while (pos
< length
&& IsUpperCase(cb
.CharAt(pos
)))
2155 if (IsLowerCase(cb
.CharAt(pos
)) && IsUpperCase(cb
.CharAt(pos
- 1)))
2157 } else if (IsADigit(startChar
)) {
2158 while (pos
< length
&& IsADigit(cb
.CharAt(pos
)))
2160 } else if (IsPunctuation(startChar
)) {
2161 while (pos
< length
&& IsPunctuation(cb
.CharAt(pos
)))
2163 } else if (isspacechar(startChar
)) {
2164 while (pos
< length
&& isspacechar(cb
.CharAt(pos
)))
// True when c is a line feed or a carriage return.
static bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
// Extend pos across the run of bytes that share the style found at pos,
// backwards or forwards. With singleLine set the run also stops at a line
// end character.
// NOTE(review): the test on delta, the statements that move pos and the
// return statement are not visible in this listing.
2176 int Document::ExtendStyleRange(int pos
, int delta
, bool singleLine
) {
2177 int sStart
= cb
.StyleAt(pos
);
2179 while (pos
> 0 && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2183 while (pos
< (Length()) && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
// Presumably maps a brace character to its matching partner.
// NOTE(review): the body is not visible in this listing - confirm which
// characters are handled and what is returned for anything else.
2189 static char BraceOpposite(char ch
) {
2212 // TODO: should be able to extend styled region to find matching brace
// Search for the brace that matches the one at position.
// The scan moves a whole character at a time with NextPosition. Only
// characters beyond the styled region, or carrying the same style as the
// starting brace, are compared with the brace and its opposite. The scan
// stops at either end of the document or when NextPosition makes no progress.
// The second parameter is unnamed and unused.
// NOTE(review): the direction choice, the depth bookkeeping and the return
// statements are not visible in this listing.
2213 int Document::BraceMatch(int position
, int /*maxReStyle*/) {
2214 char chBrace
= CharAt(position
);
2215 char chSeek
= BraceOpposite(chBrace
);
2218 const int styBrace
= StyleIndexAt(position
);
2220 if (chBrace
== '(' || chBrace
== '[' || chBrace
== '{' || chBrace
== '<')
2223 position
= NextPosition(position
, direction
);
2224 while ((position
>= 0) && (position
< Length())) {
2225 char chAtPos
= CharAt(position
);
2226 const int styAtPos
= StyleIndexAt(position
);
2227 if ((position
> GetEndStyled()) || (styAtPos
== styBrace
)) {
2228 if (chAtPos
== chBrace
)
2230 if (chAtPos
== chSeek
)
2235 int positionBeforeMove
= position
;
2236 position
= NextPosition(position
, direction
);
2237 if (position
== positionBeforeMove
)
2244 * Implementation of RegexSearchBase for the default built-in regular expression engine
// RegexSearchBase implementation used for the default built-in regular
// expression engine. The constructor passes the character class table to
// its `search` member; `substituted` is a string member.
// NOTE(review): access specifiers, the remaining FindText parameters and
// other members are not visible in this listing.
2246 class BuiltinRegex
: public RegexSearchBase
{
2248 explicit BuiltinRegex(CharClassify
*charClassTable
) : search(charClassTable
) {}
2250 virtual ~BuiltinRegex() {
2253 virtual long FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2254 bool caseSensitive
, bool word
, bool wordStart
, int flags
,
2257 virtual const char *SubstituteByPosition(Document
*doc
, const char *text
, int *length
);
2261 std::string substituted
;
2267 * RESearchRange keeps track of search range.
// Holds the normalised range of a regular expression search: direction
// (increment), start and end positions, and the lines they fall on.
// NOTE(review): member declarations other than `doc`, access specifiers and
// a few statements are not visible in this listing.
2269 class RESearchRange
{
2271 const Document
*doc
;
// Derive the search direction and line range from minPos/maxPos; the search
// runs backwards when minPos > maxPos.
2278 RESearchRange(const Document
*doc_
, int minPos
, int maxPos
) : doc(doc_
) {
2279 increment
= (minPos
<= maxPos
) ? 1 : -1;
2281 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2282 startPos
= doc
->MovePositionOutsideChar(minPos
, 1, false);
2283 endPos
= doc
->MovePositionOutsideChar(maxPos
, 1, false);
2285 lineRangeStart
= doc
->LineFromPosition(startPos
);
2286 lineRangeEnd
= doc
->LineFromPosition(endPos
);
2287 if ((increment
== 1) &&
2288 (startPos
>= doc
->LineEnd(lineRangeStart
)) &&
2289 (lineRangeStart
< lineRangeEnd
)) {
2290 // the start position is at end of line or between line end characters.
2292 startPos
= doc
->LineStart(lineRangeStart
);
2293 } else if ((increment
== -1) &&
2294 (startPos
<= doc
->LineStart(lineRangeStart
)) &&
2295 (lineRangeStart
> lineRangeEnd
)) {
2296 // the start position is at beginning of line.
2298 startPos
= doc
->LineEnd(lineRangeStart
);
// One line past the final line in the direction of travel.
2300 lineRangeBreak
= lineRangeEnd
+ increment
;
// Return the part of `line` that lies inside the search range: the whole
// line, clipped at the first and last lines of the range.
2302 Range
LineRange(int line
) const {
2303 Range
range(doc
->LineStart(line
), doc
->LineEnd(line
));
2304 if (increment
== 1) {
2305 if (line
== lineRangeStart
)
2306 range
.start
= startPos
;
2307 if (line
== lineRangeEnd
)
2310 if (line
== lineRangeEnd
)
2311 range
.start
= endPos
;
2312 if (line
== lineRangeStart
)
2313 range
.end
= startPos
;
2319 // Define a way for the Regular Expression code to access the document
// CharAt forwards to the document for indices in [0, end); the value used
// for indices outside that range is not visible in this listing.
2320 class DocumentIndexer
: public CharacterIndexer
{
2324 DocumentIndexer(Document
*pdoc_
, int end_
) :
2325 pdoc(pdoc_
), end(end_
) {
2328 virtual ~DocumentIndexer() {
2331 virtual char CharAt(int index
) {
2332 if (index
< 0 || index
>= end
)
2335 return pdoc
->CharAt(index
);
// Bidirectional iterator over the bytes of a Document.
// Dereferencing reads the byte at the current position through
// Document::CharAt; two iterators are equal when both the document and the
// position match.
// NOTE(review): the `position` member declaration, access specifiers and the
// bodies of the increment/decrement operators and PosRoundUp are not visible
// in this listing.
2341 class ByteIterator
: public std::iterator
<std::bidirectional_iterator_tag
, char> {
2343 const Document
*doc
;
2345 ByteIterator(const Document
*doc_
= 0, Position position_
= 0) : doc(doc_
), position(position_
) {
2347 ByteIterator(const ByteIterator
&other
) {
2349 position
= other
.position
;
2351 ByteIterator
&operator=(const ByteIterator
&other
) {
2352 if (this != &other
) {
2354 position
= other
.position
;
2358 char operator*() const {
2359 return doc
->CharAt(position
);
2361 ByteIterator
&operator++() {
2365 ByteIterator
operator++(int) {
2366 ByteIterator
retVal(*this);
2370 ByteIterator
&operator--() {
2374 bool operator==(const ByteIterator
&other
) const {
2375 return doc
== other
.doc
&& position
== other
.position
;
2377 bool operator!=(const ByteIterator
&other
) const {
2378 return doc
!= other
.doc
|| position
!= other
.position
;
2383 int PosRoundUp() const {
2388 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2389 // Would be better to use sizeof(wchar_t) or similar to differentiate
2390 // but easier for now to hard-code platforms.
2391 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2392 // appear to allow specializing basic_regex over these.
2395 #define WCHAR_T_IS_16 1
2397 #define WCHAR_T_IS_16 0
2402 // On Windows, report non-BMP characters as 2 separate surrogates as that
2403 // matches wregex since it is based on wchar_t.
2404 class UTF8Iterator
: public std::iterator
<std::bidirectional_iterator_tag
, wchar_t> {
2405 // These 3 fields determine the iterator position and are used for comparisons
2406 const Document
*doc
;
2408 size_t characterIndex
;
2409 // Remaining fields are derived from the determining fields so are excluded in comparisons
2410 unsigned int lenBytes
;
2411 size_t lenCharacters
;
2412 wchar_t buffered
[2];
2414 UTF8Iterator(const Document
*doc_
= 0, Position position_
= 0) :
2415 doc(doc_
), position(position_
), characterIndex(0), lenBytes(0), lenCharacters(0) {
2422 UTF8Iterator(const UTF8Iterator
&other
) {
2424 position
= other
.position
;
2425 characterIndex
= other
.characterIndex
;
2426 lenBytes
= other
.lenBytes
;
2427 lenCharacters
= other
.lenCharacters
;
2428 buffered
[0] = other
.buffered
[0];
2429 buffered
[1] = other
.buffered
[1];
2431 UTF8Iterator
&operator=(const UTF8Iterator
&other
) {
2432 if (this != &other
) {
2434 position
= other
.position
;
2435 characterIndex
= other
.characterIndex
;
2436 lenBytes
= other
.lenBytes
;
2437 lenCharacters
= other
.lenCharacters
;
2438 buffered
[0] = other
.buffered
[0];
2439 buffered
[1] = other
.buffered
[1];
2443 wchar_t operator*() const {
2444 assert(lenCharacters
!= 0);
2445 return buffered
[characterIndex
];
2447 UTF8Iterator
&operator++() {
2448 if ((characterIndex
+ 1) < (lenCharacters
)) {
2451 position
+= lenBytes
;
2457 UTF8Iterator
operator++(int) {
2458 UTF8Iterator
retVal(*this);
2459 if ((characterIndex
+ 1) < (lenCharacters
)) {
2462 position
+= lenBytes
;
2468 UTF8Iterator
&operator--() {
2469 if (characterIndex
) {
2472 position
= doc
->NextPosition(position
, -1);
2474 characterIndex
= lenCharacters
- 1;
2478 bool operator==(const UTF8Iterator
&other
) const {
2479 // Only test the determining fields, not the character widths and values derived from this
2480 return doc
== other
.doc
&&
2481 position
== other
.position
&&
2482 characterIndex
== other
.characterIndex
;
2484 bool operator!=(const UTF8Iterator
&other
) const {
2485 // Only test the determining fields, not the character widths and values derived from this
2486 return doc
!= other
.doc
||
2487 position
!= other
.position
||
2488 characterIndex
!= other
.characterIndex
;
2493 int PosRoundUp() const {
2495 return position
+ lenBytes
; // Force to end of character
2500 void ReadCharacter() {
2501 Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2502 lenBytes
= charExtracted
.widthBytes
;
2503 if (charExtracted
.character
== unicodeReplacementChar
) {
2505 buffered
[0] = static_cast<wchar_t>(charExtracted
.character
);
2507 lenCharacters
= UTF16FromUTF32Character(charExtracted
.character
, buffered
);
2514 // On Unix, report non-BMP characters as single characters
2516 class UTF8Iterator
: public std::iterator
<std::bidirectional_iterator_tag
, wchar_t> {
2517 const Document
*doc
;
2520 UTF8Iterator(const Document
*doc_
=0, Position position_
=0) : doc(doc_
), position(position_
) {
2522 UTF8Iterator(const UTF8Iterator
&other
) {
2524 position
= other
.position
;
2526 UTF8Iterator
&operator=(const UTF8Iterator
&other
) {
2527 if (this != &other
) {
2529 position
= other
.position
;
2533 wchar_t operator*() const {
2534 Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2535 return charExtracted
.character
;
2537 UTF8Iterator
&operator++() {
2538 position
= doc
->NextPosition(position
, 1);
2541 UTF8Iterator
operator++(int) {
2542 UTF8Iterator
retVal(*this);
2543 position
= doc
->NextPosition(position
, 1);
2546 UTF8Iterator
&operator--() {
2547 position
= doc
->NextPosition(position
, -1);
2550 bool operator==(const UTF8Iterator
&other
) const {
2551 return doc
== other
.doc
&& position
== other
.position
;
2553 bool operator!=(const UTF8Iterator
&other
) const {
2554 return doc
!= other
.doc
|| position
!= other
.position
;
2559 int PosRoundUp() const {
2566 std::regex_constants::match_flag_type
MatchFlags(const Document
*doc
, int startPos
, int endPos
) {
2567 std::regex_constants::match_flag_type flagsMatch
= std::regex_constants::match_default
;
2568 if (!doc
->IsLineStartPosition(startPos
))
2569 flagsMatch
|= std::regex_constants::match_not_bol
;
2570 if (!doc
->IsLineEndPosition(endPos
))
2571 flagsMatch
|= std::regex_constants::match_not_eol
;
2575 template<typename Iterator
, typename Regex
>
2576 bool MatchOnLines(const Document
*doc
, const Regex
®exp
, const RESearchRange
&resr
, RESearch
&search
) {
2577 bool matched
= false;
2578 std::match_results
<Iterator
> match
;
2580 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2581 // If they didn't then the line by line iteration could be removed for the forwards
2582 // case and replaced with the following 4 lines:
2583 // Iterator uiStart(doc, startPos);
2584 // Iterator uiEnd(doc, endPos);
2585 // flagsMatch = MatchFlags(doc, startPos, endPos);
2586 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2589 for (int line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
2590 const Range lineRange
= resr
.LineRange(line
);
2591 Iterator
itStart(doc
, lineRange
.start
);
2592 Iterator
itEnd(doc
, lineRange
.end
);
2593 std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, lineRange
.start
, lineRange
.end
);
2594 matched
= std::regex_search(itStart
, itEnd
, match
, regexp
, flagsMatch
);
2595 // Check for the last match on this line.
2597 if (resr
.increment
== -1) {
2599 Iterator
itNext(doc
, match
[0].second
.PosRoundUp());
2600 flagsMatch
= MatchFlags(doc
, itNext
.Pos(), lineRange
.end
);
2601 std::match_results
<Iterator
> matchNext
;
2602 matched
= std::regex_search(itNext
, itEnd
, matchNext
, regexp
, flagsMatch
);
2604 if (match
[0].first
== match
[0].second
) {
2605 // Empty match means failure so exit
2617 for (size_t co
= 0; co
< match
.size(); co
++) {
2618 search
.bopat
[co
] = match
[co
].first
.Pos();
2619 search
.eopat
[co
] = match
[co
].second
.PosRoundUp();
2620 size_t lenMatch
= search
.eopat
[co
] - search
.bopat
[co
];
2621 search
.pat
[co
].resize(lenMatch
);
2622 for (size_t iPos
= 0; iPos
< lenMatch
; iPos
++) {
2623 search
.pat
[co
][iPos
] = doc
->CharAt(iPos
+ search
.bopat
[co
]);
2630 long Cxx11RegexFindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2631 bool caseSensitive
, int *length
, RESearch
&search
) {
2632 const RESearchRange
resr(doc
, minPos
, maxPos
);
2635 std::regex::flag_type flagsRe
= std::regex::ECMAScript
;
2636 // Flags that apper to have no effect:
2637 // | std::regex::collate | std::regex::extended;
2639 flagsRe
= flagsRe
| std::regex::icase
;
2641 // Clear the RESearch so can fill in matches
2644 bool matched
= false;
2645 if (SC_CP_UTF8
== doc
->dbcsCodePage
) {
2646 unsigned int lenS
= static_cast<unsigned int>(strlen(s
));
2647 std::vector
<wchar_t> ws(lenS
+ 1);
2649 size_t outLen
= UTF16FromUTF8(s
, lenS
, &ws
[0], lenS
);
2651 size_t outLen
= UTF32FromUTF8(s
, lenS
, reinterpret_cast<unsigned int *>(&ws
[0]), lenS
);
2655 #if defined(__APPLE__)
2656 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2657 // is one byte not one character.
2658 // However, on OS X this makes wregex act as Unicode
2659 std::locale
localeU("en_US.UTF-8");
2660 regexp
.imbue(localeU
);
2662 regexp
.assign(&ws
[0], flagsRe
);
2663 matched
= MatchOnLines
<UTF8Iterator
>(doc
, regexp
, resr
, search
);
2667 regexp
.assign(s
, flagsRe
);
2668 matched
= MatchOnLines
<ByteIterator
>(doc
, regexp
, resr
, search
);
2673 posMatch
= search
.bopat
[0];
2674 *length
= search
.eopat
[0] - search
.bopat
[0];
2676 // Example - search in doc/ScintillaHistory.html for
2677 // [[:upper:]]eta[[:space:]]
2678 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2679 //double durSearch = et.Duration(true);
2680 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2682 } catch (std::regex_error
&) {
2683 // Failed to create regular expression
2686 // Failed in some other way
2695 long BuiltinRegex::FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2696 bool caseSensitive
, bool, bool, int flags
,
2700 if (flags
& SCFIND_CXX11REGEX
) {
2701 return Cxx11RegexFindText(doc
, minPos
, maxPos
, s
,
2702 caseSensitive
, length
, search
);
2706 const RESearchRange
resr(doc
, minPos
, maxPos
);
2708 const bool posix
= (flags
& SCFIND_POSIX
) != 0;
2710 const char *errmsg
= search
.Compile(s
, *length
, caseSensitive
, posix
);
2714 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2715 // Replace first '.' with '-' in each property file variable reference:
2716 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2717 // Replace: $(\1-\2)
2720 const char searchEnd
= s
[*length
- 1];
2721 const char searchEndPrev
= (*length
> 1) ? s
[*length
- 2] : '\0';
2722 for (int line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
2723 int startOfLine
= doc
->LineStart(line
);
2724 int endOfLine
= doc
->LineEnd(line
);
2725 if (resr
.increment
== 1) {
2726 if (line
== resr
.lineRangeStart
) {
2727 if ((resr
.startPos
!= startOfLine
) && (s
[0] == '^'))
2728 continue; // Can't match start of line if start position after start of line
2729 startOfLine
= resr
.startPos
;
2731 if (line
== resr
.lineRangeEnd
) {
2732 if ((resr
.endPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2733 continue; // Can't match end of line if end position before end of line
2734 endOfLine
= resr
.endPos
;
2737 if (line
== resr
.lineRangeEnd
) {
2738 if ((resr
.endPos
!= startOfLine
) && (s
[0] == '^'))
2739 continue; // Can't match start of line if end position after start of line
2740 startOfLine
= resr
.endPos
;
2742 if (line
== resr
.lineRangeStart
) {
2743 if ((resr
.startPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2744 continue; // Can't match end of line if start position before end of line
2745 endOfLine
= resr
.startPos
;
2749 DocumentIndexer
di(doc
, endOfLine
);
2750 int success
= search
.Execute(di
, startOfLine
, endOfLine
);
2752 pos
= search
.bopat
[0];
2753 // Ensure only whole characters selected
2754 search
.eopat
[0] = doc
->MovePositionOutsideChar(search
.eopat
[0], 1, false);
2755 lenRet
= search
.eopat
[0] - search
.bopat
[0];
2756 // There can be only one start of a line, so no need to look for last match in line
2757 if ((resr
.increment
== -1) && (s
[0] != '^')) {
2758 // Check for the last match on this line.
2759 int repetitions
= 1000; // Break out of infinite loop
2760 while (success
&& (search
.eopat
[0] <= endOfLine
) && (repetitions
--)) {
2761 success
= search
.Execute(di
, pos
+1, endOfLine
);
2763 if (search
.eopat
[0] <= minPos
) {
2764 pos
= search
.bopat
[0];
2765 lenRet
= search
.eopat
[0] - search
.bopat
[0];
2779 const char *BuiltinRegex::SubstituteByPosition(Document
*doc
, const char *text
, int *length
) {
2780 substituted
.clear();
2781 DocumentIndexer
di(doc
, doc
->Length());
2782 search
.GrabMatches(di
);
2783 for (int j
= 0; j
< *length
; j
++) {
2784 if (text
[j
] == '\\') {
2785 if (text
[j
+ 1] >= '0' && text
[j
+ 1] <= '9') {
2786 unsigned int patNum
= text
[j
+ 1] - '0';
2787 unsigned int len
= search
.eopat
[patNum
] - search
.bopat
[patNum
];
2788 if (!search
.pat
[patNum
].empty()) // Will be null if try for a match that did not occur
2789 substituted
.append(search
.pat
[patNum
].c_str(), len
);
2795 substituted
.push_back('\a');
2798 substituted
.push_back('\b');
2801 substituted
.push_back('\f');
2804 substituted
.push_back('\n');
2807 substituted
.push_back('\r');
2810 substituted
.push_back('\t');
2813 substituted
.push_back('\v');
2816 substituted
.push_back('\\');
2819 substituted
.push_back('\\');
2824 substituted
.push_back(text
[j
]);
2827 *length
= static_cast<int>(substituted
.length());
2828 return substituted
.c_str();
2831 #ifndef SCI_OWNREGEX
2833 #ifdef SCI_NAMESPACE
2835 RegexSearchBase
*Scintilla::CreateRegexSearch(CharClassify
*charClassTable
) {
2836 return new BuiltinRegex(charClassTable
);
2841 RegexSearchBase
*CreateRegexSearch(CharClassify
*charClassTable
) {
2842 return new BuiltinRegex(charClassTable
);