1 // Scintilla source code edit control
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "CellBuffer.h"
34 #include "CharClassify.h"
35 #include "Decoration.h"
36 #include "CaseFolder.h"
39 #include "UniConversion.h"
42 using namespace Scintilla
;
45 static inline bool IsPunctuation(char ch
) {
46 return IsASCII(ch
) && ispunct(ch
);
49 void LexInterface::Colourise(int start
, int end
) {
50 if (pdoc
&& instance
&& !performingStyle
) {
51 // Protect against reentrance, which may occur, for example, when
52 // fold points are discovered while performing styling and the folding
53 // code looks for child lines which may trigger styling.
54 performingStyle
= true;
56 int lengthDoc
= pdoc
->Length();
59 int len
= end
- start
;
61 PLATFORM_ASSERT(len
>= 0);
62 PLATFORM_ASSERT(start
+ len
<= lengthDoc
);
66 styleStart
= pdoc
->StyleAt(start
- 1);
69 instance
->Lex(start
, len
, styleStart
, pdoc
);
70 instance
->Fold(start
, len
, styleStart
, pdoc
);
73 performingStyle
= false;
77 int LexInterface::LineEndTypesSupported() {
79 int interfaceVersion
= instance
->Version();
80 if (interfaceVersion
>= lvSubStyles
) {
81 ILexerWithSubStyles
*ssinstance
= static_cast<ILexerWithSubStyles
*>(instance
);
82 return ssinstance
->LineEndTypesSupported();
88 Document::Document() {
92 eolMode
= SC_EOL_CRLF
;
97 lineEndBitSet
= SC_LINE_END_TYPE_DEFAULT
;
100 enteredModification
= 0;
102 enteredReadOnlyCount
= 0;
103 insertionSet
= false;
106 actualIndentInChars
= 8;
109 backspaceUnindents
= false;
111 matchesValid
= false;
114 UTF8BytesOfLeadInitialise();
116 perLineData
[ldMarkers
] = new LineMarkers();
117 perLineData
[ldLevels
] = new LineLevels();
118 perLineData
[ldState
] = new LineState();
119 perLineData
[ldMargin
] = new LineAnnotation();
120 perLineData
[ldAnnotation
] = new LineAnnotation();
127 Document::~Document() {
128 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
129 it
->watcher
->NotifyDeleted(this, it
->userData
);
131 for (int j
=0; j
<ldSize
; j
++) {
132 delete perLineData
[j
];
143 void Document::Init() {
144 for (int j
=0; j
<ldSize
; j
++) {
146 perLineData
[j
]->Init();
150 int Document::LineEndTypesSupported() const {
151 if ((SC_CP_UTF8
== dbcsCodePage
) && pli
)
152 return pli
->LineEndTypesSupported();
157 bool Document::SetDBCSCodePage(int dbcsCodePage_
) {
158 if (dbcsCodePage
!= dbcsCodePage_
) {
159 dbcsCodePage
= dbcsCodePage_
;
161 cb
.SetLineEndTypes(lineEndBitSet
& LineEndTypesSupported());
168 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_
) {
169 if (lineEndBitSet
!= lineEndBitSet_
) {
170 lineEndBitSet
= lineEndBitSet_
;
171 int lineEndBitSetActive
= lineEndBitSet
& LineEndTypesSupported();
172 if (lineEndBitSetActive
!= cb
.GetLineEndTypes()) {
174 cb
.SetLineEndTypes(lineEndBitSetActive
);
184 void Document::InsertLine(int line
) {
185 for (int j
=0; j
<ldSize
; j
++) {
187 perLineData
[j
]->InsertLine(line
);
191 void Document::RemoveLine(int line
) {
192 for (int j
=0; j
<ldSize
; j
++) {
194 perLineData
[j
]->RemoveLine(line
);
198 // Increase reference count and return its previous value.
199 int Document::AddRef() {
203 // Decrease reference count and return its new value.
204 // Delete the document if reference count reaches zero.
205 int SCI_METHOD
Document::Release() {
206 int curRefCount
= --refCount
;
207 if (curRefCount
== 0)
212 void Document::SetSavePoint() {
214 NotifySavePoint(true);
217 void Document::TentativeUndo() {
219 if (enteredModification
== 0) {
220 enteredModification
++;
221 if (!cb
.IsReadOnly()) {
222 bool startSavePoint
= cb
.IsSavePoint();
223 bool multiLine
= false;
224 int steps
= cb
.TentativeSteps();
225 //Platform::DebugPrintf("Steps=%d\n", steps);
226 for (int step
= 0; step
< steps
; step
++) {
227 const int prevLinesTotal
= LinesTotal();
228 const Action
&action
= cb
.GetUndoStep();
229 if (action
.at
== removeAction
) {
230 NotifyModified(DocModification(
231 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
232 } else if (action
.at
== containerAction
) {
233 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
234 dm
.token
= action
.position
;
237 NotifyModified(DocModification(
238 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
240 cb
.PerformUndoStep();
241 if (action
.at
!= containerAction
) {
242 ModifiedAt(action
.position
);
245 int modFlags
= SC_PERFORMED_UNDO
;
246 // With undo, an insertion action becomes a deletion notification
247 if (action
.at
== removeAction
) {
248 modFlags
|= SC_MOD_INSERTTEXT
;
249 } else if (action
.at
== insertAction
) {
250 modFlags
|= SC_MOD_DELETETEXT
;
253 modFlags
|= SC_MULTISTEPUNDOREDO
;
254 const int linesAdded
= LinesTotal() - prevLinesTotal
;
257 if (step
== steps
- 1) {
258 modFlags
|= SC_LASTSTEPINUNDOREDO
;
260 modFlags
|= SC_MULTILINEUNDOREDO
;
262 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
263 linesAdded
, action
.data
));
266 bool endSavePoint
= cb
.IsSavePoint();
267 if (startSavePoint
!= endSavePoint
)
268 NotifySavePoint(endSavePoint
);
270 cb
.TentativeCommit();
272 enteredModification
--;
276 int Document::GetMark(int line
) {
277 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkValue(line
);
280 int Document::MarkerNext(int lineStart
, int mask
) const {
281 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkerNext(lineStart
, mask
);
284 int Document::AddMark(int line
, int markerNum
) {
285 if (line
>= 0 && line
<= LinesTotal()) {
286 int prev
= static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
287 AddMark(line
, markerNum
, LinesTotal());
288 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
296 void Document::AddMarkSet(int line
, int valueSet
) {
297 if (line
< 0 || line
> LinesTotal()) {
300 unsigned int m
= valueSet
;
301 for (int i
= 0; m
; i
++, m
>>= 1)
303 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
304 AddMark(line
, i
, LinesTotal());
305 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
309 void Document::DeleteMark(int line
, int markerNum
) {
310 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, false);
311 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
315 void Document::DeleteMarkFromHandle(int markerHandle
) {
316 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMarkFromHandle(markerHandle
);
317 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
322 void Document::DeleteAllMarks(int markerNum
) {
323 bool someChanges
= false;
324 for (int line
= 0; line
< LinesTotal(); line
++) {
325 if (static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, true))
329 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
335 int Document::LineFromHandle(int markerHandle
) {
336 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->LineFromHandle(markerHandle
);
339 int SCI_METHOD
Document::LineStart(int line
) const {
340 return cb
.LineStart(line
);
343 bool Document::IsLineStartPosition(int position
) const {
344 return LineStart(LineFromPosition(position
)) == position
;
347 int SCI_METHOD
Document::LineEnd(int line
) const {
348 if (line
>= LinesTotal() - 1) {
349 return LineStart(line
+ 1);
351 int position
= LineStart(line
+ 1);
352 if (SC_CP_UTF8
== dbcsCodePage
) {
353 unsigned char bytes
[] = {
354 static_cast<unsigned char>(cb
.CharAt(position
-3)),
355 static_cast<unsigned char>(cb
.CharAt(position
-2)),
356 static_cast<unsigned char>(cb
.CharAt(position
-1)),
358 if (UTF8IsSeparator(bytes
)) {
359 return position
- UTF8SeparatorLength
;
361 if (UTF8IsNEL(bytes
+1)) {
362 return position
- UTF8NELLength
;
365 position
--; // Back over CR or LF
366 // When line terminator is CR+LF, may need to go back one more
367 if ((position
> LineStart(line
)) && (cb
.CharAt(position
- 1) == '\r')) {
374 void SCI_METHOD
Document::SetErrorStatus(int status
) {
375 // Tell the watchers an error has occurred.
376 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
377 it
->watcher
->NotifyErrorOccurred(this, it
->userData
, status
);
381 int SCI_METHOD
Document::LineFromPosition(int pos
) const {
382 return cb
.LineFromPosition(pos
);
385 int Document::LineEndPosition(int position
) const {
386 return LineEnd(LineFromPosition(position
));
389 bool Document::IsLineEndPosition(int position
) const {
390 return LineEnd(LineFromPosition(position
)) == position
;
393 bool Document::IsPositionInLineEnd(int position
) const {
394 return position
>= LineEnd(LineFromPosition(position
));
397 int Document::VCHomePosition(int position
) const {
398 int line
= LineFromPosition(position
);
399 int startPosition
= LineStart(line
);
400 int endLine
= LineEnd(line
);
401 int startText
= startPosition
;
402 while (startText
< endLine
&& (cb
.CharAt(startText
) == ' ' || cb
.CharAt(startText
) == '\t'))
404 if (position
== startText
)
405 return startPosition
;
410 int SCI_METHOD
Document::SetLevel(int line
, int level
) {
411 int prev
= static_cast<LineLevels
*>(perLineData
[ldLevels
])->SetLevel(line
, level
, LinesTotal());
413 DocModification
mh(SC_MOD_CHANGEFOLD
| SC_MOD_CHANGEMARKER
,
414 LineStart(line
), 0, 0, 0, line
);
415 mh
.foldLevelNow
= level
;
416 mh
.foldLevelPrev
= prev
;
422 int SCI_METHOD
Document::GetLevel(int line
) const {
423 return static_cast<LineLevels
*>(perLineData
[ldLevels
])->GetLevel(line
);
426 void Document::ClearLevels() {
427 static_cast<LineLevels
*>(perLineData
[ldLevels
])->ClearLevels();
430 static bool IsSubordinate(int levelStart
, int levelTry
) {
431 if (levelTry
& SC_FOLDLEVELWHITEFLAG
)
434 return (levelStart
& SC_FOLDLEVELNUMBERMASK
) < (levelTry
& SC_FOLDLEVELNUMBERMASK
);
437 int Document::GetLastChild(int lineParent
, int level
, int lastLine
) {
439 level
= GetLevel(lineParent
) & SC_FOLDLEVELNUMBERMASK
;
440 int maxLine
= LinesTotal();
441 int lookLastLine
= (lastLine
!= -1) ? Platform::Minimum(LinesTotal() - 1, lastLine
) : -1;
442 int lineMaxSubord
= lineParent
;
443 while (lineMaxSubord
< maxLine
- 1) {
444 EnsureStyledTo(LineStart(lineMaxSubord
+ 2));
445 if (!IsSubordinate(level
, GetLevel(lineMaxSubord
+ 1)))
447 if ((lookLastLine
!= -1) && (lineMaxSubord
>= lookLastLine
) && !(GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
))
451 if (lineMaxSubord
> lineParent
) {
452 if (level
> (GetLevel(lineMaxSubord
+ 1) & SC_FOLDLEVELNUMBERMASK
)) {
453 // Have chewed up some whitespace that belongs to a parent so seek back
454 if (GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
) {
459 return lineMaxSubord
;
462 int Document::GetFoldParent(int line
) const {
463 int level
= GetLevel(line
) & SC_FOLDLEVELNUMBERMASK
;
464 int lineLook
= line
- 1;
465 while ((lineLook
> 0) && (
466 (!(GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
)) ||
467 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) >= level
))
471 if ((GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
) &&
472 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) < level
)) {
479 void Document::GetHighlightDelimiters(HighlightDelimiter
&highlightDelimiter
, int line
, int lastLine
) {
480 int level
= GetLevel(line
);
481 int lookLastLine
= Platform::Maximum(line
, lastLine
) + 1;
484 int lookLineLevel
= level
;
485 int lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
486 while ((lookLine
> 0) && ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) ||
487 ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
>= (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))))) {
488 lookLineLevel
= GetLevel(--lookLine
);
489 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
492 int beginFoldBlock
= (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) ? lookLine
: GetFoldParent(lookLine
);
493 if (beginFoldBlock
== -1) {
494 highlightDelimiter
.Clear();
498 int endFoldBlock
= GetLastChild(beginFoldBlock
, -1, lookLastLine
);
499 int firstChangeableLineBefore
= -1;
500 if (endFoldBlock
< line
) {
501 lookLine
= beginFoldBlock
- 1;
502 lookLineLevel
= GetLevel(lookLine
);
503 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
504 while ((lookLine
>= 0) && (lookLineLevelNum
>= SC_FOLDLEVELBASE
)) {
505 if (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) {
506 if (GetLastChild(lookLine
, -1, lookLastLine
) == line
) {
507 beginFoldBlock
= lookLine
;
509 firstChangeableLineBefore
= line
- 1;
512 if ((lookLine
> 0) && (lookLineLevelNum
== SC_FOLDLEVELBASE
) && ((GetLevel(lookLine
- 1) & SC_FOLDLEVELNUMBERMASK
) > lookLineLevelNum
))
514 lookLineLevel
= GetLevel(--lookLine
);
515 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
518 if (firstChangeableLineBefore
== -1) {
519 for (lookLine
= line
- 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
520 lookLine
>= beginFoldBlock
;
521 lookLineLevel
= GetLevel(--lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
522 if ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) || (lookLineLevelNum
> (level
& SC_FOLDLEVELNUMBERMASK
))) {
523 firstChangeableLineBefore
= lookLine
;
528 if (firstChangeableLineBefore
== -1)
529 firstChangeableLineBefore
= beginFoldBlock
- 1;
531 int firstChangeableLineAfter
= -1;
532 for (lookLine
= line
+ 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
533 lookLine
<= endFoldBlock
;
534 lookLineLevel
= GetLevel(++lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
535 if ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
< (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))) {
536 firstChangeableLineAfter
= lookLine
;
540 if (firstChangeableLineAfter
== -1)
541 firstChangeableLineAfter
= endFoldBlock
+ 1;
543 highlightDelimiter
.beginFoldBlock
= beginFoldBlock
;
544 highlightDelimiter
.endFoldBlock
= endFoldBlock
;
545 highlightDelimiter
.firstChangeableLineBefore
= firstChangeableLineBefore
;
546 highlightDelimiter
.firstChangeableLineAfter
= firstChangeableLineAfter
;
549 int Document::ClampPositionIntoDocument(int pos
) const {
550 return Platform::Clamp(pos
, 0, Length());
553 bool Document::IsCrLf(int pos
) const {
556 if (pos
>= (Length() - 1))
558 return (cb
.CharAt(pos
) == '\r') && (cb
.CharAt(pos
+ 1) == '\n');
561 int Document::LenChar(int pos
) {
564 } else if (IsCrLf(pos
)) {
566 } else if (SC_CP_UTF8
== dbcsCodePage
) {
567 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
568 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
569 int lengthDoc
= Length();
570 if ((pos
+ widthCharBytes
) > lengthDoc
)
571 return lengthDoc
- pos
;
573 return widthCharBytes
;
574 } else if (dbcsCodePage
) {
575 return IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
581 bool Document::InGoodUTF8(int pos
, int &start
, int &end
) const {
583 while ((trail
>0) && (pos
-trail
< UTF8MaxBytes
) && UTF8IsTrailByte(static_cast<unsigned char>(cb
.CharAt(trail
-1))))
585 start
= (trail
> 0) ? trail
-1 : trail
;
587 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(start
));
588 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
589 if (widthCharBytes
== 1) {
592 int trailBytes
= widthCharBytes
- 1;
593 int len
= pos
- start
;
594 if (len
> trailBytes
)
595 // pos too far from lead
597 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
598 for (int b
=1; b
<widthCharBytes
&& ((start
+b
) < Length()); b
++)
599 charBytes
[b
] = cb
.CharAt(static_cast<int>(start
+b
));
600 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
601 if (utf8status
& UTF8MaskInvalid
)
603 end
= start
+ widthCharBytes
;
608 // Normalise a position so that it is not halfway through a two byte character.
609 // This can occur in two situations -
610 // When lines are terminated with \r\n pairs which should be treated as one character.
611 // When displaying DBCS text such as Japanese.
612 // If moving, move the position in the indicated direction.
613 int Document::MovePositionOutsideChar(int pos
, int moveDir
, bool checkLineEnd
) const {
614 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
615 // If out of range, just return minimum/maximum value.
621 // PLATFORM_ASSERT(pos > 0 && pos < Length());
622 if (checkLineEnd
&& IsCrLf(pos
- 1)) {
630 if (SC_CP_UTF8
== dbcsCodePage
) {
631 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
632 // If ch is not a trail byte then pos is valid intercharacter position
633 if (UTF8IsTrailByte(ch
)) {
636 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
637 // ch is a trail byte within a UTF-8 character
643 // Else invalid UTF-8 so return position of isolated trail byte
646 // Anchor DBCS calculations at start of line because start of line can
647 // not be a DBCS trail byte.
648 int posStartLine
= LineStart(LineFromPosition(pos
));
649 if (pos
== posStartLine
)
652 // Step back until a non-lead-byte is found.
654 while ((posCheck
> posStartLine
) && IsDBCSLeadByte(cb
.CharAt(posCheck
-1)))
657 // Check from known start of character.
658 while (posCheck
< pos
) {
659 int mbsize
= IsDBCSLeadByte(cb
.CharAt(posCheck
)) ? 2 : 1;
660 if (posCheck
+ mbsize
== pos
) {
662 } else if (posCheck
+ mbsize
> pos
) {
664 return posCheck
+ mbsize
;
677 // NextPosition moves between valid positions - it can not handle a position in the middle of a
678 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
679 // A \r\n pair is treated as two characters.
680 int Document::NextPosition(int pos
, int moveDir
) const {
681 // If out of range, just return minimum/maximum value.
682 int increment
= (moveDir
> 0) ? 1 : -1;
683 if (pos
+ increment
<= 0)
685 if (pos
+ increment
>= Length())
689 if (SC_CP_UTF8
== dbcsCodePage
) {
690 if (increment
== 1) {
691 // Simple forward movement case so can avoid some checks
692 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
693 if (UTF8IsAscii(leadByte
)) {
694 // Single byte character or invalid
697 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
698 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
699 for (int b
=1; b
<widthCharBytes
; b
++)
700 charBytes
[b
] = cb
.CharAt(static_cast<int>(pos
+b
));
701 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
702 if (utf8status
& UTF8MaskInvalid
)
705 pos
+= utf8status
& UTF8MaskWidth
;
708 // Examine byte before position
710 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
711 // If ch is not a trail byte then pos is valid intercharacter position
712 if (UTF8IsTrailByte(ch
)) {
713 // If ch is a trail byte in a valid UTF-8 character then return start of character
716 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
719 // Else invalid UTF-8 so return position of isolated trail byte
724 int mbsize
= IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
729 // Anchor DBCS calculations at start of line because start of line can
730 // not be a DBCS trail byte.
731 int posStartLine
= LineStart(LineFromPosition(pos
));
732 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
733 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
734 if ((pos
- 1) <= posStartLine
) {
736 } else if (IsDBCSLeadByte(cb
.CharAt(pos
- 1))) {
737 // Must actually be trail byte
740 // Otherwise, step back until a non-lead-byte is found.
741 int posTemp
= pos
- 1;
742 while (posStartLine
<= --posTemp
&& IsDBCSLeadByte(cb
.CharAt(posTemp
)))
744 // Now posTemp+1 must point to the beginning of a character,
745 // so figure out whether we went back an even or an odd
746 // number of bytes and go back 1 or 2 bytes, respectively.
747 return (pos
- 1 - ((pos
- posTemp
) & 1));
758 bool Document::NextCharacter(int &pos
, int moveDir
) const {
759 // Returns true if pos changed
760 int posNext
= NextPosition(pos
, moveDir
);
761 if (posNext
== pos
) {
769 static inline int UnicodeFromBytes(const unsigned char *us
) {
772 } else if (us
[0] < 0xE0) {
773 return ((us
[0] & 0x1F) << 6) + (us
[1] & 0x3F);
774 } else if (us
[0] < 0xF0) {
775 return ((us
[0] & 0xF) << 12) + ((us
[1] & 0x3F) << 6) + (us
[2] & 0x3F);
776 } else if (us
[0] < 0xF5) {
777 return ((us
[0] & 0x7) << 18) + ((us
[1] & 0x3F) << 12) + ((us
[2] & 0x3F) << 6) + (us
[3] & 0x3F);
782 // Return -1 on out-of-bounds
783 int SCI_METHOD
Document::GetRelativePosition(int positionStart
, int characterOffset
) const {
784 int pos
= positionStart
;
786 const int increment
= (characterOffset
> 0) ? 1 : -1;
787 while (characterOffset
!= 0) {
788 const int posNext
= NextPosition(pos
, increment
);
790 return INVALID_POSITION
;
792 characterOffset
-= increment
;
795 pos
= positionStart
+ characterOffset
;
796 if ((pos
< 0) || (pos
> Length()))
797 return INVALID_POSITION
;
802 int SCI_METHOD
Document::GetCharacterAndWidth(int position
, int *pWidth
) const {
804 int bytesInCharacter
= 1;
806 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(position
));
807 if (SC_CP_UTF8
== dbcsCodePage
) {
808 if (UTF8IsAscii(leadByte
)) {
809 // Single byte character or invalid
810 character
= leadByte
;
812 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
813 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
814 for (int b
=1; b
<widthCharBytes
; b
++)
815 charBytes
[b
] = static_cast<unsigned char>(cb
.CharAt(position
+b
));
816 int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
817 if (utf8status
& UTF8MaskInvalid
) {
818 // Report as singleton surrogate values which are invalid Unicode
819 character
= 0xDC80 + leadByte
;
821 bytesInCharacter
= utf8status
& UTF8MaskWidth
;
822 character
= UnicodeFromBytes(charBytes
);
826 if (IsDBCSLeadByte(leadByte
)) {
827 bytesInCharacter
= 2;
828 character
= (leadByte
<< 8) | static_cast<unsigned char>(cb
.CharAt(position
+1));
830 character
= leadByte
;
834 character
= cb
.CharAt(position
);
837 *pWidth
= bytesInCharacter
;
842 int SCI_METHOD
Document::CodePage() const {
846 bool SCI_METHOD
Document::IsDBCSLeadByte(char ch
) const {
847 // Byte ranges found in Wikipedia articles with relevant search strings in each case
848 unsigned char uch
= static_cast<unsigned char>(ch
);
849 switch (dbcsCodePage
) {
852 return ((uch
>= 0x81) && (uch
<= 0x9F)) ||
853 ((uch
>= 0xE0) && (uch
<= 0xFC));
854 // Lead bytes F0 to FC may be a Microsoft addition.
857 return (uch
>= 0x81) && (uch
<= 0xFE);
859 // Korean Wansung KS C-5601-1987
860 return (uch
>= 0x81) && (uch
<= 0xFE);
863 return (uch
>= 0x81) && (uch
<= 0xFE);
865 // Korean Johab KS C-5601-1992
867 ((uch
>= 0x84) && (uch
<= 0xD3)) ||
868 ((uch
>= 0xD8) && (uch
<= 0xDE)) ||
869 ((uch
>= 0xE0) && (uch
<= 0xF9));
// Return true when ch is a space or a horizontal tab.
static inline bool IsSpaceOrTab(int ch) {
	return ch == ' ' || ch == '\t';
}
878 // Need to break text into segments near lengthSegment but taking into
879 // account the encoding to not break inside a UTF-8 or DBCS character
880 // and also trying to avoid breaking inside a pair of combining characters.
881 // The segment length must always be long enough (more than 4 bytes)
882 // so that there will be at least one whole character to make a segment.
883 // For UTF-8, text must consist only of valid whole characters.
884 // In preference order from best to worst:
885 // 1) Break after space
886 // 2) Break before punctuation
887 // 3) Break after whole character
889 int Document::SafeSegment(const char *text
, int length
, int lengthSegment
) const {
890 if (length
<= lengthSegment
)
892 int lastSpaceBreak
= -1;
893 int lastPunctuationBreak
= -1;
894 int lastEncodingAllowedBreak
= 0;
895 for (int j
=0; j
< lengthSegment
;) {
896 unsigned char ch
= static_cast<unsigned char>(text
[j
]);
898 if (IsSpaceOrTab(text
[j
- 1]) && !IsSpaceOrTab(text
[j
])) {
902 lastPunctuationBreak
= j
;
905 lastEncodingAllowedBreak
= j
;
907 if (dbcsCodePage
== SC_CP_UTF8
) {
908 j
+= UTF8BytesOfLead
[ch
];
909 } else if (dbcsCodePage
) {
910 j
+= IsDBCSLeadByte(ch
) ? 2 : 1;
915 if (lastSpaceBreak
>= 0) {
916 return lastSpaceBreak
;
917 } else if (lastPunctuationBreak
>= 0) {
918 return lastPunctuationBreak
;
920 return lastEncodingAllowedBreak
;
923 EncodingFamily
Document::CodePageFamily() const {
924 if (SC_CP_UTF8
== dbcsCodePage
)
926 else if (dbcsCodePage
)
932 void Document::ModifiedAt(int pos
) {
937 void Document::CheckReadOnly() {
938 if (cb
.IsReadOnly() && enteredReadOnlyCount
== 0) {
939 enteredReadOnlyCount
++;
940 NotifyModifyAttempt();
941 enteredReadOnlyCount
--;
945 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
946 // SetStyleAt does not change the persistent state of a document
948 bool Document::DeleteChars(int pos
, int len
) {
953 if ((pos
+ len
) > Length())
956 if (enteredModification
!= 0) {
959 enteredModification
++;
960 if (!cb
.IsReadOnly()) {
963 SC_MOD_BEFOREDELETE
| SC_PERFORMED_USER
,
966 int prevLinesTotal
= LinesTotal();
967 bool startSavePoint
= cb
.IsSavePoint();
968 bool startSequence
= false;
969 const char *text
= cb
.DeleteChars(pos
, len
, startSequence
);
970 if (startSavePoint
&& cb
.IsCollectingUndo())
971 NotifySavePoint(!startSavePoint
);
972 if ((pos
< Length()) || (pos
== 0))
978 SC_MOD_DELETETEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
980 LinesTotal() - prevLinesTotal
, text
));
982 enteredModification
--;
984 return !cb
.IsReadOnly();
988 * Insert a string with a length.
990 int Document::InsertString(int position
, const char *s
, int insertLength
) {
991 if (insertLength
<= 0) {
994 CheckReadOnly(); // Application may change read only state here
995 if (cb
.IsReadOnly()) {
998 if (enteredModification
!= 0) {
1001 enteredModification
++;
1002 insertionSet
= false;
1007 position
, insertLength
,
1010 s
= insertion
.c_str();
1011 insertLength
= static_cast<int>(insertion
.length());
1015 SC_MOD_BEFOREINSERT
| SC_PERFORMED_USER
,
1016 position
, insertLength
,
1018 int prevLinesTotal
= LinesTotal();
1019 bool startSavePoint
= cb
.IsSavePoint();
1020 bool startSequence
= false;
1021 const char *text
= cb
.InsertString(position
, s
, insertLength
, startSequence
);
1022 if (startSavePoint
&& cb
.IsCollectingUndo())
1023 NotifySavePoint(!startSavePoint
);
1024 ModifiedAt(position
);
1027 SC_MOD_INSERTTEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
1028 position
, insertLength
,
1029 LinesTotal() - prevLinesTotal
, text
));
1030 if (insertionSet
) { // Free memory as could be large
1031 std::string().swap(insertion
);
1033 enteredModification
--;
1034 return insertLength
;
1037 void Document::ChangeInsertion(const char *s
, int length
) {
1038 insertionSet
= true;
1039 insertion
.assign(s
, length
);
1042 int SCI_METHOD
Document::AddData(char *data
, int length
) {
1044 int position
= Length();
1045 InsertString(position
, data
, length
);
1046 } catch (std::bad_alloc
&) {
1047 return SC_STATUS_BADALLOC
;
1049 return SC_STATUS_FAILURE
;
1054 void * SCI_METHOD
Document::ConvertToDocument() {
1058 int Document::Undo() {
1061 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1062 enteredModification
++;
1063 if (!cb
.IsReadOnly()) {
1064 bool startSavePoint
= cb
.IsSavePoint();
1065 bool multiLine
= false;
1066 int steps
= cb
.StartUndo();
1067 //Platform::DebugPrintf("Steps=%d\n", steps);
1068 int coalescedRemovePos
= -1;
1069 int coalescedRemoveLen
= 0;
1070 int prevRemoveActionPos
= -1;
1071 int prevRemoveActionLen
= 0;
1072 for (int step
= 0; step
< steps
; step
++) {
1073 const int prevLinesTotal
= LinesTotal();
1074 const Action
&action
= cb
.GetUndoStep();
1075 if (action
.at
== removeAction
) {
1076 NotifyModified(DocModification(
1077 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
1078 } else if (action
.at
== containerAction
) {
1079 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
1080 dm
.token
= action
.position
;
1082 if (!action
.mayCoalesce
) {
1083 coalescedRemovePos
= -1;
1084 coalescedRemoveLen
= 0;
1085 prevRemoveActionPos
= -1;
1086 prevRemoveActionLen
= 0;
1089 NotifyModified(DocModification(
1090 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
1092 cb
.PerformUndoStep();
1093 if (action
.at
!= containerAction
) {
1094 ModifiedAt(action
.position
);
1095 newPos
= action
.position
;
1098 int modFlags
= SC_PERFORMED_UNDO
;
1099 // With undo, an insertion action becomes a deletion notification
1100 if (action
.at
== removeAction
) {
1101 newPos
+= action
.lenData
;
1102 modFlags
|= SC_MOD_INSERTTEXT
;
1103 if ((coalescedRemoveLen
> 0) &&
1104 (action
.position
== prevRemoveActionPos
|| action
.position
== (prevRemoveActionPos
+ prevRemoveActionLen
))) {
1105 coalescedRemoveLen
+= action
.lenData
;
1106 newPos
= coalescedRemovePos
+ coalescedRemoveLen
;
1108 coalescedRemovePos
= action
.position
;
1109 coalescedRemoveLen
= action
.lenData
;
1111 prevRemoveActionPos
= action
.position
;
1112 prevRemoveActionLen
= action
.lenData
;
1113 } else if (action
.at
== insertAction
) {
1114 modFlags
|= SC_MOD_DELETETEXT
;
1115 coalescedRemovePos
= -1;
1116 coalescedRemoveLen
= 0;
1117 prevRemoveActionPos
= -1;
1118 prevRemoveActionLen
= 0;
1121 modFlags
|= SC_MULTISTEPUNDOREDO
;
1122 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1123 if (linesAdded
!= 0)
1125 if (step
== steps
- 1) {
1126 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1128 modFlags
|= SC_MULTILINEUNDOREDO
;
1130 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
1131 linesAdded
, action
.data
));
1134 bool endSavePoint
= cb
.IsSavePoint();
1135 if (startSavePoint
!= endSavePoint
)
1136 NotifySavePoint(endSavePoint
);
1138 enteredModification
--;
1143 int Document::Redo() {
1146 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1147 enteredModification
++;
1148 if (!cb
.IsReadOnly()) {
1149 bool startSavePoint
= cb
.IsSavePoint();
1150 bool multiLine
= false;
1151 int steps
= cb
.StartRedo();
1152 for (int step
= 0; step
< steps
; step
++) {
1153 const int prevLinesTotal
= LinesTotal();
1154 const Action
&action
= cb
.GetRedoStep();
1155 if (action
.at
== insertAction
) {
1156 NotifyModified(DocModification(
1157 SC_MOD_BEFOREINSERT
| SC_PERFORMED_REDO
, action
));
1158 } else if (action
.at
== containerAction
) {
1159 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_REDO
);
1160 dm
.token
= action
.position
;
1163 NotifyModified(DocModification(
1164 SC_MOD_BEFOREDELETE
| SC_PERFORMED_REDO
, action
));
1166 cb
.PerformRedoStep();
1167 if (action
.at
!= containerAction
) {
1168 ModifiedAt(action
.position
);
1169 newPos
= action
.position
;
1172 int modFlags
= SC_PERFORMED_REDO
;
1173 if (action
.at
== insertAction
) {
1174 newPos
+= action
.lenData
;
1175 modFlags
|= SC_MOD_INSERTTEXT
;
1176 } else if (action
.at
== removeAction
) {
1177 modFlags
|= SC_MOD_DELETETEXT
;
1180 modFlags
|= SC_MULTISTEPUNDOREDO
;
1181 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1182 if (linesAdded
!= 0)
1184 if (step
== steps
- 1) {
1185 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1187 modFlags
|= SC_MULTILINEUNDOREDO
;
1190 DocModification(modFlags
, action
.position
, action
.lenData
,
1191 linesAdded
, action
.data
));
1194 bool endSavePoint
= cb
.IsSavePoint();
1195 if (startSavePoint
!= endSavePoint
)
1196 NotifySavePoint(endSavePoint
);
1198 enteredModification
--;
1203 void Document::DelChar(int pos
) {
1204 DeleteChars(pos
, LenChar(pos
));
1207 void Document::DelCharBack(int pos
) {
1210 } else if (IsCrLf(pos
- 2)) {
1211 DeleteChars(pos
- 2, 2);
1212 } else if (dbcsCodePage
) {
1213 int startChar
= NextPosition(pos
, -1);
1214 DeleteChars(startChar
, pos
- startChar
);
1216 DeleteChars(pos
- 1, 1);
// Return the first multiple of tabSize that lies strictly after pos
// (for non-negative pos and a positive tabSize).
static int NextTab(int pos, int tabSize) {
	const int tabStopsPassed = pos / tabSize;
	return (tabStopsPassed + 1) * tabSize;
}
/**
 * Build the white space needed to indent by @a indent columns.
 * When @a insertSpaces is false, as many tabs of width @a tabSize as fit are
 * emitted first and the remainder is made up with spaces; otherwise only
 * spaces are used. A non-positive @a indent yields an empty string.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	// Tabs are only usable with a positive width: with tabSize <= 0 the
	// subtraction below would never bring indent under tabSize and the loop
	// would not terminate, so fall back to spaces in that case.
	if (!insertSpaces && (tabSize > 0)) {
		while (indent >= tabSize) {
			indentation += '\t';
			indent -= tabSize;
		}
	}
	// Pad whatever is left with single spaces.
	if (indent > 0) {
		indentation.append(static_cast<std::string::size_type>(indent), ' ');
	}
	return indentation;
}
1239 int SCI_METHOD
Document::GetLineIndentation(int line
) {
1241 if ((line
>= 0) && (line
< LinesTotal())) {
1242 int lineStart
= LineStart(line
);
1243 int length
= Length();
1244 for (int i
= lineStart
; i
< length
; i
++) {
1245 char ch
= cb
.CharAt(i
);
1248 else if (ch
== '\t')
1249 indent
= NextTab(indent
, tabInChars
);
1257 int Document::SetLineIndentation(int line
, int indent
) {
1258 int indentOfLine
= GetLineIndentation(line
);
1261 if (indent
!= indentOfLine
) {
1262 std::string linebuf
= CreateIndentation(indent
, tabInChars
, !useTabs
);
1263 int thisLineStart
= LineStart(line
);
1264 int indentPos
= GetLineIndentPosition(line
);
1266 DeleteChars(thisLineStart
, indentPos
- thisLineStart
);
1267 return thisLineStart
+ InsertString(thisLineStart
, linebuf
.c_str(),
1268 static_cast<int>(linebuf
.length()));
1270 return GetLineIndentPosition(line
);
1274 int Document::GetLineIndentPosition(int line
) const {
1277 int pos
= LineStart(line
);
1278 int length
= Length();
1279 while ((pos
< length
) && IsSpaceOrTab(cb
.CharAt(pos
))) {
1285 int Document::GetColumn(int pos
) {
1287 int line
= LineFromPosition(pos
);
1288 if ((line
>= 0) && (line
< LinesTotal())) {
1289 for (int i
= LineStart(line
); i
< pos
;) {
1290 char ch
= cb
.CharAt(i
);
1292 column
= NextTab(column
, tabInChars
);
1294 } else if (ch
== '\r') {
1296 } else if (ch
== '\n') {
1298 } else if (i
>= Length()) {
1302 i
= NextPosition(i
, 1);
1309 int Document::CountCharacters(int startPos
, int endPos
) const {
1310 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1311 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1314 while (i
< endPos
) {
1318 i
= NextPosition(i
, 1);
1323 int Document::FindColumn(int line
, int column
) {
1324 int position
= LineStart(line
);
1325 if ((line
>= 0) && (line
< LinesTotal())) {
1326 int columnCurrent
= 0;
1327 while ((columnCurrent
< column
) && (position
< Length())) {
1328 char ch
= cb
.CharAt(position
);
1330 columnCurrent
= NextTab(columnCurrent
, tabInChars
);
1331 if (columnCurrent
> column
)
1334 } else if (ch
== '\r') {
1336 } else if (ch
== '\n') {
1340 position
= NextPosition(position
, 1);
1347 void Document::Indent(bool forwards
, int lineBottom
, int lineTop
) {
1348 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1349 for (int line
= lineBottom
; line
>= lineTop
; line
--) {
1350 int indentOfLine
= GetLineIndentation(line
);
1352 if (LineStart(line
) < LineEnd(line
)) {
1353 SetLineIndentation(line
, indentOfLine
+ IndentSize());
1356 SetLineIndentation(line
, indentOfLine
- IndentSize());
1361 // Convert line endings for a piece of text to a particular mode.
1362 // Stop at len or when a NUL is found.
1363 std::string
Document::TransformLineEnds(const char *s
, size_t len
, int eolModeWanted
) {
1365 for (size_t i
= 0; (i
< len
) && (s
[i
]); i
++) {
1366 if (s
[i
] == '\n' || s
[i
] == '\r') {
1367 if (eolModeWanted
== SC_EOL_CR
) {
1368 dest
.push_back('\r');
1369 } else if (eolModeWanted
== SC_EOL_LF
) {
1370 dest
.push_back('\n');
1371 } else { // eolModeWanted == SC_EOL_CRLF
1372 dest
.push_back('\r');
1373 dest
.push_back('\n');
1375 if ((s
[i
] == '\r') && (i
+1 < len
) && (s
[i
+1] == '\n')) {
1379 dest
.push_back(s
[i
]);
1385 void Document::ConvertLineEnds(int eolModeSet
) {
1388 for (int pos
= 0; pos
< Length(); pos
++) {
1389 if (cb
.CharAt(pos
) == '\r') {
1390 if (cb
.CharAt(pos
+ 1) == '\n') {
1392 if (eolModeSet
== SC_EOL_CR
) {
1393 DeleteChars(pos
+ 1, 1); // Delete the LF
1394 } else if (eolModeSet
== SC_EOL_LF
) {
1395 DeleteChars(pos
, 1); // Delete the CR
1401 if (eolModeSet
== SC_EOL_CRLF
) {
1402 pos
+= InsertString(pos
+ 1, "\n", 1); // Insert LF
1403 } else if (eolModeSet
== SC_EOL_LF
) {
1404 pos
+= InsertString(pos
, "\n", 1); // Insert LF
1405 DeleteChars(pos
, 1); // Delete CR
1409 } else if (cb
.CharAt(pos
) == '\n') {
1411 if (eolModeSet
== SC_EOL_CRLF
) {
1412 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1413 } else if (eolModeSet
== SC_EOL_CR
) {
1414 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1415 DeleteChars(pos
, 1); // Delete LF
1423 bool Document::IsWhiteLine(int line
) const {
1424 int currentChar
= LineStart(line
);
1425 int endLine
= LineEnd(line
);
1426 while (currentChar
< endLine
) {
1427 if (cb
.CharAt(currentChar
) != ' ' && cb
.CharAt(currentChar
) != '\t') {
1435 int Document::ParaUp(int pos
) const {
1436 int line
= LineFromPosition(pos
);
1438 while (line
>= 0 && IsWhiteLine(line
)) { // skip empty lines
1441 while (line
>= 0 && !IsWhiteLine(line
)) { // skip non-empty lines
1445 return LineStart(line
);
1448 int Document::ParaDown(int pos
) const {
1449 int line
= LineFromPosition(pos
);
1450 while (line
< LinesTotal() && !IsWhiteLine(line
)) { // skip non-empty lines
1453 while (line
< LinesTotal() && IsWhiteLine(line
)) { // skip empty lines
1456 if (line
< LinesTotal())
1457 return LineStart(line
);
1458 else // end of a document
1459 return LineEnd(line
-1);
1462 CharClassify::cc
Document::WordCharClass(unsigned char ch
) const {
1463 if ((SC_CP_UTF8
== dbcsCodePage
) && (!UTF8IsAscii(ch
)))
1464 return CharClassify::ccWord
;
1465 return charClass
.GetClass(ch
);
1469 * Used by commmands that want to select whole words.
1470 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1472 int Document::ExtendWordSelect(int pos
, int delta
, bool onlyWordCharacters
) {
1473 CharClassify::cc ccStart
= CharClassify::ccWord
;
1475 if (!onlyWordCharacters
)
1476 ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1477 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
))
1480 if (!onlyWordCharacters
&& pos
< Length())
1481 ccStart
= WordCharClass(cb
.CharAt(pos
));
1482 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1485 return MovePositionOutsideChar(pos
, delta
, true);
1489 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1491 * This is looking for a transition between character classes although there is also some
1492 * additional movement to transit white space.
1493 * Used by cursor movement by word commands.
1495 int Document::NextWordStart(int pos
, int delta
) {
1497 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
))
1500 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1501 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
)) {
1506 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1507 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1509 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
))
1516 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1518 * This is looking for a transition between character classes although there is also some
1519 * additional movement to transit white space.
1520 * Used by cursor movement by word commands.
1522 int Document::NextWordEnd(int pos
, int delta
) {
1525 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1526 if (ccStart
!= CharClassify::ccSpace
) {
1527 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
) {
1531 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
) {
1536 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
) {
1539 if (pos
< Length()) {
1540 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1541 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == ccStart
) {
1550 * Check that the character at the given position is a word or punctuation character and that
1551 * the previous character is of a different character class.
1553 bool Document::IsWordStartAt(int pos
) const {
1555 CharClassify::cc ccPos
= WordCharClass(CharAt(pos
));
1556 return (ccPos
== CharClassify::ccWord
|| ccPos
== CharClassify::ccPunctuation
) &&
1557 (ccPos
!= WordCharClass(CharAt(pos
- 1)));
1563 * Check that the character at the given position is a word or punctuation character and that
1564 * the next character is of a different character class.
1566 bool Document::IsWordEndAt(int pos
) const {
1567 if (pos
< Length()) {
1568 CharClassify::cc ccPrev
= WordCharClass(CharAt(pos
-1));
1569 return (ccPrev
== CharClassify::ccWord
|| ccPrev
== CharClassify::ccPunctuation
) &&
1570 (ccPrev
!= WordCharClass(CharAt(pos
)));
1576 * Check that the given range is has transitions between character classes at both
1577 * ends and where the characters on the inside are word or punctuation characters.
1579 bool Document::IsWordAt(int start
, int end
) const {
1580 return IsWordStartAt(start
) && IsWordEndAt(end
);
1583 bool Document::MatchesWordOptions(bool word
, bool wordStart
, int pos
, int length
) const {
1584 return (!word
&& !wordStart
) ||
1585 (word
&& IsWordAt(pos
, pos
+ length
)) ||
1586 (wordStart
&& IsWordStartAt(pos
));
1589 bool Document::HasCaseFolder(void) const {
1593 void Document::SetCaseFolder(CaseFolder
*pcf_
) {
1598 Document::CharacterExtracted
Document::ExtractCharacter(int position
) const {
1599 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(position
));
1600 if (UTF8IsAscii(leadByte
)) {
1601 // Common case: ASCII character
1602 return CharacterExtracted(leadByte
, 1);
1604 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1605 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
1606 for (int b
=1; b
<widthCharBytes
; b
++)
1607 charBytes
[b
] = static_cast<unsigned char>(cb
.CharAt(position
+ b
));
1608 int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
1609 if (utf8status
& UTF8MaskInvalid
) {
1610 // Treat as invalid and use up just one byte
1611 return CharacterExtracted(unicodeReplacementChar
, 1);
1613 return CharacterExtracted(UnicodeFromBytes(charBytes
), utf8status
& UTF8MaskWidth
);
1618 * Find text in document, supporting both forward and backward
1619 * searches (just pass minPos > maxPos to do a backward search)
1620 * Has not been tested with backwards DBCS searches yet.
1622 long Document::FindText(int minPos
, int maxPos
, const char *search
,
1623 bool caseSensitive
, bool word
, bool wordStart
, bool regExp
, int flags
,
1629 regex
= CreateRegexSearch(&charClass
);
1630 return regex
->FindText(this, minPos
, maxPos
, search
, caseSensitive
, word
, wordStart
, flags
, length
);
1633 const bool forward
= minPos
<= maxPos
;
1634 const int increment
= forward
? 1 : -1;
1636 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1637 const int startPos
= MovePositionOutsideChar(minPos
, increment
, false);
1638 const int endPos
= MovePositionOutsideChar(maxPos
, increment
, false);
1640 // Compute actual search ranges needed
1641 const int lengthFind
= *length
;
1643 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1644 const int limitPos
= Platform::Maximum(startPos
, endPos
);
1647 // Back all of a character
1648 pos
= NextPosition(pos
, increment
);
1650 if (caseSensitive
) {
1651 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1652 const char charStartSearch
= search
[0];
1653 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1654 if (CharAt(pos
) == charStartSearch
) {
1655 bool found
= (pos
+ lengthFind
) <= limitPos
;
1656 for (int indexSearch
= 1; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1657 found
= CharAt(pos
+ indexSearch
) == search
[indexSearch
];
1659 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1663 if (!NextCharacter(pos
, increment
))
1666 } else if (SC_CP_UTF8
== dbcsCodePage
) {
1667 const size_t maxFoldingExpansion
= 4;
1668 std::vector
<char> searchThing(lengthFind
* UTF8MaxBytes
* maxFoldingExpansion
+ 1);
1669 const int lenSearch
= static_cast<int>(
1670 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1671 char bytes
[UTF8MaxBytes
+ 1];
1672 char folded
[UTF8MaxBytes
* maxFoldingExpansion
+ 1];
1673 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1674 int widthFirstCharacter
= 0;
1675 int posIndexDocument
= pos
;
1676 int indexSearch
= 0;
1677 bool characterMatches
= true;
1679 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(posIndexDocument
));
1680 bytes
[0] = leadByte
;
1682 if (!UTF8IsAscii(leadByte
)) {
1683 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1684 for (int b
=1; b
<widthCharBytes
; b
++) {
1685 bytes
[b
] = cb
.CharAt(posIndexDocument
+b
);
1687 widthChar
= UTF8Classify(reinterpret_cast<const unsigned char *>(bytes
), widthCharBytes
) & UTF8MaskWidth
;
1689 if (!widthFirstCharacter
)
1690 widthFirstCharacter
= widthChar
;
1691 if ((posIndexDocument
+ widthChar
) > limitPos
)
1693 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1694 folded
[lenFlat
] = 0;
1695 // Does folded match the buffer
1696 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1697 if (!characterMatches
)
1699 posIndexDocument
+= widthChar
;
1700 indexSearch
+= lenFlat
;
1701 if (indexSearch
>= lenSearch
)
1704 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1705 if (MatchesWordOptions(word
, wordStart
, pos
, posIndexDocument
- pos
)) {
1706 *length
= posIndexDocument
- pos
;
1711 pos
+= widthFirstCharacter
;
1713 if (!NextCharacter(pos
, increment
))
1717 } else if (dbcsCodePage
) {
1718 const size_t maxBytesCharacter
= 2;
1719 const size_t maxFoldingExpansion
= 4;
1720 std::vector
<char> searchThing(lengthFind
* maxBytesCharacter
* maxFoldingExpansion
+ 1);
1721 const int lenSearch
= static_cast<int>(
1722 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1723 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1724 int indexDocument
= 0;
1725 int indexSearch
= 0;
1726 bool characterMatches
= true;
1727 while (characterMatches
&&
1728 ((pos
+ indexDocument
) < limitPos
) &&
1729 (indexSearch
< lenSearch
)) {
1730 char bytes
[maxBytesCharacter
+ 1];
1731 bytes
[0] = cb
.CharAt(pos
+ indexDocument
);
1732 const int widthChar
= IsDBCSLeadByte(bytes
[0]) ? 2 : 1;
1734 bytes
[1] = cb
.CharAt(pos
+ indexDocument
+ 1);
1735 if ((pos
+ indexDocument
+ widthChar
) > limitPos
)
1737 char folded
[maxBytesCharacter
* maxFoldingExpansion
+ 1];
1738 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1739 folded
[lenFlat
] = 0;
1740 // Does folded match the buffer
1741 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1742 indexDocument
+= widthChar
;
1743 indexSearch
+= lenFlat
;
1745 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1746 if (MatchesWordOptions(word
, wordStart
, pos
, indexDocument
)) {
1747 *length
= indexDocument
;
1751 if (!NextCharacter(pos
, increment
))
1755 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1756 std::vector
<char> searchThing(lengthFind
+ 1);
1757 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
1758 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1759 bool found
= (pos
+ lengthFind
) <= limitPos
;
1760 for (int indexSearch
= 0; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1761 char ch
= CharAt(pos
+ indexSearch
);
1763 pcf
->Fold(folded
, sizeof(folded
), &ch
, 1);
1764 found
= folded
[0] == searchThing
[indexSearch
];
1766 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1769 if (!NextCharacter(pos
, increment
))
1774 //Platform::DebugPrintf("Not found\n");
1778 const char *Document::SubstituteByPosition(const char *text
, int *length
) {
1780 return regex
->SubstituteByPosition(this, text
, length
);
1785 int Document::LinesTotal() const {
1789 void Document::SetDefaultCharClasses(bool includeWordClass
) {
1790 charClass
.SetDefaultCharClasses(includeWordClass
);
1793 void Document::SetCharClasses(const unsigned char *chars
, CharClassify::cc newCharClass
) {
1794 charClass
.SetCharClasses(chars
, newCharClass
);
1797 int Document::GetCharsOfClass(CharClassify::cc characterClass
, unsigned char *buffer
) {
1798 return charClass
.GetCharsOfClass(characterClass
, buffer
);
1801 void SCI_METHOD
Document::StartStyling(int position
, char) {
1802 endStyled
= position
;
1805 bool SCI_METHOD
Document::SetStyleFor(int length
, char style
) {
1806 if (enteredStyling
!= 0) {
1810 int prevEndStyled
= endStyled
;
1811 if (cb
.SetStyleFor(endStyled
, length
, style
)) {
1812 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1813 prevEndStyled
, length
);
1816 endStyled
+= length
;
1822 bool SCI_METHOD
Document::SetStyles(int length
, const char *styles
) {
1823 if (enteredStyling
!= 0) {
1827 bool didChange
= false;
1830 for (int iPos
= 0; iPos
< length
; iPos
++, endStyled
++) {
1831 PLATFORM_ASSERT(endStyled
< Length());
1832 if (cb
.SetStyleAt(endStyled
, styles
[iPos
])) {
1834 startMod
= endStyled
;
1841 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1842 startMod
, endMod
- startMod
+ 1);
1850 void Document::EnsureStyledTo(int pos
) {
1851 if ((enteredStyling
== 0) && (pos
> GetEndStyled())) {
1852 IncrementStyleClock();
1853 if (pli
&& !pli
->UseContainerLexing()) {
1854 int lineEndStyled
= LineFromPosition(GetEndStyled());
1855 int endStyledTo
= LineStart(lineEndStyled
);
1856 pli
->Colourise(endStyledTo
, pos
);
1858 // Ask the watchers to style, and stop as soon as one responds.
1859 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin();
1860 (pos
> GetEndStyled()) && (it
!= watchers
.end()); ++it
) {
1861 it
->watcher
->NotifyStyleNeeded(this, it
->userData
, pos
);
1867 void Document::LexerChanged() {
1868 // Tell the watchers the lexer has changed.
1869 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1870 it
->watcher
->NotifyLexerChanged(this, it
->userData
);
1874 int SCI_METHOD
Document::SetLineState(int line
, int state
) {
1875 int statePrevious
= static_cast<LineState
*>(perLineData
[ldState
])->SetLineState(line
, state
);
1876 if (state
!= statePrevious
) {
1877 DocModification
mh(SC_MOD_CHANGELINESTATE
, LineStart(line
), 0, 0, 0, line
);
1880 return statePrevious
;
1883 int SCI_METHOD
Document::GetLineState(int line
) const {
1884 return static_cast<LineState
*>(perLineData
[ldState
])->GetLineState(line
);
1887 int Document::GetMaxLineState() {
1888 return static_cast<LineState
*>(perLineData
[ldState
])->GetMaxLineState();
1891 void SCI_METHOD
Document::ChangeLexerState(int start
, int end
) {
1892 DocModification
mh(SC_MOD_LEXERSTATE
, start
, end
-start
, 0, 0, 0);
1896 StyledText
Document::MarginStyledText(int line
) const {
1897 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldMargin
]);
1898 return StyledText(pla
->Length(line
), pla
->Text(line
),
1899 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
1902 void Document::MarginSetText(int line
, const char *text
) {
1903 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetText(line
, text
);
1904 DocModification
mh(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
);
1908 void Document::MarginSetStyle(int line
, int style
) {
1909 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyle(line
, style
);
1910 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1913 void Document::MarginSetStyles(int line
, const unsigned char *styles
) {
1914 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyles(line
, styles
);
1915 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1918 void Document::MarginClearAll() {
1919 int maxEditorLine
= LinesTotal();
1920 for (int l
=0; l
<maxEditorLine
; l
++)
1921 MarginSetText(l
, 0);
1922 // Free remaining data
1923 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->ClearAll();
1926 StyledText
Document::AnnotationStyledText(int line
) const {
1927 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
]);
1928 return StyledText(pla
->Length(line
), pla
->Text(line
),
1929 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
1932 void Document::AnnotationSetText(int line
, const char *text
) {
1933 if (line
>= 0 && line
< LinesTotal()) {
1934 const int linesBefore
= AnnotationLines(line
);
1935 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetText(line
, text
);
1936 const int linesAfter
= AnnotationLines(line
);
1937 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
1938 mh
.annotationLinesAdded
= linesAfter
- linesBefore
;
1943 void Document::AnnotationSetStyle(int line
, int style
) {
1944 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyle(line
, style
);
1945 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
1949 void Document::AnnotationSetStyles(int line
, const unsigned char *styles
) {
1950 if (line
>= 0 && line
< LinesTotal()) {
1951 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyles(line
, styles
);
1955 int Document::AnnotationLines(int line
) const {
1956 return static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->Lines(line
);
1959 void Document::AnnotationClearAll() {
1960 int maxEditorLine
= LinesTotal();
1961 for (int l
=0; l
<maxEditorLine
; l
++)
1962 AnnotationSetText(l
, 0);
1963 // Free remaining data
1964 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->ClearAll();
1967 void Document::IncrementStyleClock() {
1968 styleClock
= (styleClock
+ 1) % 0x100000;
1971 void SCI_METHOD
Document::DecorationFillRange(int position
, int value
, int fillLength
) {
1972 if (decorations
.FillRange(position
, value
, fillLength
)) {
1973 DocModification
mh(SC_MOD_CHANGEINDICATOR
| SC_PERFORMED_USER
,
1974 position
, fillLength
);
1979 bool Document::AddWatcher(DocWatcher
*watcher
, void *userData
) {
1980 WatcherWithUserData
wwud(watcher
, userData
);
1981 std::vector
<WatcherWithUserData
>::iterator it
=
1982 std::find(watchers
.begin(), watchers
.end(), wwud
);
1983 if (it
!= watchers
.end())
1985 watchers
.push_back(wwud
);
1989 bool Document::RemoveWatcher(DocWatcher
*watcher
, void *userData
) {
1990 std::vector
<WatcherWithUserData
>::iterator it
=
1991 std::find(watchers
.begin(), watchers
.end(), WatcherWithUserData(watcher
, userData
));
1992 if (it
!= watchers
.end()) {
1999 void Document::NotifyModifyAttempt() {
2000 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2001 it
->watcher
->NotifyModifyAttempt(this, it
->userData
);
2005 void Document::NotifySavePoint(bool atSavePoint
) {
2006 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2007 it
->watcher
->NotifySavePoint(this, it
->userData
, atSavePoint
);
2011 void Document::NotifyModified(DocModification mh
) {
2012 if (mh
.modificationType
& SC_MOD_INSERTTEXT
) {
2013 decorations
.InsertSpace(mh
.position
, mh
.length
);
2014 } else if (mh
.modificationType
& SC_MOD_DELETETEXT
) {
2015 decorations
.DeleteRange(mh
.position
, mh
.length
);
2017 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
2018 it
->watcher
->NotifyModified(this, mh
, it
->userData
);
2022 bool Document::IsWordPartSeparator(char ch
) const {
2023 return (WordCharClass(ch
) == CharClassify::ccWord
) && IsPunctuation(ch
);
2026 int Document::WordPartLeft(int pos
) {
2029 char startChar
= cb
.CharAt(pos
);
2030 if (IsWordPartSeparator(startChar
)) {
2031 while (pos
> 0 && IsWordPartSeparator(cb
.CharAt(pos
))) {
2036 startChar
= cb
.CharAt(pos
);
2038 if (IsLowerCase(startChar
)) {
2039 while (pos
> 0 && IsLowerCase(cb
.CharAt(pos
)))
2041 if (!IsUpperCase(cb
.CharAt(pos
)) && !IsLowerCase(cb
.CharAt(pos
)))
2043 } else if (IsUpperCase(startChar
)) {
2044 while (pos
> 0 && IsUpperCase(cb
.CharAt(pos
)))
2046 if (!IsUpperCase(cb
.CharAt(pos
)))
2048 } else if (IsADigit(startChar
)) {
2049 while (pos
> 0 && IsADigit(cb
.CharAt(pos
)))
2051 if (!IsADigit(cb
.CharAt(pos
)))
2053 } else if (IsPunctuation(startChar
)) {
2054 while (pos
> 0 && IsPunctuation(cb
.CharAt(pos
)))
2056 if (!IsPunctuation(cb
.CharAt(pos
)))
2058 } else if (isspacechar(startChar
)) {
2059 while (pos
> 0 && isspacechar(cb
.CharAt(pos
)))
2061 if (!isspacechar(cb
.CharAt(pos
)))
2063 } else if (!IsASCII(startChar
)) {
2064 while (pos
> 0 && !IsASCII(cb
.CharAt(pos
)))
2066 if (IsASCII(cb
.CharAt(pos
)))
2076 int Document::WordPartRight(int pos
) {
2077 char startChar
= cb
.CharAt(pos
);
2078 int length
= Length();
2079 if (IsWordPartSeparator(startChar
)) {
2080 while (pos
< length
&& IsWordPartSeparator(cb
.CharAt(pos
)))
2082 startChar
= cb
.CharAt(pos
);
2084 if (!IsASCII(startChar
)) {
2085 while (pos
< length
&& !IsASCII(cb
.CharAt(pos
)))
2087 } else if (IsLowerCase(startChar
)) {
2088 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2090 } else if (IsUpperCase(startChar
)) {
2091 if (IsLowerCase(cb
.CharAt(pos
+ 1))) {
2093 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2096 while (pos
< length
&& IsUpperCase(cb
.CharAt(pos
)))
2099 if (IsLowerCase(cb
.CharAt(pos
)) && IsUpperCase(cb
.CharAt(pos
- 1)))
2101 } else if (IsADigit(startChar
)) {
2102 while (pos
< length
&& IsADigit(cb
.CharAt(pos
)))
2104 } else if (IsPunctuation(startChar
)) {
2105 while (pos
< length
&& IsPunctuation(cb
.CharAt(pos
)))
2107 } else if (isspacechar(startChar
)) {
2108 while (pos
< length
&& isspacechar(cb
.CharAt(pos
)))
// True for the two line end characters, line feed and carriage return.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2120 int Document::ExtendStyleRange(int pos
, int delta
, bool singleLine
) {
2121 int sStart
= cb
.StyleAt(pos
);
2123 while (pos
> 0 && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2127 while (pos
< (Length()) && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2133 static char BraceOpposite(char ch
) {
2156 // TODO: should be able to extend styled region to find matching brace
2157 int Document::BraceMatch(int position
, int /*maxReStyle*/) {
2158 char chBrace
= CharAt(position
);
2159 char chSeek
= BraceOpposite(chBrace
);
2162 char styBrace
= static_cast<char>(StyleAt(position
));
2164 if (chBrace
== '(' || chBrace
== '[' || chBrace
== '{' || chBrace
== '<')
2167 position
= NextPosition(position
, direction
);
2168 while ((position
>= 0) && (position
< Length())) {
2169 char chAtPos
= CharAt(position
);
2170 char styAtPos
= static_cast<char>(StyleAt(position
));
2171 if ((position
> GetEndStyled()) || (styAtPos
== styBrace
)) {
2172 if (chAtPos
== chBrace
)
2174 if (chAtPos
== chSeek
)
2179 int positionBeforeMove
= position
;
2180 position
= NextPosition(position
, direction
);
2181 if (position
== positionBeforeMove
)
2188 * Implementation of RegexSearchBase for the default built-in regular expression engine
2190 class BuiltinRegex
: public RegexSearchBase
{
2192 explicit BuiltinRegex(CharClassify
*charClassTable
) : search(charClassTable
) {}
2194 virtual ~BuiltinRegex() {
2197 virtual long FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2198 bool caseSensitive
, bool word
, bool wordStart
, int flags
,
2201 virtual const char *SubstituteByPosition(Document
*doc
, const char *text
, int *length
);
2205 std::string substituted
;
2211 * RESearchRange keeps track of search range.
2213 class RESearchRange
{
2215 const Document
*doc
;
2222 RESearchRange(const Document
*doc_
, int minPos
, int maxPos
) : doc(doc_
) {
2223 increment
= (minPos
<= maxPos
) ? 1 : -1;
2225 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2226 startPos
= doc
->MovePositionOutsideChar(minPos
, 1, false);
2227 endPos
= doc
->MovePositionOutsideChar(maxPos
, 1, false);
2229 lineRangeStart
= doc
->LineFromPosition(startPos
);
2230 lineRangeEnd
= doc
->LineFromPosition(endPos
);
2231 if ((increment
== 1) &&
2232 (startPos
>= doc
->LineEnd(lineRangeStart
)) &&
2233 (lineRangeStart
< lineRangeEnd
)) {
2234 // the start position is at end of line or between line end characters.
2236 startPos
= doc
->LineStart(lineRangeStart
);
2237 } else if ((increment
== -1) &&
2238 (startPos
<= doc
->LineStart(lineRangeStart
)) &&
2239 (lineRangeStart
> lineRangeEnd
)) {
2240 // the start position is at beginning of line.
2242 startPos
= doc
->LineEnd(lineRangeStart
);
2244 lineRangeBreak
= lineRangeEnd
+ increment
;
2246 Range
LineRange(int line
) const {
2247 Range
range(doc
->LineStart(line
), doc
->LineEnd(line
));
2248 if (increment
== 1) {
2249 if (line
== lineRangeStart
)
2250 range
.start
= startPos
;
2251 if (line
== lineRangeEnd
)
2254 if (line
== lineRangeEnd
)
2255 range
.start
= endPos
;
2256 if (line
== lineRangeStart
)
2257 range
.end
= startPos
;
2263 // Define a way for the Regular Expression code to access the document
2264 class DocumentIndexer
: public CharacterIndexer
{
2268 DocumentIndexer(Document
*pdoc_
, int end_
) :
2269 pdoc(pdoc_
), end(end_
) {
2272 virtual ~DocumentIndexer() {
2275 virtual char CharAt(int index
) {
2276 if (index
< 0 || index
>= end
)
2279 return pdoc
->CharAt(index
);
2285 class ByteIterator
: public std::iterator
<std::bidirectional_iterator_tag
, char> {
2287 const Document
*doc
;
2289 ByteIterator(const Document
*doc_
= 0, Position position_
= 0) : doc(doc_
), position(position_
) {
2291 ByteIterator(const ByteIterator
&other
) {
2293 position
= other
.position
;
2295 ByteIterator
&operator=(const ByteIterator
&other
) {
2296 if (this != &other
) {
2298 position
= other
.position
;
2302 char operator*() const {
2303 return doc
->CharAt(position
);
2305 ByteIterator
&operator++() {
2309 ByteIterator
operator++(int) {
2310 ByteIterator
retVal(*this);
2314 ByteIterator
&operator--() {
2318 bool operator==(const ByteIterator
&other
) const {
2319 return doc
== other
.doc
&& position
== other
.position
;
2321 bool operator!=(const ByteIterator
&other
) const {
2322 return doc
!= other
.doc
|| position
!= other
.position
;
2327 int PosRoundUp() const {
2332 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2333 // Would be better to use sizeof(wchar_t) or similar to differentiate
2334 // but easier for now to hard-code platforms.
2335 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2336 // appear to allow specializing basic_regex over these.
2339 #define WCHAR_T_IS_16 1
2341 #define WCHAR_T_IS_16 0
2346 // On Windows, report non-BMP characters as 2 separate surrogates as that
2347 // matches wregex since it is based on wchar_t.
// Bidirectional iterator over a Document that yields wchar_t code units.
// One document character (lenBytes bytes starting at position) is held in
// buffered[] as lenCharacters wchar_t values, and characterIndex selects which
// of those values the iterator currently denotes.
// NOTE(review): this listing has dropped lines (the embedded numbering skips
// e.g. 2351, 2357, 2360-2362, 2364, 2374, 2433-2435). In particular the
// declaration of the 'position' member, the access specifiers and a Pos()
// accessor used by MatchOnLines are not visible here.
2348 class UTF8Iterator
: public std::iterator
<std::bidirectional_iterator_tag
, wchar_t> {
2349 // These 3 fields determine the iterator position and are used for comparisons
2350 const Document
*doc
;
2352 size_t characterIndex
;
2353 // Remaining fields are derived from the determining fields so are excluded in comparisons
2354 unsigned int lenBytes
;
2355 size_t lenCharacters
;
2356 wchar_t buffered
[2];
// Starts at the first code unit of the character at position_ with nothing
// buffered yet: lenCharacters == 0 is the "not read" state tested by operator*.
2358 UTF8Iterator(const Document
*doc_
= 0, Position position_
= 0) :
2359 doc(doc_
), position(position_
), characterIndex(0), lenBytes(0), lenCharacters(0) {
// Copy constructor: member-wise copy, including the buffered code units.
2363 UTF8Iterator(const UTF8Iterator
&other
) {
2365 position
= other
.position
;
2366 characterIndex
= other
.characterIndex
;
2367 lenBytes
= other
.lenBytes
;
2368 lenCharacters
= other
.lenCharacters
;
2369 buffered
[0] = other
.buffered
[0];
2370 buffered
[1] = other
.buffered
[1];
// Copy assignment: same member-wise copy, skipped on self-assignment.
2372 UTF8Iterator
&operator=(const UTF8Iterator
&other
) {
2373 if (this != &other
) {
2375 position
= other
.position
;
2376 characterIndex
= other
.characterIndex
;
2377 lenBytes
= other
.lenBytes
;
2378 lenCharacters
= other
.lenCharacters
;
2379 buffered
[0] = other
.buffered
[0];
2380 buffered
[1] = other
.buffered
[1];
// Dereference: returns the buffered code unit selected by characterIndex.
// Not const. The body of the lenCharacters == 0 guard is missing from this
// listing - presumably it calls ReadCharacter() to fill the buffer; TODO confirm.
2384 wchar_t operator*() {
2385 if (lenCharacters
== 0) {
2388 return buffered
[characterIndex
];
// Pre-increment: when another buffered code unit remains for the current
// character the first branch is taken (its body is not shown); otherwise the
// byte position moves past the current character.
// NOTE(review): lenBytes is 0 until a character has been read, so incrementing
// an iterator that was never dereferenced adds 0 to position here - confirm
// that the omitted lines or the callers rule this out.
2390 UTF8Iterator
&operator++() {
2391 if ((characterIndex
+ 1) < (lenCharacters
)) {
2394 position
+= lenBytes
;
// Post-increment: copies the iterator into retVal, then applies the same
// advance logic as the pre-increment form.
2400 UTF8Iterator
operator++(int) {
2401 UTF8Iterator
retVal(*this);
2402 if ((characterIndex
+ 1) < (lenCharacters
)) {
2405 position
+= lenBytes
;
// Pre-decrement: a non-zero characterIndex takes the first branch (body not
// shown); otherwise step back one whole character with NextPosition(position, -1)
// and select that character's last code unit.
2411 UTF8Iterator
&operator--() {
2412 if (characterIndex
) {
2415 position
= doc
->NextPosition(position
, -1);
2417 characterIndex
= lenCharacters
- 1;
2421 bool operator==(const UTF8Iterator
&other
) const {
2422 // Only test the determining fields, not the character widths and values derived from this
2423 return doc
== other
.doc
&&
2424 position
== other
.position
&&
2425 characterIndex
== other
.characterIndex
;
2427 bool operator!=(const UTF8Iterator
&other
) const {
2428 // Only test the determining fields, not the character widths and values derived from this
2429 return doc
!= other
.doc
||
2430 position
!= other
.position
||
2431 characterIndex
!= other
.characterIndex
;
// Byte position rounded up to a character boundary. The visible return yields
// the end of the current character; the condition selecting it (embedded line
// 2437) and the alternative return are missing from this listing.
2436 int PosRoundUp() const {
2438 return position
+ lenBytes
; // Force to end of character
// Loads the character at position: records its width in bytes, then fills
// buffered[]. A unicodeReplacementChar result is stored directly in buffered[0];
// any other character is converted by UTF16FromUTF32Character, whose return
// value becomes lenCharacters.
2443 void ReadCharacter() {
2444 Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2445 lenBytes
= charExtracted
.widthBytes
;
2446 if (charExtracted
.character
== unicodeReplacementChar
) {
2448 buffered
[0] = static_cast<wchar_t>(charExtracted
.character
);
2450 lenCharacters
= UTF16FromUTF32Character(charExtracted
.character
, buffered
);
2457 // On Unix, report non-BMP characters as single characters
// Bidirectional iterator over a Document that yields one wchar_t per document
// character: dereferencing extracts the character at position, and stepping
// moves by whole characters through Document::NextPosition.
// NOTE(review): this listing has dropped lines (the embedded numbering skips
// e.g. 2461-2462, 2464, 2466, 2471, 2499-2501). The declaration of 'position',
// the access specifier and a Pos() accessor used by MatchOnLines are not
// visible here.
2459 class UTF8Iterator
: public std::iterator
<std::bidirectional_iterator_tag
, wchar_t> {
2460 const Document
*doc
;
// Both arguments default to 0, giving a default-constructible iterator.
2463 UTF8Iterator(const Document
*doc_
=0, Position position_
=0) : doc(doc_
), position(position_
) {
// Copy constructor (only the position copy is visible in this listing).
2465 UTF8Iterator(const UTF8Iterator
&other
) {
2467 position
= other
.position
;
// Copy assignment, skipped on self-assignment.
2469 UTF8Iterator
&operator=(const UTF8Iterator
&other
) {
2470 if (this != &other
) {
2472 position
= other
.position
;
// Dereference: extracts the character at position on every call (nothing is
// cached) and returns its character value converted to wchar_t.
2476 wchar_t operator*() const {
2477 Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2478 return charExtracted
.character
;
// Pre-increment: move forward one character.
2480 UTF8Iterator
&operator++() {
2481 position
= doc
->NextPosition(position
, 1);
// Post-increment: copy into retVal, then move forward one character.
2484 UTF8Iterator
operator++(int) {
2485 UTF8Iterator
retVal(*this);
2486 position
= doc
->NextPosition(position
, 1);
// Pre-decrement: move back one character.
2489 UTF8Iterator
&operator--() {
2490 position
= doc
->NextPosition(position
, -1);
// Equal when both the document and the position match.
2493 bool operator==(const UTF8Iterator
&other
) const {
2494 return doc
== other
.doc
&& position
== other
.position
;
// Unequal when either the document or the position differs.
2496 bool operator!=(const UTF8Iterator
&other
) const {
2497 return doc
!= other
.doc
|| position
!= other
.position
;
// NOTE(review): only the signature is present in this listing; the body is
// missing. Presumably returns position unchanged, since this variant never
// rests part-way through a character - TODO confirm.
2502 int PosRoundUp() const {
// Builds the std::regex match flags for searching the range [startPos, endPos).
// Starts from match_default, adds match_not_bol when startPos is not a line
// start in doc, and adds match_not_eol when endPos is not a line end, so that
// ^ and $ do not match at an arbitrary range boundary.
// NOTE(review): the closing lines (embedded 2515-2516) are missing from this
// listing; presumably the function returns flagsMatch.
2509 std::regex_constants::match_flag_type
MatchFlags(const Document
*doc
, int startPos
, int endPos
) {
2510 std::regex_constants::match_flag_type flagsMatch
= std::regex_constants::match_default
;
2511 if (!doc
->IsLineStartPosition(startPos
))
2512 flagsMatch
|= std::regex_constants::match_not_bol
;
2513 if (!doc
->IsLineEndPosition(endPos
))
2514 flagsMatch
|= std::regex_constants::match_not_eol
;
// Runs regexp over the lines described by resr, one line at a time, using
// Iterator to walk the document. The capture groups of the accepted match are
// stored into search: bopat/eopat receive the start and (rounded-up) end byte
// positions, and pat receives the matched bytes copied with doc->CharAt.
// Iterator must provide Pos() and PosRoundUp() in addition to the usual
// bidirectional iterator operations.
// NOTE(review): '®exp' in the parameter list below is a mis-decoded '&regexp'
// (the body refers to the parameter as regexp); it is left untouched here
// because it is a code token.
// NOTE(review): several control-flow lines are missing from this listing
// (embedded 2539, 2541, 2546, 2549-2559 and 2567 onwards), so the exact loop
// exit conditions and the return statements cannot be read from here.
2518 template<typename Iterator
, typename Regex
>
2519 bool MatchOnLines(const Document
*doc
, const Regex
®exp
, const RESearchRange
&resr
, RESearch
&search
) {
2520 bool matched
= false;
2521 std::match_results
<Iterator
> match
;
2523 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2524 // If they didn't then the line by line iteration could be removed for the forwards
2525 // case and replaced with the following 4 lines:
2526 // Iterator uiStart(doc, startPos);
2527 // Iterator uiEnd(doc, endPos);
2528 // flagsMatch = MatchFlags(doc, startPos, endPos);
2529 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
// resr supplies the first line, the sentinel line and the step (+1 or -1).
2532 for (int line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
2533 const Range lineRange
= resr
.LineRange(line
);
2534 Iterator
itStart(doc
, lineRange
.start
);
2535 Iterator
itEnd(doc
, lineRange
.end
);
2536 std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, lineRange
.start
, lineRange
.end
);
2537 matched
= std::regex_search(itStart
, itEnd
, match
, regexp
, flagsMatch
);
2538 // Check for the last match on this line.
// Backward search (increment == -1): search again from the end of the current
// match to look for a later match on the same line.
2540 if (resr
.increment
== -1) {
2542 Iterator
itNext(doc
, match
[0].second
.PosRoundUp());
2543 flagsMatch
= MatchFlags(doc
, itNext
.Pos(), lineRange
.end
);
2544 std::match_results
<Iterator
> matchNext
;
2545 matched
= std::regex_search(itNext
, itEnd
, matchNext
, regexp
, flagsMatch
);
2547 if (match
[0].first
== match
[0].second
) {
2548 // Empty match means failure so exit
// Publish every sub-match: positions first, then the matched bytes.
2560 for (size_t co
= 0; co
< match
.size(); co
++) {
2561 search
.bopat
[co
] = match
[co
].first
.Pos();
2562 search
.eopat
[co
] = match
[co
].second
.PosRoundUp();
2563 size_t lenMatch
= search
.eopat
[co
] - search
.bopat
[co
];
2564 search
.pat
[co
].resize(lenMatch
);
2565 for (size_t iPos
= 0; iPos
< lenMatch
; iPos
++) {
2566 search
.pat
[co
][iPos
] = doc
->CharAt(iPos
+ search
.bopat
[co
]);
// Searches doc between minPos and maxPos for the ECMAScript regular expression
// s using std::regex, adding icase to the flags (the condition guarding that
// line is not shown; the caseSensitive parameter presumably controls it).
// For UTF-8 documents the pattern is converted to wide characters and matched
// with UTF8Iterator; for other code pages it is matched byte-wise with
// ByteIterator. Match positions are written into search by MatchOnLines, and
// *length receives the length of the whole match.
// NOTE(review): lines are missing from this listing (embedded 2576-2577, 2581,
// 2585, 2595-2597, 2608-2609, 2614-2615, 2624, 2627-2628, 2630 onwards). The
// declarations of regexp and posMatch, the try that pairs with the catch
// below, and the return values are therefore not visible.
2573 long Cxx11RegexFindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2574 bool caseSensitive
, int *length
, RESearch
&search
) {
2575 const RESearchRange
resr(doc
, minPos
, maxPos
);
2578 std::regex::flag_type flagsRe
= std::regex::ECMAScript
;
2579 // Flags that appear to have no effect:
2580 // | std::regex::collate | std::regex::extended;
2582 flagsRe
= flagsRe
| std::regex::icase
;
2584 // Clear the RESearch so can fill in matches
2587 bool matched
= false;
2588 if (SC_CP_UTF8
== doc
->dbcsCodePage
) {
// ws has room for lenS converted units plus one extra element.
2589 unsigned int lenS
= static_cast<unsigned int>(strlen(s
));
2590 std::vector
<wchar_t> ws(lenS
+ 1);
// Two alternative conversions of the pattern (UTF-16 or UTF-32); the
// preprocessor lines choosing between them are not shown.
2592 size_t outLen
= UTF16FromUTF8(s
, lenS
, &ws
[0], lenS
);
2594 size_t outLen
= UTF32FromUTF8(s
, lenS
, reinterpret_cast<unsigned int *>(&ws
[0]), lenS
);
2598 #if defined(__APPLE__)
2599 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2600 // is one byte not one character.
2601 // However, on OS X this makes wregex act as Unicode
2602 std::locale
localeU("en_US.UTF-8");
2603 regexp
.imbue(localeU
);
2605 regexp
.assign(&ws
[0], flagsRe
);
2606 matched
= MatchOnLines
<UTF8Iterator
>(doc
, regexp
, resr
, search
);
// Non-UTF-8 documents: compile the pattern bytes directly.
2610 regexp
.assign(s
, flagsRe
);
2611 matched
= MatchOnLines
<ByteIterator
>(doc
, regexp
, resr
, search
);
// Group 0 is the whole match.
2616 posMatch
= search
.bopat
[0];
2617 *length
= search
.eopat
[0] - search
.bopat
[0];
2619 // Example - search in doc/ScintillaHistory.html for
2620 // [[:upper:]]eta[[:space:]]
2621 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2622 //double durSearch = et.Duration(true);
2623 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2625 } catch (std::regex_error
&) {
2626 // Failed to create regular expression
2629 // Failed in some other way
// Finds the regular expression s in doc between minPos and maxPos.
// With SCFIND_CXX11REGEX set in flags the work is delegated to
// Cxx11RegexFindText; otherwise s is compiled by the built-in RESearch engine
// (SCFIND_POSIX selects its posix mode) and executed line by line over the
// range described by RESearchRange. On entry *length is the pattern length.
// NOTE(review): lines are missing from this listing (embedded 2640-2642,
// 2646-2648, 2654-2656, 2661-2662, 2694, 2705, 2709 onwards). The remaining
// parameter(s) of the signature, the handling of errmsg, the declarations of
// pos and lenRet and the final return are therefore not visible.
2638 long BuiltinRegex::FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2639 bool caseSensitive
, bool, bool, int flags
,
2643 if (flags
& SCFIND_CXX11REGEX
) {
2644 return Cxx11RegexFindText(doc
, minPos
, maxPos
, s
,
2645 caseSensitive
, length
, search
);
2649 const RESearchRange
resr(doc
, minPos
, maxPos
);
2651 const bool posix
= (flags
& SCFIND_POSIX
) != 0;
2653 const char *errmsg
= search
.Compile(s
, *length
, caseSensitive
, posix
);
2657 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2658 // Replace first '.' with '-' in each property file variable reference:
2659 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2660 // Replace: $(\1-\2)
// Last pattern character and the one before it, used below to recognise an
// unescaped trailing '$' anchor.
// NOTE(review): s[*length - 1] reads before the start of s when *length is 0;
// no guard is visible here - confirm an empty pattern cannot reach this point.
2663 const char searchEnd
= s
[*length
- 1];
2664 const char searchEndPrev
= (*length
> 1) ? s
[*length
- 2] : '\0';
2665 for (int line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
2666 int startOfLine
= doc
->LineStart(line
);
2667 int endOfLine
= doc
->LineEnd(line
);
// Forward search: clip the first and last lines to the requested range, and
// skip a line outright when an anchor could not match at the clipped boundary.
2668 if (resr
.increment
== 1) {
2669 if (line
== resr
.lineRangeStart
) {
2670 if ((resr
.startPos
!= startOfLine
) && (s
[0] == '^'))
2671 continue; // Can't match start of line if start position after start of line
2672 startOfLine
= resr
.startPos
;
2674 if (line
== resr
.lineRangeEnd
) {
2675 if ((resr
.endPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2676 continue; // Can't match end of line if end position before end of line
2677 endOfLine
= resr
.endPos
;
// Otherwise (the backward-search branch; its 'else' line is not shown): the
// same clipping with the roles of startPos and endPos swapped.
2680 if (line
== resr
.lineRangeEnd
) {
2681 if ((resr
.endPos
!= startOfLine
) && (s
[0] == '^'))
2682 continue; // Can't match start of line if end position after start of line
2683 startOfLine
= resr
.endPos
;
2685 if (line
== resr
.lineRangeStart
) {
2686 if ((resr
.startPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2687 continue; // Can't match end of line if start position before end of line
2688 endOfLine
= resr
.startPos
;
// Run the compiled pattern over the (possibly clipped) line.
2692 DocumentIndexer
di(doc
, endOfLine
);
2693 int success
= search
.Execute(di
, startOfLine
, endOfLine
);
2695 pos
= search
.bopat
[0];
2696 // Ensure only whole characters selected
2697 search
.eopat
[0] = doc
->MovePositionOutsideChar(search
.eopat
[0], 1, false);
2698 lenRet
= search
.eopat
[0] - search
.bopat
[0];
2699 // There can be only one start of a line, so no need to look for last match in line
2700 if ((resr
.increment
== -1) && (s
[0] != '^')) {
2701 // Check for the last match on this line.
2702 int repetitions
= 1000; // Break out of infinite loop
// Retry from one byte past the previous match start, at most 1000 times.
2703 while (success
&& (search
.eopat
[0] <= endOfLine
) && (repetitions
--)) {
2704 success
= search
.Execute(di
, pos
+1, endOfLine
);
// A later match replaces the remembered one only if it ends at or before minPos.
// NOTE(review): presumably minPos is the higher, starting position of a
// backward search - confirm against RESearchRange.
2706 if (search
.eopat
[0] <= minPos
) {
2707 pos
= search
.bopat
[0];
2708 lenRet
= search
.eopat
[0] - search
.bopat
[0];
// Expands a replacement template against the most recent match.
// text holds *length bytes. A backslash followed by a digit 0-9 is replaced by
// the text captured for that group (nothing is appended when the group is
// empty); other backslash sequences produce the control characters pushed
// below; every other byte is copied through unchanged. The result is built in
// the member 'substituted': *length is updated to its size and the returned
// pointer is substituted.c_str(), so it refers to storage owned by this object.
// NOTE(review): lines are missing from this listing (embedded 2733-2737 and
// the lines between the push_back calls), so the case labels selecting each
// escape and the adjustments of j are not visible.
// NOTE(review): text[j + 1] is read even when j is the last index, which relies
// on text being readable one byte past *length (e.g. NUL-terminated) - confirm.
// NOTE(review): patNum can be 0-9 and indexes search.bopat/eopat/pat without a
// visible bounds check - confirm those arrays hold at least 10 entries.
2722 const char *BuiltinRegex::SubstituteByPosition(Document
*doc
, const char *text
, int *length
) {
2723 substituted
.clear();
// Refresh the captured text for each group from the document.
2724 DocumentIndexer
di(doc
, doc
->Length());
2725 search
.GrabMatches(di
);
2726 for (int j
= 0; j
< *length
; j
++) {
2727 if (text
[j
] == '\\') {
2728 if (text
[j
+ 1] >= '0' && text
[j
+ 1] <= '9') {
2729 unsigned int patNum
= text
[j
+ 1] - '0';
2730 unsigned int len
= search
.eopat
[patNum
] - search
.bopat
[patNum
];
2731 if (!search
.pat
[patNum
].empty()) // Will be null if try for a match that did not occur
2732 substituted
.append(search
.pat
[patNum
].c_str(), len
);
// Escape sequences: bell, backspace, form feed, newline, carriage return,
// tab, vertical tab, then two branches that both emit a literal backslash.
2738 substituted
.push_back('\a');
2741 substituted
.push_back('\b');
2744 substituted
.push_back('\f');
2747 substituted
.push_back('\n');
2750 substituted
.push_back('\r');
2753 substituted
.push_back('\t');
2756 substituted
.push_back('\v');
2759 substituted
.push_back('\\');
2762 substituted
.push_back('\\');
// Ordinary byte: copy through.
2767 substituted
.push_back(text
[j
]);
2770 *length
= static_cast<int>(substituted
.length());
2771 return substituted
.c_str();
// Factory for the default regular expression engine: returns a newly allocated
// BuiltinRegex constructed from charClassTable. The caller receives a raw
// pointer from 'new'; who deletes it is not shown in this listing.
// The definition is skipped when SCI_OWNREGEX is defined. Two spellings follow:
// the first qualifies the name with Scintilla:: under SCI_NAMESPACE, the second
// is unqualified (the #else/#endif lines separating them are not shown).
2774 #ifndef SCI_OWNREGEX
2776 #ifdef SCI_NAMESPACE
2778 RegexSearchBase
*Scintilla::CreateRegexSearch(CharClassify
*charClassTable
) {
2779 return new BuiltinRegex(charClassTable
);
2784 RegexSearchBase
*CreateRegexSearch(CharClassify
*charClassTable
) {
2785 return new BuiltinRegex(charClassTable
);