1 // Scintilla source code edit control
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
21 #include "Scintilla.h"
23 #include "CharacterSet.h"
24 #include "SplitVector.h"
25 #include "Partitioning.h"
26 #include "RunStyles.h"
27 #include "CellBuffer.h"
29 #include "CharClassify.h"
30 #include "Decoration.h"
31 #include "CaseFolder.h"
34 #include "UniConversion.h"
37 using namespace Scintilla
;
40 static inline bool IsPunctuation(char ch
) {
41 return IsASCII(ch
) && ispunct(ch
);
44 void LexInterface::Colourise(int start
, int end
) {
45 if (pdoc
&& instance
&& !performingStyle
) {
46 // Protect against reentrance, which may occur, for example, when
47 // fold points are discovered while performing styling and the folding
48 // code looks for child lines which may trigger styling.
49 performingStyle
= true;
51 int lengthDoc
= pdoc
->Length();
54 int len
= end
- start
;
56 PLATFORM_ASSERT(len
>= 0);
57 PLATFORM_ASSERT(start
+ len
<= lengthDoc
);
61 styleStart
= pdoc
->StyleAt(start
- 1);
64 instance
->Lex(start
, len
, styleStart
, pdoc
);
65 instance
->Fold(start
, len
, styleStart
, pdoc
);
68 performingStyle
= false;
72 int LexInterface::LineEndTypesSupported() {
74 int interfaceVersion
= instance
->Version();
75 if (interfaceVersion
>= lvSubStyles
) {
76 ILexerWithSubStyles
*ssinstance
= static_cast<ILexerWithSubStyles
*>(instance
);
77 return ssinstance
->LineEndTypesSupported();
83 Document::Document() {
87 eolMode
= SC_EOL_CRLF
;
92 lineEndBitSet
= SC_LINE_END_TYPE_DEFAULT
;
95 enteredModification
= 0;
97 enteredReadOnlyCount
= 0;
101 actualIndentInChars
= 8;
104 backspaceUnindents
= false;
106 matchesValid
= false;
109 UTF8BytesOfLeadInitialise();
111 perLineData
[ldMarkers
] = new LineMarkers();
112 perLineData
[ldLevels
] = new LineLevels();
113 perLineData
[ldState
] = new LineState();
114 perLineData
[ldMargin
] = new LineAnnotation();
115 perLineData
[ldAnnotation
] = new LineAnnotation();
122 Document::~Document() {
123 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
124 it
->watcher
->NotifyDeleted(this, it
->userData
);
126 for (int j
=0; j
<ldSize
; j
++) {
127 delete perLineData
[j
];
138 void Document::Init() {
139 for (int j
=0; j
<ldSize
; j
++) {
141 perLineData
[j
]->Init();
145 int Document::LineEndTypesSupported() const {
146 if ((SC_CP_UTF8
== dbcsCodePage
) && pli
)
147 return pli
->LineEndTypesSupported();
152 bool Document::SetDBCSCodePage(int dbcsCodePage_
) {
153 if (dbcsCodePage
!= dbcsCodePage_
) {
154 dbcsCodePage
= dbcsCodePage_
;
156 cb
.SetLineEndTypes(lineEndBitSet
& LineEndTypesSupported());
163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_
) {
164 if (lineEndBitSet
!= lineEndBitSet_
) {
165 lineEndBitSet
= lineEndBitSet_
;
166 int lineEndBitSetActive
= lineEndBitSet
& LineEndTypesSupported();
167 if (lineEndBitSetActive
!= cb
.GetLineEndTypes()) {
169 cb
.SetLineEndTypes(lineEndBitSetActive
);
179 void Document::InsertLine(int line
) {
180 for (int j
=0; j
<ldSize
; j
++) {
182 perLineData
[j
]->InsertLine(line
);
186 void Document::RemoveLine(int line
) {
187 for (int j
=0; j
<ldSize
; j
++) {
189 perLineData
[j
]->RemoveLine(line
);
193 // Increase reference count and return its previous value.
194 int Document::AddRef() {
198 // Decrease reference count and return its previous value.
199 // Delete the document if reference count reaches zero.
200 int SCI_METHOD
Document::Release() {
201 int curRefCount
= --refCount
;
202 if (curRefCount
== 0)
207 void Document::SetSavePoint() {
209 NotifySavePoint(true);
212 void Document::TentativeUndo() {
214 if (enteredModification
== 0) {
215 enteredModification
++;
216 if (!cb
.IsReadOnly()) {
217 bool startSavePoint
= cb
.IsSavePoint();
218 bool multiLine
= false;
219 int steps
= cb
.TentativeSteps();
220 //Platform::DebugPrintf("Steps=%d\n", steps);
221 for (int step
= 0; step
< steps
; step
++) {
222 const int prevLinesTotal
= LinesTotal();
223 const Action
&action
= cb
.GetUndoStep();
224 if (action
.at
== removeAction
) {
225 NotifyModified(DocModification(
226 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
227 } else if (action
.at
== containerAction
) {
228 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
229 dm
.token
= action
.position
;
232 NotifyModified(DocModification(
233 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
235 cb
.PerformUndoStep();
236 if (action
.at
!= containerAction
) {
237 ModifiedAt(action
.position
);
240 int modFlags
= SC_PERFORMED_UNDO
;
241 // With undo, an insertion action becomes a deletion notification
242 if (action
.at
== removeAction
) {
243 modFlags
|= SC_MOD_INSERTTEXT
;
244 } else if (action
.at
== insertAction
) {
245 modFlags
|= SC_MOD_DELETETEXT
;
248 modFlags
|= SC_MULTISTEPUNDOREDO
;
249 const int linesAdded
= LinesTotal() - prevLinesTotal
;
252 if (step
== steps
- 1) {
253 modFlags
|= SC_LASTSTEPINUNDOREDO
;
255 modFlags
|= SC_MULTILINEUNDOREDO
;
257 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
258 linesAdded
, action
.data
));
261 bool endSavePoint
= cb
.IsSavePoint();
262 if (startSavePoint
!= endSavePoint
)
263 NotifySavePoint(endSavePoint
);
265 cb
.TentativeCommit();
267 enteredModification
--;
271 int Document::GetMark(int line
) {
272 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkValue(line
);
275 int Document::MarkerNext(int lineStart
, int mask
) const {
276 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->MarkerNext(lineStart
, mask
);
279 int Document::AddMark(int line
, int markerNum
) {
280 if (line
>= 0 && line
<= LinesTotal()) {
281 int prev
= static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
282 AddMark(line
, markerNum
, LinesTotal());
283 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
291 void Document::AddMarkSet(int line
, int valueSet
) {
292 if (line
< 0 || line
> LinesTotal()) {
295 unsigned int m
= valueSet
;
296 for (int i
= 0; m
; i
++, m
>>= 1)
298 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->
299 AddMark(line
, i
, LinesTotal());
300 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
304 void Document::DeleteMark(int line
, int markerNum
) {
305 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, false);
306 DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, 0, line
);
310 void Document::DeleteMarkFromHandle(int markerHandle
) {
311 static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMarkFromHandle(markerHandle
);
312 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
317 void Document::DeleteAllMarks(int markerNum
) {
318 bool someChanges
= false;
319 for (int line
= 0; line
< LinesTotal(); line
++) {
320 if (static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->DeleteMark(line
, markerNum
, true))
324 DocModification
mh(SC_MOD_CHANGEMARKER
, 0, 0, 0, 0);
330 int Document::LineFromHandle(int markerHandle
) {
331 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
])->LineFromHandle(markerHandle
);
334 int SCI_METHOD
Document::LineStart(int line
) const {
335 return cb
.LineStart(line
);
338 int SCI_METHOD
Document::LineEnd(int line
) const {
339 if (line
>= LinesTotal() - 1) {
340 return LineStart(line
+ 1);
342 int position
= LineStart(line
+ 1);
343 if (SC_CP_UTF8
== dbcsCodePage
) {
344 unsigned char bytes
[] = {
345 static_cast<unsigned char>(cb
.CharAt(position
-3)),
346 static_cast<unsigned char>(cb
.CharAt(position
-2)),
347 static_cast<unsigned char>(cb
.CharAt(position
-1)),
349 if (UTF8IsSeparator(bytes
)) {
350 return position
- UTF8SeparatorLength
;
352 if (UTF8IsNEL(bytes
+1)) {
353 return position
- UTF8NELLength
;
356 position
--; // Back over CR or LF
357 // When line terminator is CR+LF, may need to go back one more
358 if ((position
> LineStart(line
)) && (cb
.CharAt(position
- 1) == '\r')) {
365 void SCI_METHOD
Document::SetErrorStatus(int status
) {
366 // Tell the watchers an error has occurred.
367 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
368 it
->watcher
->NotifyErrorOccurred(this, it
->userData
, status
);
372 int SCI_METHOD
Document::LineFromPosition(int pos
) const {
373 return cb
.LineFromPosition(pos
);
376 int Document::LineEndPosition(int position
) const {
377 return LineEnd(LineFromPosition(position
));
380 bool Document::IsLineEndPosition(int position
) const {
381 return LineEnd(LineFromPosition(position
)) == position
;
384 bool Document::IsPositionInLineEnd(int position
) const {
385 return position
>= LineEnd(LineFromPosition(position
));
388 int Document::VCHomePosition(int position
) const {
389 int line
= LineFromPosition(position
);
390 int startPosition
= LineStart(line
);
391 int endLine
= LineEnd(line
);
392 int startText
= startPosition
;
393 while (startText
< endLine
&& (cb
.CharAt(startText
) == ' ' || cb
.CharAt(startText
) == '\t'))
395 if (position
== startText
)
396 return startPosition
;
401 int SCI_METHOD
Document::SetLevel(int line
, int level
) {
402 int prev
= static_cast<LineLevels
*>(perLineData
[ldLevels
])->SetLevel(line
, level
, LinesTotal());
404 DocModification
mh(SC_MOD_CHANGEFOLD
| SC_MOD_CHANGEMARKER
,
405 LineStart(line
), 0, 0, 0, line
);
406 mh
.foldLevelNow
= level
;
407 mh
.foldLevelPrev
= prev
;
413 int SCI_METHOD
Document::GetLevel(int line
) const {
414 return static_cast<LineLevels
*>(perLineData
[ldLevels
])->GetLevel(line
);
417 void Document::ClearLevels() {
418 static_cast<LineLevels
*>(perLineData
[ldLevels
])->ClearLevels();
421 static bool IsSubordinate(int levelStart
, int levelTry
) {
422 if (levelTry
& SC_FOLDLEVELWHITEFLAG
)
425 return (levelStart
& SC_FOLDLEVELNUMBERMASK
) < (levelTry
& SC_FOLDLEVELNUMBERMASK
);
428 int Document::GetLastChild(int lineParent
, int level
, int lastLine
) {
430 level
= GetLevel(lineParent
) & SC_FOLDLEVELNUMBERMASK
;
431 int maxLine
= LinesTotal();
432 int lookLastLine
= (lastLine
!= -1) ? Platform::Minimum(LinesTotal() - 1, lastLine
) : -1;
433 int lineMaxSubord
= lineParent
;
434 while (lineMaxSubord
< maxLine
- 1) {
435 EnsureStyledTo(LineStart(lineMaxSubord
+ 2));
436 if (!IsSubordinate(level
, GetLevel(lineMaxSubord
+ 1)))
438 if ((lookLastLine
!= -1) && (lineMaxSubord
>= lookLastLine
) && !(GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
))
442 if (lineMaxSubord
> lineParent
) {
443 if (level
> (GetLevel(lineMaxSubord
+ 1) & SC_FOLDLEVELNUMBERMASK
)) {
444 // Have chewed up some whitespace that belongs to a parent so seek back
445 if (GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
) {
450 return lineMaxSubord
;
453 int Document::GetFoldParent(int line
) const {
454 int level
= GetLevel(line
) & SC_FOLDLEVELNUMBERMASK
;
455 int lineLook
= line
- 1;
456 while ((lineLook
> 0) && (
457 (!(GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
)) ||
458 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) >= level
))
462 if ((GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
) &&
463 ((GetLevel(lineLook
) & SC_FOLDLEVELNUMBERMASK
) < level
)) {
470 void Document::GetHighlightDelimiters(HighlightDelimiter
&highlightDelimiter
, int line
, int lastLine
) {
471 int level
= GetLevel(line
);
472 int lookLastLine
= Platform::Maximum(line
, lastLine
) + 1;
475 int lookLineLevel
= level
;
476 int lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
477 while ((lookLine
> 0) && ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) ||
478 ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
>= (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))))) {
479 lookLineLevel
= GetLevel(--lookLine
);
480 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
483 int beginFoldBlock
= (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) ? lookLine
: GetFoldParent(lookLine
);
484 if (beginFoldBlock
== -1) {
485 highlightDelimiter
.Clear();
489 int endFoldBlock
= GetLastChild(beginFoldBlock
, -1, lookLastLine
);
490 int firstChangeableLineBefore
= -1;
491 if (endFoldBlock
< line
) {
492 lookLine
= beginFoldBlock
- 1;
493 lookLineLevel
= GetLevel(lookLine
);
494 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
495 while ((lookLine
>= 0) && (lookLineLevelNum
>= SC_FOLDLEVELBASE
)) {
496 if (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) {
497 if (GetLastChild(lookLine
, -1, lookLastLine
) == line
) {
498 beginFoldBlock
= lookLine
;
500 firstChangeableLineBefore
= line
- 1;
503 if ((lookLine
> 0) && (lookLineLevelNum
== SC_FOLDLEVELBASE
) && ((GetLevel(lookLine
- 1) & SC_FOLDLEVELNUMBERMASK
) > lookLineLevelNum
))
505 lookLineLevel
= GetLevel(--lookLine
);
506 lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
509 if (firstChangeableLineBefore
== -1) {
510 for (lookLine
= line
- 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
511 lookLine
>= beginFoldBlock
;
512 lookLineLevel
= GetLevel(--lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
513 if ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) || (lookLineLevelNum
> (level
& SC_FOLDLEVELNUMBERMASK
))) {
514 firstChangeableLineBefore
= lookLine
;
519 if (firstChangeableLineBefore
== -1)
520 firstChangeableLineBefore
= beginFoldBlock
- 1;
522 int firstChangeableLineAfter
= -1;
523 for (lookLine
= line
+ 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
;
524 lookLine
<= endFoldBlock
;
525 lookLineLevel
= GetLevel(++lookLine
), lookLineLevelNum
= lookLineLevel
& SC_FOLDLEVELNUMBERMASK
) {
526 if ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
< (GetLevel(lookLine
+ 1) & SC_FOLDLEVELNUMBERMASK
))) {
527 firstChangeableLineAfter
= lookLine
;
531 if (firstChangeableLineAfter
== -1)
532 firstChangeableLineAfter
= endFoldBlock
+ 1;
534 highlightDelimiter
.beginFoldBlock
= beginFoldBlock
;
535 highlightDelimiter
.endFoldBlock
= endFoldBlock
;
536 highlightDelimiter
.firstChangeableLineBefore
= firstChangeableLineBefore
;
537 highlightDelimiter
.firstChangeableLineAfter
= firstChangeableLineAfter
;
540 int Document::ClampPositionIntoDocument(int pos
) const {
541 return Platform::Clamp(pos
, 0, Length());
544 bool Document::IsCrLf(int pos
) const {
547 if (pos
>= (Length() - 1))
549 return (cb
.CharAt(pos
) == '\r') && (cb
.CharAt(pos
+ 1) == '\n');
552 int Document::LenChar(int pos
) {
555 } else if (IsCrLf(pos
)) {
557 } else if (SC_CP_UTF8
== dbcsCodePage
) {
558 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
559 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
560 int lengthDoc
= Length();
561 if ((pos
+ widthCharBytes
) > lengthDoc
)
562 return lengthDoc
- pos
;
564 return widthCharBytes
;
565 } else if (dbcsCodePage
) {
566 return IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
572 bool Document::InGoodUTF8(int pos
, int &start
, int &end
) const {
574 while ((trail
>0) && (pos
-trail
< UTF8MaxBytes
) && UTF8IsTrailByte(static_cast<unsigned char>(cb
.CharAt(trail
-1))))
576 start
= (trail
> 0) ? trail
-1 : trail
;
578 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(start
));
579 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
580 if (widthCharBytes
== 1) {
583 int trailBytes
= widthCharBytes
- 1;
584 int len
= pos
- start
;
585 if (len
> trailBytes
)
586 // pos too far from lead
588 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
589 for (int b
=1; b
<widthCharBytes
&& ((start
+b
) < Length()); b
++)
590 charBytes
[b
] = cb
.CharAt(static_cast<int>(start
+b
));
591 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
592 if (utf8status
& UTF8MaskInvalid
)
594 end
= start
+ widthCharBytes
;
599 // Normalise a position so that it is not halfway through a two byte character.
600 // This can occur in two situations -
601 // When lines are terminated with \r\n pairs which should be treated as one character.
602 // When displaying DBCS text such as Japanese.
603 // If moving, move the position in the indicated direction.
604 int Document::MovePositionOutsideChar(int pos
, int moveDir
, bool checkLineEnd
) {
605 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
606 // If out of range, just return minimum/maximum value.
612 // PLATFORM_ASSERT(pos > 0 && pos < Length());
613 if (checkLineEnd
&& IsCrLf(pos
- 1)) {
621 if (SC_CP_UTF8
== dbcsCodePage
) {
622 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
623 // If ch is not a trail byte then pos is valid intercharacter position
624 if (UTF8IsTrailByte(ch
)) {
627 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
628 // ch is a trail byte within a UTF-8 character
634 // Else invalid UTF-8 so return position of isolated trail byte
637 // Anchor DBCS calculations at start of line because start of line can
638 // not be a DBCS trail byte.
639 int posStartLine
= LineStart(LineFromPosition(pos
));
640 if (pos
== posStartLine
)
643 // Step back until a non-lead-byte is found.
645 while ((posCheck
> posStartLine
) && IsDBCSLeadByte(cb
.CharAt(posCheck
-1)))
648 // Check from known start of character.
649 while (posCheck
< pos
) {
650 int mbsize
= IsDBCSLeadByte(cb
.CharAt(posCheck
)) ? 2 : 1;
651 if (posCheck
+ mbsize
== pos
) {
653 } else if (posCheck
+ mbsize
> pos
) {
655 return posCheck
+ mbsize
;
668 // NextPosition moves between valid positions - it can not handle a position in the middle of a
669 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
670 // A \r\n pair is treated as two characters.
671 int Document::NextPosition(int pos
, int moveDir
) const {
672 // If out of range, just return minimum/maximum value.
673 int increment
= (moveDir
> 0) ? 1 : -1;
674 if (pos
+ increment
<= 0)
676 if (pos
+ increment
>= Length())
680 if (SC_CP_UTF8
== dbcsCodePage
) {
681 if (increment
== 1) {
682 // Simple forward movement case so can avoid some checks
683 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(pos
));
684 if (UTF8IsAscii(leadByte
)) {
685 // Single byte character or invalid
688 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
689 char charBytes
[UTF8MaxBytes
] = {static_cast<char>(leadByte
),0,0,0};
690 for (int b
=1; b
<widthCharBytes
; b
++)
691 charBytes
[b
] = cb
.CharAt(static_cast<int>(pos
+b
));
692 int utf8status
= UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes
), widthCharBytes
);
693 if (utf8status
& UTF8MaskInvalid
)
696 pos
+= utf8status
& UTF8MaskWidth
;
699 // Examine byte before position
701 unsigned char ch
= static_cast<unsigned char>(cb
.CharAt(pos
));
702 // If ch is not a trail byte then pos is valid intercharacter position
703 if (UTF8IsTrailByte(ch
)) {
704 // If ch is a trail byte in a valid UTF-8 character then return start of character
707 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
710 // Else invalid UTF-8 so return position of isolated trail byte
715 int mbsize
= IsDBCSLeadByte(cb
.CharAt(pos
)) ? 2 : 1;
720 // Anchor DBCS calculations at start of line because start of line can
721 // not be a DBCS trail byte.
722 int posStartLine
= LineStart(LineFromPosition(pos
));
723 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
724 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
725 if ((pos
- 1) <= posStartLine
) {
727 } else if (IsDBCSLeadByte(cb
.CharAt(pos
- 1))) {
728 // Must actually be trail byte
731 // Otherwise, step back until a non-lead-byte is found.
732 int posTemp
= pos
- 1;
733 while (posStartLine
<= --posTemp
&& IsDBCSLeadByte(cb
.CharAt(posTemp
)))
735 // Now posTemp+1 must point to the beginning of a character,
736 // so figure out whether we went back an even or an odd
737 // number of bytes and go back 1 or 2 bytes, respectively.
738 return (pos
- 1 - ((pos
- posTemp
) & 1));
749 bool Document::NextCharacter(int &pos
, int moveDir
) const {
750 // Returns true if pos changed
751 int posNext
= NextPosition(pos
, moveDir
);
752 if (posNext
== pos
) {
// Decode one UTF-8 sequence starting at us into a code point.
// The sequence length is chosen from the lead byte; the trail bytes are not
// validated here, so callers must pass an already validated sequence.
// Lead bytes that start no 2-, 3- or 4-byte sequence are returned unchanged.
static inline int UnicodeFromBytes(const unsigned char *us) {
	if (us[0] < 0xC2) {
		return us[0];
	} else if (us[0] < 0xE0) {
		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
	} else if (us[0] < 0xF0) {
		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
	} else if (us[0] < 0xF5) {
		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
	}
	return us[0];
}
773 // Return -1 on out-of-bounds
774 int SCI_METHOD
Document::GetRelativePosition(int positionStart
, int characterOffset
) const {
775 int pos
= positionStart
;
777 const int increment
= (characterOffset
> 0) ? 1 : -1;
778 while (characterOffset
!= 0) {
779 const int posNext
= NextPosition(pos
, increment
);
781 return INVALID_POSITION
;
783 characterOffset
-= increment
;
786 pos
= positionStart
+ characterOffset
;
787 if ((pos
< 0) || (pos
> Length()))
788 return INVALID_POSITION
;
793 int SCI_METHOD
Document::GetCharacterAndWidth(int position
, int *pWidth
) const {
795 int bytesInCharacter
= 1;
797 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(position
));
798 if (SC_CP_UTF8
== dbcsCodePage
) {
799 if (UTF8IsAscii(leadByte
)) {
800 // Single byte character or invalid
801 character
= leadByte
;
803 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
804 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
805 for (int b
=1; b
<widthCharBytes
; b
++)
806 charBytes
[b
] = static_cast<unsigned char>(cb
.CharAt(position
+b
));
807 int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
808 if (utf8status
& UTF8MaskInvalid
) {
809 // Report as singleton surrogate values which are invalid Unicode
810 character
= 0xDC80 + leadByte
;
812 bytesInCharacter
= utf8status
& UTF8MaskWidth
;
813 character
= UnicodeFromBytes(charBytes
);
817 if (IsDBCSLeadByte(leadByte
)) {
818 bytesInCharacter
= 2;
819 character
= (leadByte
<< 8) | static_cast<unsigned char>(cb
.CharAt(position
+1));
821 character
= leadByte
;
825 character
= cb
.CharAt(position
);
828 *pWidth
= bytesInCharacter
;
833 int SCI_METHOD
Document::CodePage() const {
837 bool SCI_METHOD
Document::IsDBCSLeadByte(char ch
) const {
838 // Byte ranges found in Wikipedia articles with relevant search strings in each case
839 unsigned char uch
= static_cast<unsigned char>(ch
);
840 switch (dbcsCodePage
) {
843 return ((uch
>= 0x81) && (uch
<= 0x9F)) ||
844 ((uch
>= 0xE0) && (uch
<= 0xFC));
845 // Lead bytes F0 to FC may be a Microsoft addition.
848 return (uch
>= 0x81) && (uch
<= 0xFE);
850 // Korean Wansung KS C-5601-1987
851 return (uch
>= 0x81) && (uch
<= 0xFE);
854 return (uch
>= 0x81) && (uch
<= 0xFE);
856 // Korean Johab KS C-5601-1992
858 ((uch
>= 0x84) && (uch
<= 0xD3)) ||
859 ((uch
>= 0xD8) && (uch
<= 0xDE)) ||
860 ((uch
>= 0xE0) && (uch
<= 0xF9));
// True when ch is a space or a horizontal tab.
static inline bool IsSpaceOrTab(int ch) {
	return ch == ' ' || ch == '\t';
}
869 // Need to break text into segments near lengthSegment but taking into
870 // account the encoding to not break inside a UTF-8 or DBCS character
871 // and also trying to avoid breaking inside a pair of combining characters.
872 // The segment length must always be long enough (more than 4 bytes)
873 // so that there will be at least one whole character to make a segment.
874 // For UTF-8, text must consist only of valid whole characters.
875 // In preference order from best to worst:
876 // 1) Break after space
877 // 2) Break before punctuation
878 // 3) Break after whole character
880 int Document::SafeSegment(const char *text
, int length
, int lengthSegment
) const {
881 if (length
<= lengthSegment
)
883 int lastSpaceBreak
= -1;
884 int lastPunctuationBreak
= -1;
885 int lastEncodingAllowedBreak
= 0;
886 for (int j
=0; j
< lengthSegment
;) {
887 unsigned char ch
= static_cast<unsigned char>(text
[j
]);
889 if (IsSpaceOrTab(text
[j
- 1]) && !IsSpaceOrTab(text
[j
])) {
893 lastPunctuationBreak
= j
;
896 lastEncodingAllowedBreak
= j
;
898 if (dbcsCodePage
== SC_CP_UTF8
) {
899 j
+= UTF8BytesOfLead
[ch
];
900 } else if (dbcsCodePage
) {
901 j
+= IsDBCSLeadByte(ch
) ? 2 : 1;
906 if (lastSpaceBreak
>= 0) {
907 return lastSpaceBreak
;
908 } else if (lastPunctuationBreak
>= 0) {
909 return lastPunctuationBreak
;
911 return lastEncodingAllowedBreak
;
914 EncodingFamily
Document::CodePageFamily() const {
915 if (SC_CP_UTF8
== dbcsCodePage
)
917 else if (dbcsCodePage
)
923 void Document::ModifiedAt(int pos
) {
928 void Document::CheckReadOnly() {
929 if (cb
.IsReadOnly() && enteredReadOnlyCount
== 0) {
930 enteredReadOnlyCount
++;
931 NotifyModifyAttempt();
932 enteredReadOnlyCount
--;
936 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
937 // SetStyleAt does not change the persistent state of a document
939 bool Document::DeleteChars(int pos
, int len
) {
944 if ((pos
+ len
) > Length())
947 if (enteredModification
!= 0) {
950 enteredModification
++;
951 if (!cb
.IsReadOnly()) {
954 SC_MOD_BEFOREDELETE
| SC_PERFORMED_USER
,
957 int prevLinesTotal
= LinesTotal();
958 bool startSavePoint
= cb
.IsSavePoint();
959 bool startSequence
= false;
960 const char *text
= cb
.DeleteChars(pos
, len
, startSequence
);
961 if (startSavePoint
&& cb
.IsCollectingUndo())
962 NotifySavePoint(!startSavePoint
);
963 if ((pos
< Length()) || (pos
== 0))
969 SC_MOD_DELETETEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
971 LinesTotal() - prevLinesTotal
, text
));
973 enteredModification
--;
975 return !cb
.IsReadOnly();
979 * Insert a string with a length.
981 int Document::InsertString(int position
, const char *s
, int insertLength
) {
982 if (insertLength
<= 0) {
985 CheckReadOnly(); // Application may change read only state here
986 if (cb
.IsReadOnly()) {
989 if (enteredModification
!= 0) {
992 enteredModification
++;
993 insertionSet
= false;
998 position
, insertLength
,
1001 s
= insertion
.c_str();
1002 insertLength
= static_cast<int>(insertion
.length());
1006 SC_MOD_BEFOREINSERT
| SC_PERFORMED_USER
,
1007 position
, insertLength
,
1009 int prevLinesTotal
= LinesTotal();
1010 bool startSavePoint
= cb
.IsSavePoint();
1011 bool startSequence
= false;
1012 const char *text
= cb
.InsertString(position
, s
, insertLength
, startSequence
);
1013 if (startSavePoint
&& cb
.IsCollectingUndo())
1014 NotifySavePoint(!startSavePoint
);
1015 ModifiedAt(position
);
1018 SC_MOD_INSERTTEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
1019 position
, insertLength
,
1020 LinesTotal() - prevLinesTotal
, text
));
1021 if (insertionSet
) { // Free memory as could be large
1022 std::string().swap(insertion
);
1024 enteredModification
--;
1025 return insertLength
;
1028 void Document::ChangeInsertion(const char *s
, int length
) {
1029 insertionSet
= true;
1030 insertion
.assign(s
, length
);
1033 int SCI_METHOD
Document::AddData(char *data
, int length
) {
1035 int position
= Length();
1036 InsertString(position
, data
, length
);
1037 } catch (std::bad_alloc
&) {
1038 return SC_STATUS_BADALLOC
;
1040 return SC_STATUS_FAILURE
;
1045 void * SCI_METHOD
Document::ConvertToDocument() {
1049 int Document::Undo() {
1052 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1053 enteredModification
++;
1054 if (!cb
.IsReadOnly()) {
1055 bool startSavePoint
= cb
.IsSavePoint();
1056 bool multiLine
= false;
1057 int steps
= cb
.StartUndo();
1058 //Platform::DebugPrintf("Steps=%d\n", steps);
1059 int coalescedRemovePos
= -1;
1060 int coalescedRemoveLen
= 0;
1061 int prevRemoveActionPos
= -1;
1062 int prevRemoveActionLen
= 0;
1063 for (int step
= 0; step
< steps
; step
++) {
1064 const int prevLinesTotal
= LinesTotal();
1065 const Action
&action
= cb
.GetUndoStep();
1066 if (action
.at
== removeAction
) {
1067 NotifyModified(DocModification(
1068 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
1069 } else if (action
.at
== containerAction
) {
1070 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
1071 dm
.token
= action
.position
;
1073 if (!action
.mayCoalesce
) {
1074 coalescedRemovePos
= -1;
1075 coalescedRemoveLen
= 0;
1076 prevRemoveActionPos
= -1;
1077 prevRemoveActionLen
= 0;
1080 NotifyModified(DocModification(
1081 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
1083 cb
.PerformUndoStep();
1084 if (action
.at
!= containerAction
) {
1085 ModifiedAt(action
.position
);
1086 newPos
= action
.position
;
1089 int modFlags
= SC_PERFORMED_UNDO
;
1090 // With undo, an insertion action becomes a deletion notification
1091 if (action
.at
== removeAction
) {
1092 newPos
+= action
.lenData
;
1093 modFlags
|= SC_MOD_INSERTTEXT
;
1094 if ((coalescedRemoveLen
> 0) &&
1095 (action
.position
== prevRemoveActionPos
|| action
.position
== (prevRemoveActionPos
+ prevRemoveActionLen
))) {
1096 coalescedRemoveLen
+= action
.lenData
;
1097 newPos
= coalescedRemovePos
+ coalescedRemoveLen
;
1099 coalescedRemovePos
= action
.position
;
1100 coalescedRemoveLen
= action
.lenData
;
1102 prevRemoveActionPos
= action
.position
;
1103 prevRemoveActionLen
= action
.lenData
;
1104 } else if (action
.at
== insertAction
) {
1105 modFlags
|= SC_MOD_DELETETEXT
;
1106 coalescedRemovePos
= -1;
1107 coalescedRemoveLen
= 0;
1108 prevRemoveActionPos
= -1;
1109 prevRemoveActionLen
= 0;
1112 modFlags
|= SC_MULTISTEPUNDOREDO
;
1113 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1114 if (linesAdded
!= 0)
1116 if (step
== steps
- 1) {
1117 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1119 modFlags
|= SC_MULTILINEUNDOREDO
;
1121 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
1122 linesAdded
, action
.data
));
1125 bool endSavePoint
= cb
.IsSavePoint();
1126 if (startSavePoint
!= endSavePoint
)
1127 NotifySavePoint(endSavePoint
);
1129 enteredModification
--;
1134 int Document::Redo() {
1137 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1138 enteredModification
++;
1139 if (!cb
.IsReadOnly()) {
1140 bool startSavePoint
= cb
.IsSavePoint();
1141 bool multiLine
= false;
1142 int steps
= cb
.StartRedo();
1143 for (int step
= 0; step
< steps
; step
++) {
1144 const int prevLinesTotal
= LinesTotal();
1145 const Action
&action
= cb
.GetRedoStep();
1146 if (action
.at
== insertAction
) {
1147 NotifyModified(DocModification(
1148 SC_MOD_BEFOREINSERT
| SC_PERFORMED_REDO
, action
));
1149 } else if (action
.at
== containerAction
) {
1150 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_REDO
);
1151 dm
.token
= action
.position
;
1154 NotifyModified(DocModification(
1155 SC_MOD_BEFOREDELETE
| SC_PERFORMED_REDO
, action
));
1157 cb
.PerformRedoStep();
1158 if (action
.at
!= containerAction
) {
1159 ModifiedAt(action
.position
);
1160 newPos
= action
.position
;
1163 int modFlags
= SC_PERFORMED_REDO
;
1164 if (action
.at
== insertAction
) {
1165 newPos
+= action
.lenData
;
1166 modFlags
|= SC_MOD_INSERTTEXT
;
1167 } else if (action
.at
== removeAction
) {
1168 modFlags
|= SC_MOD_DELETETEXT
;
1171 modFlags
|= SC_MULTISTEPUNDOREDO
;
1172 const int linesAdded
= LinesTotal() - prevLinesTotal
;
1173 if (linesAdded
!= 0)
1175 if (step
== steps
- 1) {
1176 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1178 modFlags
|= SC_MULTILINEUNDOREDO
;
1181 DocModification(modFlags
, action
.position
, action
.lenData
,
1182 linesAdded
, action
.data
));
1185 bool endSavePoint
= cb
.IsSavePoint();
1186 if (startSavePoint
!= endSavePoint
)
1187 NotifySavePoint(endSavePoint
);
1189 enteredModification
--;
1194 void Document::DelChar(int pos
) {
1195 DeleteChars(pos
, LenChar(pos
));
1198 void Document::DelCharBack(int pos
) {
1201 } else if (IsCrLf(pos
- 2)) {
1202 DeleteChars(pos
- 2, 2);
1203 } else if (dbcsCodePage
) {
1204 int startChar
= NextPosition(pos
, -1);
1205 DeleteChars(startChar
, pos
- startChar
);
1207 DeleteChars(pos
- 1, 1);
// Column of the first tab stop strictly after column pos, for tab stops
// every tabSize columns. tabSize must not be 0.
static int NextTab(int pos, int tabSize) {
	return ((pos / tabSize) + 1) * tabSize;
}
// Build a whitespace string that is indent columns wide.
// Unless insertSpaces is set, as many tabs of width tabSize as fit are used
// first, and the remainder is filled with spaces.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	// A tabSize of 0 or less would never reduce indent, so the loop would not
	// end; fall back to spaces only in that case.
	if (!insertSpaces && tabSize > 0) {
		while (indent >= tabSize) {
			indentation += '\t';
			indent -= tabSize;
		}
	}
	while (indent > 0) {
		indentation += ' ';
		indent--;
	}
	return indentation;
}
1230 int SCI_METHOD
Document::GetLineIndentation(int line
) {
1232 if ((line
>= 0) && (line
< LinesTotal())) {
1233 int lineStart
= LineStart(line
);
1234 int length
= Length();
1235 for (int i
= lineStart
; i
< length
; i
++) {
1236 char ch
= cb
.CharAt(i
);
1239 else if (ch
== '\t')
1240 indent
= NextTab(indent
, tabInChars
);
1248 int Document::SetLineIndentation(int line
, int indent
) {
1249 int indentOfLine
= GetLineIndentation(line
);
1252 if (indent
!= indentOfLine
) {
1253 std::string linebuf
= CreateIndentation(indent
, tabInChars
, !useTabs
);
1254 int thisLineStart
= LineStart(line
);
1255 int indentPos
= GetLineIndentPosition(line
);
1257 DeleteChars(thisLineStart
, indentPos
- thisLineStart
);
1258 return thisLineStart
+ InsertString(thisLineStart
, linebuf
.c_str(),
1259 static_cast<int>(linebuf
.length()));
1261 return GetLineIndentPosition(line
);
1265 int Document::GetLineIndentPosition(int line
) const {
1268 int pos
= LineStart(line
);
1269 int length
= Length();
1270 while ((pos
< length
) && IsSpaceOrTab(cb
.CharAt(pos
))) {
1276 int Document::GetColumn(int pos
) {
1278 int line
= LineFromPosition(pos
);
1279 if ((line
>= 0) && (line
< LinesTotal())) {
1280 for (int i
= LineStart(line
); i
< pos
;) {
1281 char ch
= cb
.CharAt(i
);
1283 column
= NextTab(column
, tabInChars
);
1285 } else if (ch
== '\r') {
1287 } else if (ch
== '\n') {
1289 } else if (i
>= Length()) {
1293 i
= NextPosition(i
, 1);
1300 int Document::CountCharacters(int startPos
, int endPos
) {
1301 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1302 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1305 while (i
< endPos
) {
1309 i
= NextPosition(i
, 1);
1314 int Document::FindColumn(int line
, int column
) {
1315 int position
= LineStart(line
);
1316 if ((line
>= 0) && (line
< LinesTotal())) {
1317 int columnCurrent
= 0;
1318 while ((columnCurrent
< column
) && (position
< Length())) {
1319 char ch
= cb
.CharAt(position
);
1321 columnCurrent
= NextTab(columnCurrent
, tabInChars
);
1322 if (columnCurrent
> column
)
1325 } else if (ch
== '\r') {
1327 } else if (ch
== '\n') {
1331 position
= NextPosition(position
, 1);
1338 void Document::Indent(bool forwards
, int lineBottom
, int lineTop
) {
1339 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1340 for (int line
= lineBottom
; line
>= lineTop
; line
--) {
1341 int indentOfLine
= GetLineIndentation(line
);
1343 if (LineStart(line
) < LineEnd(line
)) {
1344 SetLineIndentation(line
, indentOfLine
+ IndentSize());
1347 SetLineIndentation(line
, indentOfLine
- IndentSize());
1352 // Convert line endings for a piece of text to a particular mode.
1353 // Stop at len or when a NUL is found.
1354 std::string
Document::TransformLineEnds(const char *s
, size_t len
, int eolModeWanted
) {
1356 for (size_t i
= 0; (i
< len
) && (s
[i
]); i
++) {
1357 if (s
[i
] == '\n' || s
[i
] == '\r') {
1358 if (eolModeWanted
== SC_EOL_CR
) {
1359 dest
.push_back('\r');
1360 } else if (eolModeWanted
== SC_EOL_LF
) {
1361 dest
.push_back('\n');
1362 } else { // eolModeWanted == SC_EOL_CRLF
1363 dest
.push_back('\r');
1364 dest
.push_back('\n');
1366 if ((s
[i
] == '\r') && (i
+1 < len
) && (s
[i
+1] == '\n')) {
1370 dest
.push_back(s
[i
]);
1376 void Document::ConvertLineEnds(int eolModeSet
) {
1379 for (int pos
= 0; pos
< Length(); pos
++) {
1380 if (cb
.CharAt(pos
) == '\r') {
1381 if (cb
.CharAt(pos
+ 1) == '\n') {
1383 if (eolModeSet
== SC_EOL_CR
) {
1384 DeleteChars(pos
+ 1, 1); // Delete the LF
1385 } else if (eolModeSet
== SC_EOL_LF
) {
1386 DeleteChars(pos
, 1); // Delete the CR
1392 if (eolModeSet
== SC_EOL_CRLF
) {
1393 pos
+= InsertString(pos
+ 1, "\n", 1); // Insert LF
1394 } else if (eolModeSet
== SC_EOL_LF
) {
1395 pos
+= InsertString(pos
, "\n", 1); // Insert LF
1396 DeleteChars(pos
, 1); // Delete CR
1400 } else if (cb
.CharAt(pos
) == '\n') {
1402 if (eolModeSet
== SC_EOL_CRLF
) {
1403 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1404 } else if (eolModeSet
== SC_EOL_CR
) {
1405 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1406 DeleteChars(pos
, 1); // Delete LF
1414 bool Document::IsWhiteLine(int line
) const {
1415 int currentChar
= LineStart(line
);
1416 int endLine
= LineEnd(line
);
1417 while (currentChar
< endLine
) {
1418 if (cb
.CharAt(currentChar
) != ' ' && cb
.CharAt(currentChar
) != '\t') {
1426 int Document::ParaUp(int pos
) const {
1427 int line
= LineFromPosition(pos
);
1429 while (line
>= 0 && IsWhiteLine(line
)) { // skip empty lines
1432 while (line
>= 0 && !IsWhiteLine(line
)) { // skip non-empty lines
1436 return LineStart(line
);
1439 int Document::ParaDown(int pos
) const {
1440 int line
= LineFromPosition(pos
);
1441 while (line
< LinesTotal() && !IsWhiteLine(line
)) { // skip non-empty lines
1444 while (line
< LinesTotal() && IsWhiteLine(line
)) { // skip empty lines
1447 if (line
< LinesTotal())
1448 return LineStart(line
);
1449 else // end of a document
1450 return LineEnd(line
-1);
1453 CharClassify::cc
Document::WordCharClass(unsigned char ch
) const {
1454 if ((SC_CP_UTF8
== dbcsCodePage
) && (!UTF8IsAscii(ch
)))
1455 return CharClassify::ccWord
;
1456 return charClass
.GetClass(ch
);
1460 * Used by commmands that want to select whole words.
1461 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1463 int Document::ExtendWordSelect(int pos
, int delta
, bool onlyWordCharacters
) {
1464 CharClassify::cc ccStart
= CharClassify::ccWord
;
1466 if (!onlyWordCharacters
)
1467 ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1468 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
))
1471 if (!onlyWordCharacters
&& pos
< Length())
1472 ccStart
= WordCharClass(cb
.CharAt(pos
));
1473 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1476 return MovePositionOutsideChar(pos
, delta
, true);
1480 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1482 * This is looking for a transition between character classes although there is also some
1483 * additional movement to transit white space.
1484 * Used by cursor movement by word commands.
1486 int Document::NextWordStart(int pos
, int delta
) {
1488 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
))
1491 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1492 while (pos
> 0 && (WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
)) {
1497 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1498 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == ccStart
))
1500 while (pos
< (Length()) && (WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
))
1507 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1509 * This is looking for a transition between character classes although there is also some
1510 * additional movement to transit white space.
1511 * Used by cursor movement by word commands.
1513 int Document::NextWordEnd(int pos
, int delta
) {
1516 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
-1));
1517 if (ccStart
!= CharClassify::ccSpace
) {
1518 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == ccStart
) {
1522 while (pos
> 0 && WordCharClass(cb
.CharAt(pos
- 1)) == CharClassify::ccSpace
) {
1527 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == CharClassify::ccSpace
) {
1530 if (pos
< Length()) {
1531 CharClassify::cc ccStart
= WordCharClass(cb
.CharAt(pos
));
1532 while (pos
< Length() && WordCharClass(cb
.CharAt(pos
)) == ccStart
) {
1541 * Check that the character at the given position is a word or punctuation character and that
1542 * the previous character is of a different character class.
1544 bool Document::IsWordStartAt(int pos
) const {
1546 CharClassify::cc ccPos
= WordCharClass(CharAt(pos
));
1547 return (ccPos
== CharClassify::ccWord
|| ccPos
== CharClassify::ccPunctuation
) &&
1548 (ccPos
!= WordCharClass(CharAt(pos
- 1)));
1554 * Check that the character at the given position is a word or punctuation character and that
1555 * the next character is of a different character class.
1557 bool Document::IsWordEndAt(int pos
) const {
1558 if (pos
< Length()) {
1559 CharClassify::cc ccPrev
= WordCharClass(CharAt(pos
-1));
1560 return (ccPrev
== CharClassify::ccWord
|| ccPrev
== CharClassify::ccPunctuation
) &&
1561 (ccPrev
!= WordCharClass(CharAt(pos
)));
1567 * Check that the given range is has transitions between character classes at both
1568 * ends and where the characters on the inside are word or punctuation characters.
1570 bool Document::IsWordAt(int start
, int end
) const {
1571 return IsWordStartAt(start
) && IsWordEndAt(end
);
1574 bool Document::MatchesWordOptions(bool word
, bool wordStart
, int pos
, int length
) const {
1575 return (!word
&& !wordStart
) ||
1576 (word
&& IsWordAt(pos
, pos
+ length
)) ||
1577 (wordStart
&& IsWordStartAt(pos
));
1580 bool Document::HasCaseFolder(void) const {
1584 void Document::SetCaseFolder(CaseFolder
*pcf_
) {
1590 * Find text in document, supporting both forward and backward
1591 * searches (just pass minPos > maxPos to do a backward search)
1592 * Has not been tested with backwards DBCS searches yet.
1594 long Document::FindText(int minPos
, int maxPos
, const char *search
,
1595 bool caseSensitive
, bool word
, bool wordStart
, bool regExp
, int flags
,
1601 regex
= CreateRegexSearch(&charClass
);
1602 return regex
->FindText(this, minPos
, maxPos
, search
, caseSensitive
, word
, wordStart
, flags
, length
);
1605 const bool forward
= minPos
<= maxPos
;
1606 const int increment
= forward
? 1 : -1;
1608 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1609 const int startPos
= MovePositionOutsideChar(minPos
, increment
, false);
1610 const int endPos
= MovePositionOutsideChar(maxPos
, increment
, false);
1612 // Compute actual search ranges needed
1613 const int lengthFind
= *length
;
1615 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1616 const int limitPos
= Platform::Maximum(startPos
, endPos
);
1619 // Back all of a character
1620 pos
= NextPosition(pos
, increment
);
1622 if (caseSensitive
) {
1623 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1624 const char charStartSearch
= search
[0];
1625 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1626 if (CharAt(pos
) == charStartSearch
) {
1627 bool found
= (pos
+ lengthFind
) <= limitPos
;
1628 for (int indexSearch
= 1; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1629 found
= CharAt(pos
+ indexSearch
) == search
[indexSearch
];
1631 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1635 if (!NextCharacter(pos
, increment
))
1638 } else if (SC_CP_UTF8
== dbcsCodePage
) {
1639 const size_t maxFoldingExpansion
= 4;
1640 std::vector
<char> searchThing(lengthFind
* UTF8MaxBytes
* maxFoldingExpansion
+ 1);
1641 const int lenSearch
= static_cast<int>(
1642 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1643 char bytes
[UTF8MaxBytes
+ 1];
1644 char folded
[UTF8MaxBytes
* maxFoldingExpansion
+ 1];
1645 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1646 int widthFirstCharacter
= 0;
1647 int posIndexDocument
= pos
;
1648 int indexSearch
= 0;
1649 bool characterMatches
= true;
1651 const unsigned char leadByte
= static_cast<unsigned char>(cb
.CharAt(posIndexDocument
));
1652 bytes
[0] = leadByte
;
1654 if (!UTF8IsAscii(leadByte
)) {
1655 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1656 for (int b
=1; b
<widthCharBytes
; b
++) {
1657 bytes
[b
] = cb
.CharAt(posIndexDocument
+b
);
1659 widthChar
= UTF8Classify(reinterpret_cast<const unsigned char *>(bytes
), widthCharBytes
) & UTF8MaskWidth
;
1661 if (!widthFirstCharacter
)
1662 widthFirstCharacter
= widthChar
;
1663 if ((posIndexDocument
+ widthChar
) > limitPos
)
1665 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1666 folded
[lenFlat
] = 0;
1667 // Does folded match the buffer
1668 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1669 if (!characterMatches
)
1671 posIndexDocument
+= widthChar
;
1672 indexSearch
+= lenFlat
;
1673 if (indexSearch
>= lenSearch
)
1676 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1677 if (MatchesWordOptions(word
, wordStart
, pos
, posIndexDocument
- pos
)) {
1678 *length
= posIndexDocument
- pos
;
1683 pos
+= widthFirstCharacter
;
1685 if (!NextCharacter(pos
, increment
))
1689 } else if (dbcsCodePage
) {
1690 const size_t maxBytesCharacter
= 2;
1691 const size_t maxFoldingExpansion
= 4;
1692 std::vector
<char> searchThing(lengthFind
* maxBytesCharacter
* maxFoldingExpansion
+ 1);
1693 const int lenSearch
= static_cast<int>(
1694 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
));
1695 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
1696 int indexDocument
= 0;
1697 int indexSearch
= 0;
1698 bool characterMatches
= true;
1699 while (characterMatches
&&
1700 ((pos
+ indexDocument
) < limitPos
) &&
1701 (indexSearch
< lenSearch
)) {
1702 char bytes
[maxBytesCharacter
+ 1];
1703 bytes
[0] = cb
.CharAt(pos
+ indexDocument
);
1704 const int widthChar
= IsDBCSLeadByte(bytes
[0]) ? 2 : 1;
1706 bytes
[1] = cb
.CharAt(pos
+ indexDocument
+ 1);
1707 if ((pos
+ indexDocument
+ widthChar
) > limitPos
)
1709 char folded
[maxBytesCharacter
* maxFoldingExpansion
+ 1];
1710 const int lenFlat
= static_cast<int>(pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
));
1711 folded
[lenFlat
] = 0;
1712 // Does folded match the buffer
1713 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
1714 indexDocument
+= widthChar
;
1715 indexSearch
+= lenFlat
;
1717 if (characterMatches
&& (indexSearch
== static_cast<int>(lenSearch
))) {
1718 if (MatchesWordOptions(word
, wordStart
, pos
, indexDocument
)) {
1719 *length
= indexDocument
;
1723 if (!NextCharacter(pos
, increment
))
1727 const int endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
1728 std::vector
<char> searchThing(lengthFind
+ 1);
1729 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
1730 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
1731 bool found
= (pos
+ lengthFind
) <= limitPos
;
1732 for (int indexSearch
= 0; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
1733 char ch
= CharAt(pos
+ indexSearch
);
1735 pcf
->Fold(folded
, sizeof(folded
), &ch
, 1);
1736 found
= folded
[0] == searchThing
[indexSearch
];
1738 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
1741 if (!NextCharacter(pos
, increment
))
1746 //Platform::DebugPrintf("Not found\n");
1750 const char *Document::SubstituteByPosition(const char *text
, int *length
) {
1752 return regex
->SubstituteByPosition(this, text
, length
);
1757 int Document::LinesTotal() const {
1761 void Document::SetDefaultCharClasses(bool includeWordClass
) {
1762 charClass
.SetDefaultCharClasses(includeWordClass
);
1765 void Document::SetCharClasses(const unsigned char *chars
, CharClassify::cc newCharClass
) {
1766 charClass
.SetCharClasses(chars
, newCharClass
);
1769 int Document::GetCharsOfClass(CharClassify::cc characterClass
, unsigned char *buffer
) {
1770 return charClass
.GetCharsOfClass(characterClass
, buffer
);
1773 void SCI_METHOD
Document::StartStyling(int position
, char) {
1774 endStyled
= position
;
1777 bool SCI_METHOD
Document::SetStyleFor(int length
, char style
) {
1778 if (enteredStyling
!= 0) {
1782 int prevEndStyled
= endStyled
;
1783 if (cb
.SetStyleFor(endStyled
, length
, style
)) {
1784 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1785 prevEndStyled
, length
);
1788 endStyled
+= length
;
1794 bool SCI_METHOD
Document::SetStyles(int length
, const char *styles
) {
1795 if (enteredStyling
!= 0) {
1799 bool didChange
= false;
1802 for (int iPos
= 0; iPos
< length
; iPos
++, endStyled
++) {
1803 PLATFORM_ASSERT(endStyled
< Length());
1804 if (cb
.SetStyleAt(endStyled
, styles
[iPos
])) {
1806 startMod
= endStyled
;
1813 DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
1814 startMod
, endMod
- startMod
+ 1);
1822 void Document::EnsureStyledTo(int pos
) {
1823 if ((enteredStyling
== 0) && (pos
> GetEndStyled())) {
1824 IncrementStyleClock();
1825 if (pli
&& !pli
->UseContainerLexing()) {
1826 int lineEndStyled
= LineFromPosition(GetEndStyled());
1827 int endStyledTo
= LineStart(lineEndStyled
);
1828 pli
->Colourise(endStyledTo
, pos
);
1830 // Ask the watchers to style, and stop as soon as one responds.
1831 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin();
1832 (pos
> GetEndStyled()) && (it
!= watchers
.end()); ++it
) {
1833 it
->watcher
->NotifyStyleNeeded(this, it
->userData
, pos
);
1839 void Document::LexerChanged() {
1840 // Tell the watchers the lexer has changed.
1841 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1842 it
->watcher
->NotifyLexerChanged(this, it
->userData
);
1846 int SCI_METHOD
Document::SetLineState(int line
, int state
) {
1847 int statePrevious
= static_cast<LineState
*>(perLineData
[ldState
])->SetLineState(line
, state
);
1848 if (state
!= statePrevious
) {
1849 DocModification
mh(SC_MOD_CHANGELINESTATE
, LineStart(line
), 0, 0, 0, line
);
1852 return statePrevious
;
1855 int SCI_METHOD
Document::GetLineState(int line
) const {
1856 return static_cast<LineState
*>(perLineData
[ldState
])->GetLineState(line
);
1859 int Document::GetMaxLineState() {
1860 return static_cast<LineState
*>(perLineData
[ldState
])->GetMaxLineState();
1863 void SCI_METHOD
Document::ChangeLexerState(int start
, int end
) {
1864 DocModification
mh(SC_MOD_LEXERSTATE
, start
, end
-start
, 0, 0, 0);
1868 StyledText
Document::MarginStyledText(int line
) const {
1869 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldMargin
]);
1870 return StyledText(pla
->Length(line
), pla
->Text(line
),
1871 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
1874 void Document::MarginSetText(int line
, const char *text
) {
1875 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetText(line
, text
);
1876 DocModification
mh(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
);
1880 void Document::MarginSetStyle(int line
, int style
) {
1881 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyle(line
, style
);
1882 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1885 void Document::MarginSetStyles(int line
, const unsigned char *styles
) {
1886 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->SetStyles(line
, styles
);
1887 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
), 0, 0, 0, line
));
1890 void Document::MarginClearAll() {
1891 int maxEditorLine
= LinesTotal();
1892 for (int l
=0; l
<maxEditorLine
; l
++)
1893 MarginSetText(l
, 0);
1894 // Free remaining data
1895 static_cast<LineAnnotation
*>(perLineData
[ldMargin
])->ClearAll();
1898 StyledText
Document::AnnotationStyledText(int line
) const {
1899 LineAnnotation
*pla
= static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
]);
1900 return StyledText(pla
->Length(line
), pla
->Text(line
),
1901 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
1904 void Document::AnnotationSetText(int line
, const char *text
) {
1905 if (line
>= 0 && line
< LinesTotal()) {
1906 const int linesBefore
= AnnotationLines(line
);
1907 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetText(line
, text
);
1908 const int linesAfter
= AnnotationLines(line
);
1909 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
1910 mh
.annotationLinesAdded
= linesAfter
- linesBefore
;
1915 void Document::AnnotationSetStyle(int line
, int style
) {
1916 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyle(line
, style
);
1917 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
), 0, 0, 0, line
);
1921 void Document::AnnotationSetStyles(int line
, const unsigned char *styles
) {
1922 if (line
>= 0 && line
< LinesTotal()) {
1923 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->SetStyles(line
, styles
);
1927 int Document::AnnotationLines(int line
) const {
1928 return static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->Lines(line
);
1931 void Document::AnnotationClearAll() {
1932 int maxEditorLine
= LinesTotal();
1933 for (int l
=0; l
<maxEditorLine
; l
++)
1934 AnnotationSetText(l
, 0);
1935 // Free remaining data
1936 static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
])->ClearAll();
1939 void Document::IncrementStyleClock() {
1940 styleClock
= (styleClock
+ 1) % 0x100000;
1943 void SCI_METHOD
Document::DecorationFillRange(int position
, int value
, int fillLength
) {
1944 if (decorations
.FillRange(position
, value
, fillLength
)) {
1945 DocModification
mh(SC_MOD_CHANGEINDICATOR
| SC_PERFORMED_USER
,
1946 position
, fillLength
);
1951 bool Document::AddWatcher(DocWatcher
*watcher
, void *userData
) {
1952 WatcherWithUserData
wwud(watcher
, userData
);
1953 std::vector
<WatcherWithUserData
>::iterator it
=
1954 std::find(watchers
.begin(), watchers
.end(), wwud
);
1955 if (it
!= watchers
.end())
1957 watchers
.push_back(wwud
);
1961 bool Document::RemoveWatcher(DocWatcher
*watcher
, void *userData
) {
1962 std::vector
<WatcherWithUserData
>::iterator it
=
1963 std::find(watchers
.begin(), watchers
.end(), WatcherWithUserData(watcher
, userData
));
1964 if (it
!= watchers
.end()) {
1971 void Document::NotifyModifyAttempt() {
1972 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1973 it
->watcher
->NotifyModifyAttempt(this, it
->userData
);
1977 void Document::NotifySavePoint(bool atSavePoint
) {
1978 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1979 it
->watcher
->NotifySavePoint(this, it
->userData
, atSavePoint
);
1983 void Document::NotifyModified(DocModification mh
) {
1984 if (mh
.modificationType
& SC_MOD_INSERTTEXT
) {
1985 decorations
.InsertSpace(mh
.position
, mh
.length
);
1986 } else if (mh
.modificationType
& SC_MOD_DELETETEXT
) {
1987 decorations
.DeleteRange(mh
.position
, mh
.length
);
1989 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin(); it
!= watchers
.end(); ++it
) {
1990 it
->watcher
->NotifyModified(this, mh
, it
->userData
);
1994 bool Document::IsWordPartSeparator(char ch
) const {
1995 return (WordCharClass(ch
) == CharClassify::ccWord
) && IsPunctuation(ch
);
1998 int Document::WordPartLeft(int pos
) {
2001 char startChar
= cb
.CharAt(pos
);
2002 if (IsWordPartSeparator(startChar
)) {
2003 while (pos
> 0 && IsWordPartSeparator(cb
.CharAt(pos
))) {
2008 startChar
= cb
.CharAt(pos
);
2010 if (IsLowerCase(startChar
)) {
2011 while (pos
> 0 && IsLowerCase(cb
.CharAt(pos
)))
2013 if (!IsUpperCase(cb
.CharAt(pos
)) && !IsLowerCase(cb
.CharAt(pos
)))
2015 } else if (IsUpperCase(startChar
)) {
2016 while (pos
> 0 && IsUpperCase(cb
.CharAt(pos
)))
2018 if (!IsUpperCase(cb
.CharAt(pos
)))
2020 } else if (IsADigit(startChar
)) {
2021 while (pos
> 0 && IsADigit(cb
.CharAt(pos
)))
2023 if (!IsADigit(cb
.CharAt(pos
)))
2025 } else if (IsPunctuation(startChar
)) {
2026 while (pos
> 0 && IsPunctuation(cb
.CharAt(pos
)))
2028 if (!IsPunctuation(cb
.CharAt(pos
)))
2030 } else if (isspacechar(startChar
)) {
2031 while (pos
> 0 && isspacechar(cb
.CharAt(pos
)))
2033 if (!isspacechar(cb
.CharAt(pos
)))
2035 } else if (!IsASCII(startChar
)) {
2036 while (pos
> 0 && !IsASCII(cb
.CharAt(pos
)))
2038 if (IsASCII(cb
.CharAt(pos
)))
2048 int Document::WordPartRight(int pos
) {
2049 char startChar
= cb
.CharAt(pos
);
2050 int length
= Length();
2051 if (IsWordPartSeparator(startChar
)) {
2052 while (pos
< length
&& IsWordPartSeparator(cb
.CharAt(pos
)))
2054 startChar
= cb
.CharAt(pos
);
2056 if (!IsASCII(startChar
)) {
2057 while (pos
< length
&& !IsASCII(cb
.CharAt(pos
)))
2059 } else if (IsLowerCase(startChar
)) {
2060 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2062 } else if (IsUpperCase(startChar
)) {
2063 if (IsLowerCase(cb
.CharAt(pos
+ 1))) {
2065 while (pos
< length
&& IsLowerCase(cb
.CharAt(pos
)))
2068 while (pos
< length
&& IsUpperCase(cb
.CharAt(pos
)))
2071 if (IsLowerCase(cb
.CharAt(pos
)) && IsUpperCase(cb
.CharAt(pos
- 1)))
2073 } else if (IsADigit(startChar
)) {
2074 while (pos
< length
&& IsADigit(cb
.CharAt(pos
)))
2076 } else if (IsPunctuation(startChar
)) {
2077 while (pos
< length
&& IsPunctuation(cb
.CharAt(pos
)))
2079 } else if (isspacechar(startChar
)) {
2080 while (pos
< length
&& isspacechar(cb
.CharAt(pos
)))
/** Return true when @a c is a line feed or a carriage return. */
bool IsLineEndChar(char c) {
	return (c == '\n' || c == '\r');
}
2092 int Document::ExtendStyleRange(int pos
, int delta
, bool singleLine
) {
2093 int sStart
= cb
.StyleAt(pos
);
2095 while (pos
> 0 && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2099 while (pos
< (Length()) && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2105 static char BraceOpposite(char ch
) {
2128 // TODO: should be able to extend styled region to find matching brace
2129 int Document::BraceMatch(int position
, int /*maxReStyle*/) {
2130 char chBrace
= CharAt(position
);
2131 char chSeek
= BraceOpposite(chBrace
);
2134 char styBrace
= static_cast<char>(StyleAt(position
));
2136 if (chBrace
== '(' || chBrace
== '[' || chBrace
== '{' || chBrace
== '<')
2139 position
= NextPosition(position
, direction
);
2140 while ((position
>= 0) && (position
< Length())) {
2141 char chAtPos
= CharAt(position
);
2142 char styAtPos
= static_cast<char>(StyleAt(position
));
2143 if ((position
> GetEndStyled()) || (styAtPos
== styBrace
)) {
2144 if (chAtPos
== chBrace
)
2146 if (chAtPos
== chSeek
)
2151 int positionBeforeMove
= position
;
2152 position
= NextPosition(position
, direction
);
2153 if (position
== positionBeforeMove
)
2160 * Implementation of RegexSearchBase for the default built-in regular expression engine
2162 class BuiltinRegex
: public RegexSearchBase
{
2164 explicit BuiltinRegex(CharClassify
*charClassTable
) : search(charClassTable
) {}
2166 virtual ~BuiltinRegex() {
2169 virtual long FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2170 bool caseSensitive
, bool word
, bool wordStart
, int flags
,
2173 virtual const char *SubstituteByPosition(Document
*doc
, const char *text
, int *length
);
2177 std::string substituted
;
2180 // Define a way for the Regular Expression code to access the document
2181 class DocumentIndexer
: public CharacterIndexer
{
2185 DocumentIndexer(Document
*pdoc_
, int end_
) :
2186 pdoc(pdoc_
), end(end_
) {
2189 virtual ~DocumentIndexer() {
2192 virtual char CharAt(int index
) {
2193 if (index
< 0 || index
>= end
)
2196 return pdoc
->CharAt(index
);
2200 long BuiltinRegex::FindText(Document
*doc
, int minPos
, int maxPos
, const char *s
,
2201 bool caseSensitive
, bool, bool, int flags
,
2203 const bool posix
= (flags
& SCFIND_POSIX
) != 0;
2204 const int increment
= (minPos
<= maxPos
) ? 1 : -1;
2206 int startPos
= minPos
;
2207 int endPos
= maxPos
;
2209 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2210 startPos
= doc
->MovePositionOutsideChar(startPos
, 1, false);
2211 endPos
= doc
->MovePositionOutsideChar(endPos
, 1, false);
2213 const char *errmsg
= search
.Compile(s
, *length
, caseSensitive
, posix
);
2217 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2218 // Replace first '.' with '-' in each property file variable reference:
2219 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2220 // Replace: $(\1-\2)
2221 int lineRangeStart
= doc
->LineFromPosition(startPos
);
2222 const int lineRangeEnd
= doc
->LineFromPosition(endPos
);
2223 if ((increment
== 1) &&
2224 (startPos
>= doc
->LineEnd(lineRangeStart
)) &&
2225 (lineRangeStart
< lineRangeEnd
)) {
2226 // the start position is at end of line or between line end characters.
2228 startPos
= doc
->LineStart(lineRangeStart
);
2229 } else if ((increment
== -1) &&
2230 (startPos
<= doc
->LineStart(lineRangeStart
)) &&
2231 (lineRangeStart
> lineRangeEnd
)) {
2232 // the start position is at beginning of line.
2234 startPos
= doc
->LineEnd(lineRangeStart
);
2238 const char searchEnd
= s
[*length
- 1];
2239 const char searchEndPrev
= (*length
> 1) ? s
[*length
- 2] : '\0';
2240 const int lineRangeBreak
= lineRangeEnd
+ increment
;
2241 for (int line
= lineRangeStart
; line
!= lineRangeBreak
; line
+= increment
) {
2242 int startOfLine
= doc
->LineStart(line
);
2243 int endOfLine
= doc
->LineEnd(line
);
2244 if (increment
== 1) {
2245 if (line
== lineRangeStart
) {
2246 if ((startPos
!= startOfLine
) && (s
[0] == '^'))
2247 continue; // Can't match start of line if start position after start of line
2248 startOfLine
= startPos
;
2250 if (line
== lineRangeEnd
) {
2251 if ((endPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2252 continue; // Can't match end of line if end position before end of line
2256 if (line
== lineRangeEnd
) {
2257 if ((endPos
!= startOfLine
) && (s
[0] == '^'))
2258 continue; // Can't match start of line if end position after start of line
2259 startOfLine
= endPos
;
2261 if (line
== lineRangeStart
) {
2262 if ((startPos
!= endOfLine
) && (searchEnd
== '$') && (searchEndPrev
!= '\\'))
2263 continue; // Can't match end of line if start position before end of line
2264 endOfLine
= startPos
;
2268 DocumentIndexer
di(doc
, endOfLine
);
2269 int success
= search
.Execute(di
, startOfLine
, endOfLine
);
2271 pos
= search
.bopat
[0];
2272 // Ensure only whole characters selected
2273 search
.eopat
[0] = doc
->MovePositionOutsideChar(search
.eopat
[0], 1, false);
2274 lenRet
= search
.eopat
[0] - search
.bopat
[0];
2275 // There can be only one start of a line, so no need to look for last match in line
2276 if ((increment
== -1) && (s
[0] != '^')) {
2277 // Check for the last match on this line.
2278 int repetitions
= 1000; // Break out of infinite loop
2279 while (success
&& (search
.eopat
[0] <= endOfLine
) && (repetitions
--)) {
2280 success
= search
.Execute(di
, pos
+1, endOfLine
);
2282 if (search
.eopat
[0] <= minPos
) {
2283 pos
= search
.bopat
[0];
2284 lenRet
= search
.eopat
[0] - search
.bopat
[0];
2298 const char *BuiltinRegex::SubstituteByPosition(Document
*doc
, const char *text
, int *length
) {
2299 substituted
.clear();
2300 DocumentIndexer
di(doc
, doc
->Length());
2301 search
.GrabMatches(di
);
2302 for (int j
= 0; j
< *length
; j
++) {
2303 if (text
[j
] == '\\') {
2304 if (text
[j
+ 1] >= '0' && text
[j
+ 1] <= '9') {
2305 unsigned int patNum
= text
[j
+ 1] - '0';
2306 unsigned int len
= search
.eopat
[patNum
] - search
.bopat
[patNum
];
2307 if (!search
.pat
[patNum
].empty()) // Will be null if try for a match that did not occur
2308 substituted
.append(search
.pat
[patNum
].c_str(), len
);
2314 substituted
.push_back('\a');
2317 substituted
.push_back('\b');
2320 substituted
.push_back('\f');
2323 substituted
.push_back('\n');
2326 substituted
.push_back('\r');
2329 substituted
.push_back('\t');
2332 substituted
.push_back('\v');
2335 substituted
.push_back('\\');
2338 substituted
.push_back('\\');
2343 substituted
.push_back(text
[j
]);
2346 *length
= static_cast<int>(substituted
.length());
2347 return substituted
.c_str();
2350 #ifndef SCI_OWNREGEX
2352 #ifdef SCI_NAMESPACE
2354 RegexSearchBase
*Scintilla::CreateRegexSearch(CharClassify
*charClassTable
) {
2355 return new BuiltinRegex(charClassTable
);
2360 RegexSearchBase
*CreateRegexSearch(CharClassify
*charClassTable
) {
2361 return new BuiltinRegex(charClassTable
);