Update Scintilla to version 3.5.4
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blob47c2302e98e5fd24097e032c79cb007796419a8e
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "CellBuffer.h"
33 #include "PerLine.h"
34 #include "CharClassify.h"
35 #include "Decoration.h"
36 #include "CaseFolder.h"
37 #include "Document.h"
38 #include "RESearch.h"
39 #include "UniConversion.h"
40 #include "UnicodeFromUTF8.h"
42 #ifdef SCI_NAMESPACE
43 using namespace Scintilla;
44 #endif
46 static inline bool IsPunctuation(char ch) {
47 return IsASCII(ch) && ispunct(ch);
50 void LexInterface::Colourise(int start, int end) {
51 if (pdoc && instance && !performingStyle) {
52 // Protect against reentrance, which may occur, for example, when
53 // fold points are discovered while performing styling and the folding
54 // code looks for child lines which may trigger styling.
55 performingStyle = true;
57 int lengthDoc = pdoc->Length();
58 if (end == -1)
59 end = lengthDoc;
60 int len = end - start;
62 PLATFORM_ASSERT(len >= 0);
63 PLATFORM_ASSERT(start + len <= lengthDoc);
65 int styleStart = 0;
66 if (start > 0)
67 styleStart = pdoc->StyleAt(start - 1);
69 if (len > 0) {
70 instance->Lex(start, len, styleStart, pdoc);
71 instance->Fold(start, len, styleStart, pdoc);
74 performingStyle = false;
78 int LexInterface::LineEndTypesSupported() {
79 if (instance) {
80 int interfaceVersion = instance->Version();
81 if (interfaceVersion >= lvSubStyles) {
82 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
83 return ssinstance->LineEndTypesSupported();
86 return 0;
89 Document::Document() {
90 refCount = 0;
91 pcf = NULL;
92 #ifdef _WIN32
93 eolMode = SC_EOL_CRLF;
94 #else
95 eolMode = SC_EOL_LF;
96 #endif
97 dbcsCodePage = 0;
98 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
99 endStyled = 0;
100 styleClock = 0;
101 enteredModification = 0;
102 enteredStyling = 0;
103 enteredReadOnlyCount = 0;
104 insertionSet = false;
105 tabInChars = 8;
106 indentInChars = 0;
107 actualIndentInChars = 8;
108 useTabs = true;
109 tabIndents = true;
110 backspaceUnindents = false;
112 matchesValid = false;
113 regex = 0;
115 UTF8BytesOfLeadInitialise();
117 perLineData[ldMarkers] = new LineMarkers();
118 perLineData[ldLevels] = new LineLevels();
119 perLineData[ldState] = new LineState();
120 perLineData[ldMargin] = new LineAnnotation();
121 perLineData[ldAnnotation] = new LineAnnotation();
123 cb.SetPerLine(this);
125 pli = 0;
128 Document::~Document() {
129 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
130 it->watcher->NotifyDeleted(this, it->userData);
132 for (int j=0; j<ldSize; j++) {
133 delete perLineData[j];
134 perLineData[j] = 0;
136 delete regex;
137 regex = 0;
138 delete pli;
139 pli = 0;
140 delete pcf;
141 pcf = 0;
144 void Document::Init() {
145 for (int j=0; j<ldSize; j++) {
146 if (perLineData[j])
147 perLineData[j]->Init();
151 int Document::LineEndTypesSupported() const {
152 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
153 return pli->LineEndTypesSupported();
154 else
155 return 0;
158 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
159 if (dbcsCodePage != dbcsCodePage_) {
160 dbcsCodePage = dbcsCodePage_;
161 SetCaseFolder(NULL);
162 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
163 return true;
164 } else {
165 return false;
169 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
170 if (lineEndBitSet != lineEndBitSet_) {
171 lineEndBitSet = lineEndBitSet_;
172 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
173 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
174 ModifiedAt(0);
175 cb.SetLineEndTypes(lineEndBitSetActive);
176 return true;
177 } else {
178 return false;
180 } else {
181 return false;
185 void Document::InsertLine(int line) {
186 for (int j=0; j<ldSize; j++) {
187 if (perLineData[j])
188 perLineData[j]->InsertLine(line);
192 void Document::RemoveLine(int line) {
193 for (int j=0; j<ldSize; j++) {
194 if (perLineData[j])
195 perLineData[j]->RemoveLine(line);
199 // Increase reference count and return its previous value.
200 int Document::AddRef() {
201 return refCount++;
204 // Decrease reference count and return its previous value.
205 // Delete the document if reference count reaches zero.
206 int SCI_METHOD Document::Release() {
207 int curRefCount = --refCount;
208 if (curRefCount == 0)
209 delete this;
210 return curRefCount;
213 void Document::SetSavePoint() {
214 cb.SetSavePoint();
215 NotifySavePoint(true);
218 void Document::TentativeUndo() {
219 CheckReadOnly();
220 if (enteredModification == 0) {
221 enteredModification++;
222 if (!cb.IsReadOnly()) {
223 bool startSavePoint = cb.IsSavePoint();
224 bool multiLine = false;
225 int steps = cb.TentativeSteps();
226 //Platform::DebugPrintf("Steps=%d\n", steps);
227 for (int step = 0; step < steps; step++) {
228 const int prevLinesTotal = LinesTotal();
229 const Action &action = cb.GetUndoStep();
230 if (action.at == removeAction) {
231 NotifyModified(DocModification(
232 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
233 } else if (action.at == containerAction) {
234 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
235 dm.token = action.position;
236 NotifyModified(dm);
237 } else {
238 NotifyModified(DocModification(
239 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
241 cb.PerformUndoStep();
242 if (action.at != containerAction) {
243 ModifiedAt(action.position);
246 int modFlags = SC_PERFORMED_UNDO;
247 // With undo, an insertion action becomes a deletion notification
248 if (action.at == removeAction) {
249 modFlags |= SC_MOD_INSERTTEXT;
250 } else if (action.at == insertAction) {
251 modFlags |= SC_MOD_DELETETEXT;
253 if (steps > 1)
254 modFlags |= SC_MULTISTEPUNDOREDO;
255 const int linesAdded = LinesTotal() - prevLinesTotal;
256 if (linesAdded != 0)
257 multiLine = true;
258 if (step == steps - 1) {
259 modFlags |= SC_LASTSTEPINUNDOREDO;
260 if (multiLine)
261 modFlags |= SC_MULTILINEUNDOREDO;
263 NotifyModified(DocModification(modFlags, action.position, action.lenData,
264 linesAdded, action.data));
267 bool endSavePoint = cb.IsSavePoint();
268 if (startSavePoint != endSavePoint)
269 NotifySavePoint(endSavePoint);
271 cb.TentativeCommit();
273 enteredModification--;
277 int Document::GetMark(int line) {
278 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
281 int Document::MarkerNext(int lineStart, int mask) const {
282 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
285 int Document::AddMark(int line, int markerNum) {
286 if (line >= 0 && line <= LinesTotal()) {
287 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
288 AddMark(line, markerNum, LinesTotal());
289 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
290 NotifyModified(mh);
291 return prev;
292 } else {
293 return 0;
297 void Document::AddMarkSet(int line, int valueSet) {
298 if (line < 0 || line > LinesTotal()) {
299 return;
301 unsigned int m = valueSet;
302 for (int i = 0; m; i++, m >>= 1)
303 if (m & 1)
304 static_cast<LineMarkers *>(perLineData[ldMarkers])->
305 AddMark(line, i, LinesTotal());
306 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
307 NotifyModified(mh);
310 void Document::DeleteMark(int line, int markerNum) {
311 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
312 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
313 NotifyModified(mh);
316 void Document::DeleteMarkFromHandle(int markerHandle) {
317 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
318 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
319 mh.line = -1;
320 NotifyModified(mh);
323 void Document::DeleteAllMarks(int markerNum) {
324 bool someChanges = false;
325 for (int line = 0; line < LinesTotal(); line++) {
326 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
327 someChanges = true;
329 if (someChanges) {
330 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
331 mh.line = -1;
332 NotifyModified(mh);
336 int Document::LineFromHandle(int markerHandle) {
337 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
340 int SCI_METHOD Document::LineStart(int line) const {
341 return cb.LineStart(line);
344 bool Document::IsLineStartPosition(int position) const {
345 return LineStart(LineFromPosition(position)) == position;
348 int SCI_METHOD Document::LineEnd(int line) const {
349 if (line >= LinesTotal() - 1) {
350 return LineStart(line + 1);
351 } else {
352 int position = LineStart(line + 1);
353 if (SC_CP_UTF8 == dbcsCodePage) {
354 unsigned char bytes[] = {
355 static_cast<unsigned char>(cb.CharAt(position-3)),
356 static_cast<unsigned char>(cb.CharAt(position-2)),
357 static_cast<unsigned char>(cb.CharAt(position-1)),
359 if (UTF8IsSeparator(bytes)) {
360 return position - UTF8SeparatorLength;
362 if (UTF8IsNEL(bytes+1)) {
363 return position - UTF8NELLength;
366 position--; // Back over CR or LF
367 // When line terminator is CR+LF, may need to go back one more
368 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
369 position--;
371 return position;
375 void SCI_METHOD Document::SetErrorStatus(int status) {
376 // Tell the watchers an error has occurred.
377 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
378 it->watcher->NotifyErrorOccurred(this, it->userData, status);
382 int SCI_METHOD Document::LineFromPosition(int pos) const {
383 return cb.LineFromPosition(pos);
386 int Document::LineEndPosition(int position) const {
387 return LineEnd(LineFromPosition(position));
390 bool Document::IsLineEndPosition(int position) const {
391 return LineEnd(LineFromPosition(position)) == position;
394 bool Document::IsPositionInLineEnd(int position) const {
395 return position >= LineEnd(LineFromPosition(position));
398 int Document::VCHomePosition(int position) const {
399 int line = LineFromPosition(position);
400 int startPosition = LineStart(line);
401 int endLine = LineEnd(line);
402 int startText = startPosition;
403 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
404 startText++;
405 if (position == startText)
406 return startPosition;
407 else
408 return startText;
411 int SCI_METHOD Document::SetLevel(int line, int level) {
412 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
413 if (prev != level) {
414 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
415 LineStart(line), 0, 0, 0, line);
416 mh.foldLevelNow = level;
417 mh.foldLevelPrev = prev;
418 NotifyModified(mh);
420 return prev;
423 int SCI_METHOD Document::GetLevel(int line) const {
424 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
427 void Document::ClearLevels() {
428 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
431 static bool IsSubordinate(int levelStart, int levelTry) {
432 if (levelTry & SC_FOLDLEVELWHITEFLAG)
433 return true;
434 else
435 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
438 int Document::GetLastChild(int lineParent, int level, int lastLine) {
439 if (level == -1)
440 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
441 int maxLine = LinesTotal();
442 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
443 int lineMaxSubord = lineParent;
444 while (lineMaxSubord < maxLine - 1) {
445 EnsureStyledTo(LineStart(lineMaxSubord + 2));
446 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
447 break;
448 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
449 break;
450 lineMaxSubord++;
452 if (lineMaxSubord > lineParent) {
453 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
454 // Have chewed up some whitespace that belongs to a parent so seek back
455 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
456 lineMaxSubord--;
460 return lineMaxSubord;
463 int Document::GetFoldParent(int line) const {
464 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
465 int lineLook = line - 1;
466 while ((lineLook > 0) && (
467 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
468 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
470 lineLook--;
472 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
473 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
474 return lineLook;
475 } else {
476 return -1;
480 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
481 int level = GetLevel(line);
482 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
484 int lookLine = line;
485 int lookLineLevel = level;
486 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
487 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
488 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
489 lookLineLevel = GetLevel(--lookLine);
490 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
493 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
494 if (beginFoldBlock == -1) {
495 highlightDelimiter.Clear();
496 return;
499 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
500 int firstChangeableLineBefore = -1;
501 if (endFoldBlock < line) {
502 lookLine = beginFoldBlock - 1;
503 lookLineLevel = GetLevel(lookLine);
504 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
505 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
506 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
507 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
508 beginFoldBlock = lookLine;
509 endFoldBlock = line;
510 firstChangeableLineBefore = line - 1;
513 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
514 break;
515 lookLineLevel = GetLevel(--lookLine);
516 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
519 if (firstChangeableLineBefore == -1) {
520 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
521 lookLine >= beginFoldBlock;
522 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
523 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
524 firstChangeableLineBefore = lookLine;
525 break;
529 if (firstChangeableLineBefore == -1)
530 firstChangeableLineBefore = beginFoldBlock - 1;
532 int firstChangeableLineAfter = -1;
533 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
534 lookLine <= endFoldBlock;
535 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
536 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
537 firstChangeableLineAfter = lookLine;
538 break;
541 if (firstChangeableLineAfter == -1)
542 firstChangeableLineAfter = endFoldBlock + 1;
544 highlightDelimiter.beginFoldBlock = beginFoldBlock;
545 highlightDelimiter.endFoldBlock = endFoldBlock;
546 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
547 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
550 int Document::ClampPositionIntoDocument(int pos) const {
551 return Platform::Clamp(pos, 0, Length());
554 bool Document::IsCrLf(int pos) const {
555 if (pos < 0)
556 return false;
557 if (pos >= (Length() - 1))
558 return false;
559 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
562 int Document::LenChar(int pos) {
563 if (pos < 0) {
564 return 1;
565 } else if (IsCrLf(pos)) {
566 return 2;
567 } else if (SC_CP_UTF8 == dbcsCodePage) {
568 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
569 const int widthCharBytes = UTF8BytesOfLead[leadByte];
570 int lengthDoc = Length();
571 if ((pos + widthCharBytes) > lengthDoc)
572 return lengthDoc - pos;
573 else
574 return widthCharBytes;
575 } else if (dbcsCodePage) {
576 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
577 } else {
578 return 1;
582 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
583 int trail = pos;
584 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
585 trail--;
586 start = (trail > 0) ? trail-1 : trail;
588 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
589 const int widthCharBytes = UTF8BytesOfLead[leadByte];
590 if (widthCharBytes == 1) {
591 return false;
592 } else {
593 int trailBytes = widthCharBytes - 1;
594 int len = pos - start;
595 if (len > trailBytes)
596 // pos too far from lead
597 return false;
598 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
599 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
600 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
601 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
602 if (utf8status & UTF8MaskInvalid)
603 return false;
604 end = start + widthCharBytes;
605 return true;
609 // Normalise a position so that it is not halfway through a two byte character.
610 // This can occur in two situations -
611 // When lines are terminated with \r\n pairs which should be treated as one character.
612 // When displaying DBCS text such as Japanese.
613 // If moving, move the position in the indicated direction.
614 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
615 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
616 // If out of range, just return minimum/maximum value.
617 if (pos <= 0)
618 return 0;
619 if (pos >= Length())
620 return Length();
622 // PLATFORM_ASSERT(pos > 0 && pos < Length());
623 if (checkLineEnd && IsCrLf(pos - 1)) {
624 if (moveDir > 0)
625 return pos + 1;
626 else
627 return pos - 1;
630 if (dbcsCodePage) {
631 if (SC_CP_UTF8 == dbcsCodePage) {
632 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
633 // If ch is not a trail byte then pos is valid intercharacter position
634 if (UTF8IsTrailByte(ch)) {
635 int startUTF = pos;
636 int endUTF = pos;
637 if (InGoodUTF8(pos, startUTF, endUTF)) {
638 // ch is a trail byte within a UTF-8 character
639 if (moveDir > 0)
640 pos = endUTF;
641 else
642 pos = startUTF;
644 // Else invalid UTF-8 so return position of isolated trail byte
646 } else {
647 // Anchor DBCS calculations at start of line because start of line can
648 // not be a DBCS trail byte.
649 int posStartLine = LineStart(LineFromPosition(pos));
650 if (pos == posStartLine)
651 return pos;
653 // Step back until a non-lead-byte is found.
654 int posCheck = pos;
655 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
656 posCheck--;
658 // Check from known start of character.
659 while (posCheck < pos) {
660 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
661 if (posCheck + mbsize == pos) {
662 return pos;
663 } else if (posCheck + mbsize > pos) {
664 if (moveDir > 0) {
665 return posCheck + mbsize;
666 } else {
667 return posCheck;
670 posCheck += mbsize;
675 return pos;
678 // NextPosition moves between valid positions - it can not handle a position in the middle of a
679 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
680 // A \r\n pair is treated as two characters.
681 int Document::NextPosition(int pos, int moveDir) const {
682 // If out of range, just return minimum/maximum value.
683 int increment = (moveDir > 0) ? 1 : -1;
684 if (pos + increment <= 0)
685 return 0;
686 if (pos + increment >= Length())
687 return Length();
689 if (dbcsCodePage) {
690 if (SC_CP_UTF8 == dbcsCodePage) {
691 if (increment == 1) {
692 // Simple forward movement case so can avoid some checks
693 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
694 if (UTF8IsAscii(leadByte)) {
695 // Single byte character or invalid
696 pos++;
697 } else {
698 const int widthCharBytes = UTF8BytesOfLead[leadByte];
699 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
700 for (int b=1; b<widthCharBytes; b++)
701 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
702 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
703 if (utf8status & UTF8MaskInvalid)
704 pos++;
705 else
706 pos += utf8status & UTF8MaskWidth;
708 } else {
709 // Examine byte before position
710 pos--;
711 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
712 // If ch is not a trail byte then pos is valid intercharacter position
713 if (UTF8IsTrailByte(ch)) {
714 // If ch is a trail byte in a valid UTF-8 character then return start of character
715 int startUTF = pos;
716 int endUTF = pos;
717 if (InGoodUTF8(pos, startUTF, endUTF)) {
718 pos = startUTF;
720 // Else invalid UTF-8 so return position of isolated trail byte
723 } else {
724 if (moveDir > 0) {
725 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
726 pos += mbsize;
727 if (pos > Length())
728 pos = Length();
729 } else {
730 // Anchor DBCS calculations at start of line because start of line can
731 // not be a DBCS trail byte.
732 int posStartLine = LineStart(LineFromPosition(pos));
733 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
734 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
735 if ((pos - 1) <= posStartLine) {
736 return pos - 1;
737 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
738 // Must actually be trail byte
739 return pos - 2;
740 } else {
741 // Otherwise, step back until a non-lead-byte is found.
742 int posTemp = pos - 1;
743 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
745 // Now posTemp+1 must point to the beginning of a character,
746 // so figure out whether we went back an even or an odd
747 // number of bytes and go back 1 or 2 bytes, respectively.
748 return (pos - 1 - ((pos - posTemp) & 1));
752 } else {
753 pos += increment;
756 return pos;
759 bool Document::NextCharacter(int &pos, int moveDir) const {
760 // Returns true if pos changed
761 int posNext = NextPosition(pos, moveDir);
762 if (posNext == pos) {
763 return false;
764 } else {
765 pos = posNext;
766 return true;
770 // Return -1 on out-of-bounds
771 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
772 int pos = positionStart;
773 if (dbcsCodePage) {
774 const int increment = (characterOffset > 0) ? 1 : -1;
775 while (characterOffset != 0) {
776 const int posNext = NextPosition(pos, increment);
777 if (posNext == pos)
778 return INVALID_POSITION;
779 pos = posNext;
780 characterOffset -= increment;
782 } else {
783 pos = positionStart + characterOffset;
784 if ((pos < 0) || (pos > Length()))
785 return INVALID_POSITION;
787 return pos;
790 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
791 int pos = positionStart;
792 if (dbcsCodePage) {
793 const int increment = (characterOffset > 0) ? 1 : -1;
794 while (characterOffset != 0) {
795 const int posNext = NextPosition(pos, increment);
796 if (posNext == pos)
797 return INVALID_POSITION;
798 if (abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
799 characterOffset -= increment;
800 pos = posNext;
801 characterOffset -= increment;
803 } else {
804 pos = positionStart + characterOffset;
805 if ((pos < 0) || (pos > Length()))
806 return INVALID_POSITION;
808 return pos;
811 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
812 int character;
813 int bytesInCharacter = 1;
814 if (dbcsCodePage) {
815 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
816 if (SC_CP_UTF8 == dbcsCodePage) {
817 if (UTF8IsAscii(leadByte)) {
818 // Single byte character or invalid
819 character = leadByte;
820 } else {
821 const int widthCharBytes = UTF8BytesOfLead[leadByte];
822 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
823 for (int b=1; b<widthCharBytes; b++)
824 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
825 int utf8status = UTF8Classify(charBytes, widthCharBytes);
826 if (utf8status & UTF8MaskInvalid) {
827 // Report as singleton surrogate values which are invalid Unicode
828 character = 0xDC80 + leadByte;
829 } else {
830 bytesInCharacter = utf8status & UTF8MaskWidth;
831 character = UnicodeFromUTF8(charBytes);
834 } else {
835 if (IsDBCSLeadByte(leadByte)) {
836 bytesInCharacter = 2;
837 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
838 } else {
839 character = leadByte;
842 } else {
843 character = cb.CharAt(position);
845 if (pWidth) {
846 *pWidth = bytesInCharacter;
848 return character;
851 int SCI_METHOD Document::CodePage() const {
852 return dbcsCodePage;
855 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
856 // Byte ranges found in Wikipedia articles with relevant search strings in each case
857 unsigned char uch = static_cast<unsigned char>(ch);
858 switch (dbcsCodePage) {
859 case 932:
860 // Shift_jis
861 return ((uch >= 0x81) && (uch <= 0x9F)) ||
862 ((uch >= 0xE0) && (uch <= 0xFC));
863 // Lead bytes F0 to FC may be a Microsoft addition.
864 case 936:
865 // GBK
866 return (uch >= 0x81) && (uch <= 0xFE);
867 case 949:
868 // Korean Wansung KS C-5601-1987
869 return (uch >= 0x81) && (uch <= 0xFE);
870 case 950:
871 // Big5
872 return (uch >= 0x81) && (uch <= 0xFE);
873 case 1361:
874 // Korean Johab KS C-5601-1992
875 return
876 ((uch >= 0x84) && (uch <= 0xD3)) ||
877 ((uch >= 0xD8) && (uch <= 0xDE)) ||
878 ((uch >= 0xE0) && (uch <= 0xF9));
880 return false;
// True for exactly two characters: space and horizontal tab.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
887 // Need to break text into segments near lengthSegment but taking into
888 // account the encoding to not break inside a UTF-8 or DBCS character
889 // and also trying to avoid breaking inside a pair of combining characters.
890 // The segment length must always be long enough (more than 4 bytes)
891 // so that there will be at least one whole character to make a segment.
892 // For UTF-8, text must consist only of valid whole characters.
893 // In preference order from best to worst:
894 // 1) Break after space
895 // 2) Break before punctuation
896 // 3) Break after whole character
898 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
899 if (length <= lengthSegment)
900 return length;
901 int lastSpaceBreak = -1;
902 int lastPunctuationBreak = -1;
903 int lastEncodingAllowedBreak = 0;
904 for (int j=0; j < lengthSegment;) {
905 unsigned char ch = static_cast<unsigned char>(text[j]);
906 if (j > 0) {
907 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
908 lastSpaceBreak = j;
910 if (ch < 'A') {
911 lastPunctuationBreak = j;
914 lastEncodingAllowedBreak = j;
916 if (dbcsCodePage == SC_CP_UTF8) {
917 j += UTF8BytesOfLead[ch];
918 } else if (dbcsCodePage) {
919 j += IsDBCSLeadByte(ch) ? 2 : 1;
920 } else {
921 j++;
924 if (lastSpaceBreak >= 0) {
925 return lastSpaceBreak;
926 } else if (lastPunctuationBreak >= 0) {
927 return lastPunctuationBreak;
929 return lastEncodingAllowedBreak;
932 EncodingFamily Document::CodePageFamily() const {
933 if (SC_CP_UTF8 == dbcsCodePage)
934 return efUnicode;
935 else if (dbcsCodePage)
936 return efDBCS;
937 else
938 return efEightBit;
941 void Document::ModifiedAt(int pos) {
942 if (endStyled > pos)
943 endStyled = pos;
946 void Document::CheckReadOnly() {
947 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
948 enteredReadOnlyCount++;
949 NotifyModifyAttempt();
950 enteredReadOnlyCount--;
954 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
955 // SetStyleAt does not change the persistent state of a document
957 bool Document::DeleteChars(int pos, int len) {
958 if (pos < 0)
959 return false;
960 if (len <= 0)
961 return false;
962 if ((pos + len) > Length())
963 return false;
964 CheckReadOnly();
965 if (enteredModification != 0) {
966 return false;
967 } else {
968 enteredModification++;
969 if (!cb.IsReadOnly()) {
970 NotifyModified(
971 DocModification(
972 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
973 pos, len,
974 0, 0));
975 int prevLinesTotal = LinesTotal();
976 bool startSavePoint = cb.IsSavePoint();
977 bool startSequence = false;
978 const char *text = cb.DeleteChars(pos, len, startSequence);
979 if (startSavePoint && cb.IsCollectingUndo())
980 NotifySavePoint(!startSavePoint);
981 if ((pos < Length()) || (pos == 0))
982 ModifiedAt(pos);
983 else
984 ModifiedAt(pos-1);
985 NotifyModified(
986 DocModification(
987 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
988 pos, len,
989 LinesTotal() - prevLinesTotal, text));
991 enteredModification--;
993 return !cb.IsReadOnly();
997 * Insert a string with a length.
999 int Document::InsertString(int position, const char *s, int insertLength) {
1000 if (insertLength <= 0) {
1001 return 0;
1003 CheckReadOnly(); // Application may change read only state here
1004 if (cb.IsReadOnly()) {
1005 return 0;
1007 if (enteredModification != 0) {
1008 return 0;
1010 enteredModification++;
1011 insertionSet = false;
1012 insertion.clear();
1013 NotifyModified(
1014 DocModification(
1015 SC_MOD_INSERTCHECK,
1016 position, insertLength,
1017 0, s));
1018 if (insertionSet) {
1019 s = insertion.c_str();
1020 insertLength = static_cast<int>(insertion.length());
1022 NotifyModified(
1023 DocModification(
1024 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1025 position, insertLength,
1026 0, s));
1027 int prevLinesTotal = LinesTotal();
1028 bool startSavePoint = cb.IsSavePoint();
1029 bool startSequence = false;
1030 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1031 if (startSavePoint && cb.IsCollectingUndo())
1032 NotifySavePoint(!startSavePoint);
1033 ModifiedAt(position);
1034 NotifyModified(
1035 DocModification(
1036 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1037 position, insertLength,
1038 LinesTotal() - prevLinesTotal, text));
1039 if (insertionSet) { // Free memory as could be large
1040 std::string().swap(insertion);
1042 enteredModification--;
1043 return insertLength;
1046 void Document::ChangeInsertion(const char *s, int length) {
1047 insertionSet = true;
1048 insertion.assign(s, length);
1051 int SCI_METHOD Document::AddData(char *data, int length) {
1052 try {
1053 int position = Length();
1054 InsertString(position, data, length);
1055 } catch (std::bad_alloc &) {
1056 return SC_STATUS_BADALLOC;
1057 } catch (...) {
1058 return SC_STATUS_FAILURE;
1060 return 0;
1063 void * SCI_METHOD Document::ConvertToDocument() {
1064 return this;
// Undo the steps reported by cb.StartUndo(), sending a "before" and an "after"
// notification to the watchers for every step.
// Returns the position derived from the last text action undone, or -1 when no
// text action was undone (call ignored, buffer read only, or no text steps).
1067 int Document::Undo() {
1068 int newPos = -1;
1069 CheckReadOnly();
// Ignore the request when a modification is already in progress or when undo
// history is not being collected.
1070 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1071 enteredModification++;
1072 if (!cb.IsReadOnly()) {
1073 bool startSavePoint = cb.IsSavePoint();
1074 bool multiLine = false;
1075 int steps = cb.StartUndo();
1076 //Platform::DebugPrintf("Steps=%d\n", steps);
// Track runs of removals that are being re-inserted so that newPos can be
// placed after the whole run rather than after the last piece only.
1077 int coalescedRemovePos = -1;
1078 int coalescedRemoveLen = 0;
1079 int prevRemoveActionPos = -1;
1080 int prevRemoveActionLen = 0;
1081 for (int step = 0; step < steps; step++) {
1082 const int prevLinesTotal = LinesTotal();
1083 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal re-inserts text, undoing anything
// other than a removal or a container action is announced as a deletion.
1084 if (action.at == removeAction) {
1085 NotifyModified(DocModification(
1086 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1087 } else if (action.at == containerAction) {
1088 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1089 dm.token = action.position;
1090 NotifyModified(dm);
// A container action that may not coalesce breaks any run of removals.
1091 if (!action.mayCoalesce) {
1092 coalescedRemovePos = -1;
1093 coalescedRemoveLen = 0;
1094 prevRemoveActionPos = -1;
1095 prevRemoveActionLen = 0;
1097 } else {
1098 NotifyModified(DocModification(
1099 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1101 cb.PerformUndoStep();
1102 if (action.at != containerAction) {
1103 ModifiedAt(action.position);
1104 newPos = action.position;
1107 int modFlags = SC_PERFORMED_UNDO;
1108 // With undo, an insertion action becomes a deletion notification
1109 if (action.at == removeAction) {
1110 newPos += action.lenData;
1111 modFlags |= SC_MOD_INSERTTEXT;
// Extend the current run when this removal starts at the same position as the
// previous one or directly after it; otherwise start a new run here.
1112 if ((coalescedRemoveLen > 0) &&
1113 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1114 coalescedRemoveLen += action.lenData;
1115 newPos = coalescedRemovePos + coalescedRemoveLen;
1116 } else {
1117 coalescedRemovePos = action.position;
1118 coalescedRemoveLen = action.lenData;
1120 prevRemoveActionPos = action.position;
1121 prevRemoveActionLen = action.lenData;
1122 } else if (action.at == insertAction) {
1123 modFlags |= SC_MOD_DELETETEXT;
1124 coalescedRemovePos = -1;
1125 coalescedRemoveLen = 0;
1126 prevRemoveActionPos = -1;
1127 prevRemoveActionLen = 0;
1129 if (steps > 1)
1130 modFlags |= SC_MULTISTEPUNDOREDO;
1131 const int linesAdded = LinesTotal() - prevLinesTotal;
1132 if (linesAdded != 0)
1133 multiLine = true;
// Only the final step carries the "last step" flag and, when any step changed
// the line count, the "multi line" flag.
1134 if (step == steps - 1) {
1135 modFlags |= SC_LASTSTEPINUNDOREDO;
1136 if (multiLine)
1137 modFlags |= SC_MULTILINEUNDOREDO;
1139 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1140 linesAdded, action.data));
// Tell the watchers when the save point state differs from the state at entry.
1143 bool endSavePoint = cb.IsSavePoint();
1144 if (startSavePoint != endSavePoint)
1145 NotifySavePoint(endSavePoint);
1147 enteredModification--;
1149 return newPos;
// Redo the steps reported by cb.StartRedo(), sending a "before" and an "after"
// notification to the watchers for every step.
// Returns the position derived from the last text action redone, or -1 when no
// text action was redone (call ignored, buffer read only, or no text steps).
1152 int Document::Redo() {
1153 int newPos = -1;
1154 CheckReadOnly();
// Ignore the request when a modification is already in progress or when undo
// history is not being collected.
1155 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1156 enteredModification++;
1157 if (!cb.IsReadOnly()) {
1158 bool startSavePoint = cb.IsSavePoint();
1159 bool multiLine = false;
1160 int steps = cb.StartRedo();
1161 for (int step = 0; step < steps; step++) {
1162 const int prevLinesTotal = LinesTotal();
1163 const Action &action = cb.GetRedoStep();
// "Before" notification matching the kind of action about to be replayed.
1164 if (action.at == insertAction) {
1165 NotifyModified(DocModification(
1166 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1167 } else if (action.at == containerAction) {
1168 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1169 dm.token = action.position;
1170 NotifyModified(dm);
1171 } else {
1172 NotifyModified(DocModification(
1173 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1175 cb.PerformRedoStep();
1176 if (action.at != containerAction) {
1177 ModifiedAt(action.position);
1178 newPos = action.position;
1181 int modFlags = SC_PERFORMED_REDO;
// After a replayed insertion the returned position is the end of that text.
1182 if (action.at == insertAction) {
1183 newPos += action.lenData;
1184 modFlags |= SC_MOD_INSERTTEXT;
1185 } else if (action.at == removeAction) {
1186 modFlags |= SC_MOD_DELETETEXT;
1188 if (steps > 1)
1189 modFlags |= SC_MULTISTEPUNDOREDO;
1190 const int linesAdded = LinesTotal() - prevLinesTotal;
1191 if (linesAdded != 0)
1192 multiLine = true;
// Only the final step carries the "last step" flag and, when any step changed
// the line count, the "multi line" flag.
1193 if (step == steps - 1) {
1194 modFlags |= SC_LASTSTEPINUNDOREDO;
1195 if (multiLine)
1196 modFlags |= SC_MULTILINEUNDOREDO;
1198 NotifyModified(
1199 DocModification(modFlags, action.position, action.lenData,
1200 linesAdded, action.data));
// Tell the watchers when the save point state differs from the state at entry.
1203 bool endSavePoint = cb.IsSavePoint();
1204 if (startSavePoint != endSavePoint)
1205 NotifySavePoint(endSavePoint);
1207 enteredModification--;
1209 return newPos;
1212 void Document::DelChar(int pos) {
1213 DeleteChars(pos, LenChar(pos));
1216 void Document::DelCharBack(int pos) {
1217 if (pos <= 0) {
1218 return;
1219 } else if (IsCrLf(pos - 2)) {
1220 DeleteChars(pos - 2, 2);
1221 } else if (dbcsCodePage) {
1222 int startChar = NextPosition(pos, -1);
1223 DeleteChars(startChar, pos - startChar);
1224 } else {
1225 DeleteChars(pos - 1, 1);
/**
 * Return the first multiple of tabSize that is strictly greater than pos.
 * tabSize must be non-zero.
 */
static int NextTab(int pos, int tabSize) {
	const int tabStopsPassed = pos / tabSize;
	return (tabStopsPassed + 1) * tabSize;
}
/**
 * Build the white space needed to reach a column of indent.
 * When insertSpaces is false, as many tabs of width tabSize as fit are used
 * and the remainder is filled with spaces; otherwise only spaces are used.
 * A non-positive indent yields an empty string.
 * A non-positive tabSize cannot be expressed in tabs (and previously made the
 * tab loop run forever), so it falls back to spaces only.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<size_t>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<size_t>(indent), ' ');
	return indentation;
}
1248 int SCI_METHOD Document::GetLineIndentation(int line) {
1249 int indent = 0;
1250 if ((line >= 0) && (line < LinesTotal())) {
1251 int lineStart = LineStart(line);
1252 int length = Length();
1253 for (int i = lineStart; i < length; i++) {
1254 char ch = cb.CharAt(i);
1255 if (ch == ' ')
1256 indent++;
1257 else if (ch == '\t')
1258 indent = NextTab(indent, tabInChars);
1259 else
1260 return indent;
1263 return indent;
1266 int Document::SetLineIndentation(int line, int indent) {
1267 int indentOfLine = GetLineIndentation(line);
1268 if (indent < 0)
1269 indent = 0;
1270 if (indent != indentOfLine) {
1271 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1272 int thisLineStart = LineStart(line);
1273 int indentPos = GetLineIndentPosition(line);
1274 UndoGroup ug(this);
1275 DeleteChars(thisLineStart, indentPos - thisLineStart);
1276 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1277 static_cast<int>(linebuf.length()));
1278 } else {
1279 return GetLineIndentPosition(line);
1283 int Document::GetLineIndentPosition(int line) const {
1284 if (line < 0)
1285 return 0;
1286 int pos = LineStart(line);
1287 int length = Length();
1288 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1289 pos++;
1291 return pos;
1294 int Document::GetColumn(int pos) {
1295 int column = 0;
1296 int line = LineFromPosition(pos);
1297 if ((line >= 0) && (line < LinesTotal())) {
1298 for (int i = LineStart(line); i < pos;) {
1299 char ch = cb.CharAt(i);
1300 if (ch == '\t') {
1301 column = NextTab(column, tabInChars);
1302 i++;
1303 } else if (ch == '\r') {
1304 return column;
1305 } else if (ch == '\n') {
1306 return column;
1307 } else if (i >= Length()) {
1308 return column;
1309 } else {
1310 column++;
1311 i = NextPosition(i, 1);
1315 return column;
1318 int Document::CountCharacters(int startPos, int endPos) const {
1319 startPos = MovePositionOutsideChar(startPos, 1, false);
1320 endPos = MovePositionOutsideChar(endPos, -1, false);
1321 int count = 0;
1322 int i = startPos;
1323 while (i < endPos) {
1324 count++;
1325 if (IsCrLf(i))
1326 i++;
1327 i = NextPosition(i, 1);
1329 return count;
1332 int Document::CountUTF16(int startPos, int endPos) const {
1333 startPos = MovePositionOutsideChar(startPos, 1, false);
1334 endPos = MovePositionOutsideChar(endPos, -1, false);
1335 int count = 0;
1336 int i = startPos;
1337 while (i < endPos) {
1338 count++;
1339 const int next = NextPosition(i, 1);
1340 if ((next - i) > 3)
1341 count++;
1342 i = next;
1344 return count;
1347 int Document::FindColumn(int line, int column) {
1348 int position = LineStart(line);
1349 if ((line >= 0) && (line < LinesTotal())) {
1350 int columnCurrent = 0;
1351 while ((columnCurrent < column) && (position < Length())) {
1352 char ch = cb.CharAt(position);
1353 if (ch == '\t') {
1354 columnCurrent = NextTab(columnCurrent, tabInChars);
1355 if (columnCurrent > column)
1356 return position;
1357 position++;
1358 } else if (ch == '\r') {
1359 return position;
1360 } else if (ch == '\n') {
1361 return position;
1362 } else {
1363 columnCurrent++;
1364 position = NextPosition(position, 1);
1368 return position;
1371 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1372 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1373 for (int line = lineBottom; line >= lineTop; line--) {
1374 int indentOfLine = GetLineIndentation(line);
1375 if (forwards) {
1376 if (LineStart(line) < LineEnd(line)) {
1377 SetLineIndentation(line, indentOfLine + IndentSize());
1379 } else {
1380 SetLineIndentation(line, indentOfLine - IndentSize());
1385 // Convert line endings for a piece of text to a particular mode.
1386 // Stop at len or when a NUL is found.
1387 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1388 std::string dest;
1389 for (size_t i = 0; (i < len) && (s[i]); i++) {
1390 if (s[i] == '\n' || s[i] == '\r') {
1391 if (eolModeWanted == SC_EOL_CR) {
1392 dest.push_back('\r');
1393 } else if (eolModeWanted == SC_EOL_LF) {
1394 dest.push_back('\n');
1395 } else { // eolModeWanted == SC_EOL_CRLF
1396 dest.push_back('\r');
1397 dest.push_back('\n');
1399 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1400 i++;
1402 } else {
1403 dest.push_back(s[i]);
1406 return dest;
1409 void Document::ConvertLineEnds(int eolModeSet) {
1410 UndoGroup ug(this);
1412 for (int pos = 0; pos < Length(); pos++) {
1413 if (cb.CharAt(pos) == '\r') {
1414 if (cb.CharAt(pos + 1) == '\n') {
1415 // CRLF
1416 if (eolModeSet == SC_EOL_CR) {
1417 DeleteChars(pos + 1, 1); // Delete the LF
1418 } else if (eolModeSet == SC_EOL_LF) {
1419 DeleteChars(pos, 1); // Delete the CR
1420 } else {
1421 pos++;
1423 } else {
1424 // CR
1425 if (eolModeSet == SC_EOL_CRLF) {
1426 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1427 } else if (eolModeSet == SC_EOL_LF) {
1428 pos += InsertString(pos, "\n", 1); // Insert LF
1429 DeleteChars(pos, 1); // Delete CR
1430 pos--;
1433 } else if (cb.CharAt(pos) == '\n') {
1434 // LF
1435 if (eolModeSet == SC_EOL_CRLF) {
1436 pos += InsertString(pos, "\r", 1); // Insert CR
1437 } else if (eolModeSet == SC_EOL_CR) {
1438 pos += InsertString(pos, "\r", 1); // Insert CR
1439 DeleteChars(pos, 1); // Delete LF
1440 pos--;
1447 bool Document::IsWhiteLine(int line) const {
1448 int currentChar = LineStart(line);
1449 int endLine = LineEnd(line);
1450 while (currentChar < endLine) {
1451 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1452 return false;
1454 ++currentChar;
1456 return true;
1459 int Document::ParaUp(int pos) const {
1460 int line = LineFromPosition(pos);
1461 line--;
1462 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1463 line--;
1465 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1466 line--;
1468 line++;
1469 return LineStart(line);
1472 int Document::ParaDown(int pos) const {
1473 int line = LineFromPosition(pos);
1474 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1475 line++;
1477 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1478 line++;
1480 if (line < LinesTotal())
1481 return LineStart(line);
1482 else // end of a document
1483 return LineEnd(line-1);
1486 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1487 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1488 return CharClassify::ccWord;
1489 return charClass.GetClass(ch);
1493 * Used by commmands that want to select whole words.
1494 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1496 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1497 CharClassify::cc ccStart = CharClassify::ccWord;
1498 if (delta < 0) {
1499 if (!onlyWordCharacters)
1500 ccStart = WordCharClass(cb.CharAt(pos-1));
1501 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1502 pos--;
1503 } else {
1504 if (!onlyWordCharacters && pos < Length())
1505 ccStart = WordCharClass(cb.CharAt(pos));
1506 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1507 pos++;
1509 return MovePositionOutsideChar(pos, delta, true);
1513 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1514 * (delta < 0).
1515 * This is looking for a transition between character classes although there is also some
1516 * additional movement to transit white space.
1517 * Used by cursor movement by word commands.
1519 int Document::NextWordStart(int pos, int delta) {
1520 if (delta < 0) {
1521 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1522 pos--;
1523 if (pos > 0) {
1524 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1525 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1526 pos--;
1529 } else {
1530 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1531 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1532 pos++;
1533 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1534 pos++;
1536 return pos;
1540 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1541 * (delta < 0).
1542 * This is looking for a transition between character classes although there is also some
1543 * additional movement to transit white space.
1544 * Used by cursor movement by word commands.
1546 int Document::NextWordEnd(int pos, int delta) {
1547 if (delta < 0) {
1548 if (pos > 0) {
1549 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1550 if (ccStart != CharClassify::ccSpace) {
1551 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1552 pos--;
1555 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1556 pos--;
1559 } else {
1560 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1561 pos++;
1563 if (pos < Length()) {
1564 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1565 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1566 pos++;
1570 return pos;
1574 * Check that the character at the given position is a word or punctuation character and that
1575 * the previous character is of a different character class.
1577 bool Document::IsWordStartAt(int pos) const {
1578 if (pos > 0) {
1579 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1580 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1581 (ccPos != WordCharClass(CharAt(pos - 1)));
1583 return true;
1587 * Check that the character at the given position is a word or punctuation character and that
1588 * the next character is of a different character class.
1590 bool Document::IsWordEndAt(int pos) const {
1591 if (pos < Length()) {
1592 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1593 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1594 (ccPrev != WordCharClass(CharAt(pos)));
1596 return true;
1600 * Check that the given range is has transitions between character classes at both
1601 * ends and where the characters on the inside are word or punctuation characters.
1603 bool Document::IsWordAt(int start, int end) const {
1604 return IsWordStartAt(start) && IsWordEndAt(end);
1607 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1608 return (!word && !wordStart) ||
1609 (word && IsWordAt(pos, pos + length)) ||
1610 (wordStart && IsWordStartAt(pos));
1613 bool Document::HasCaseFolder(void) const {
1614 return pcf != 0;
1617 void Document::SetCaseFolder(CaseFolder *pcf_) {
1618 delete pcf;
1619 pcf = pcf_;
1622 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1623 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1624 if (UTF8IsAscii(leadByte)) {
1625 // Common case: ASCII character
1626 return CharacterExtracted(leadByte, 1);
1628 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1629 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1630 for (int b=1; b<widthCharBytes; b++)
1631 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1632 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1633 if (utf8status & UTF8MaskInvalid) {
1634 // Treat as invalid and use up just one byte
1635 return CharacterExtracted(unicodeReplacementChar, 1);
1636 } else {
1637 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1642 * Find text in document, supporting both forward and backward
1643 * searches (just pass minPos > maxPos to do a backward search)
1644 * Has not been tested with backwards DBCS searches yet.
// Search for the *length bytes at search between minPos and maxPos.
// minPos > maxPos requests a backward search.
// Returns the position of the match or -1 when there is none; an empty search
// (*length <= 0) returns minPos. In the case-insensitive UTF-8 and DBCS paths
// *length is updated to the number of document bytes that matched.
// The case-insensitive paths call pcf->Fold without checking pcf here.
1646 long Document::FindText(int minPos, int maxPos, const char *search,
1647 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1648 int *length) {
1649 if (*length <= 0)
1650 return minPos;
// Regular expression searches are delegated to the regex object, which is
// created on first use.
1651 if (regExp) {
1652 if (!regex)
1653 regex = CreateRegexSearch(&charClass);
1654 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1655 } else {
1657 const bool forward = minPos <= maxPos;
1658 const int increment = forward ? 1 : -1;
1660 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1661 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1662 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1664 // Compute actual search ranges needed
1665 const int lengthFind = *length;
1667 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the highest position a match may extend to, whatever the direction.
1668 const int limitPos = Platform::Maximum(startPos, endPos);
1669 int pos = startPos;
1670 if (!forward) {
1671 // Back all of a character
1672 pos = NextPosition(pos, increment);
// Path 1: case sensitive - compare raw bytes, testing the first byte before
// running the inner comparison loop.
1674 if (caseSensitive) {
1675 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1676 const char charStartSearch = search[0];
1677 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1678 if (CharAt(pos) == charStartSearch) {
1679 bool found = (pos + lengthFind) <= limitPos;
1680 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1681 found = CharAt(pos + indexSearch) == search[indexSearch];
1683 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1684 return pos;
1687 if (!NextCharacter(pos, increment))
1688 break;
// Path 2: case insensitive in UTF-8 - fold the search text once, then fold the
// document one character at a time and compare the folded bytes.
1690 } else if (SC_CP_UTF8 == dbcsCodePage) {
1691 const size_t maxFoldingExpansion = 4;
1692 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1693 const int lenSearch = static_cast<int>(
1694 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1695 char bytes[UTF8MaxBytes + 1];
1696 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1697 while (forward ? (pos < endPos) : (pos >= endPos)) {
1698 int widthFirstCharacter = 0;
1699 int posIndexDocument = pos;
1700 int indexSearch = 0;
1701 bool characterMatches = true;
1702 for (;;) {
1703 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1704 bytes[0] = leadByte;
1705 int widthChar = 1;
1706 if (!UTF8IsAscii(leadByte)) {
1707 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1708 for (int b=1; b<widthCharBytes; b++) {
1709 bytes[b] = cb.CharAt(posIndexDocument+b);
1711 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the first character so a forward search can step over it.
1713 if (!widthFirstCharacter)
1714 widthFirstCharacter = widthChar;
1715 if ((posIndexDocument + widthChar) > limitPos)
1716 break;
1717 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1718 folded[lenFlat] = 0;
1719 // Does folded match the buffer
1720 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1721 if (!characterMatches)
1722 break;
1723 posIndexDocument += widthChar;
1724 indexSearch += lenFlat;
1725 if (indexSearch >= lenSearch)
1726 break;
1728 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1729 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1730 *length = posIndexDocument - pos;
1731 return pos;
1734 if (forward) {
1735 pos += widthFirstCharacter;
1736 } else {
1737 if (!NextCharacter(pos, increment))
1738 break;
// Path 3: case insensitive in a DBCS code page - as path 2 but characters are
// one or two bytes wide according to IsDBCSLeadByte.
1741 } else if (dbcsCodePage) {
1742 const size_t maxBytesCharacter = 2;
1743 const size_t maxFoldingExpansion = 4;
1744 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1745 const int lenSearch = static_cast<int>(
1746 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1747 while (forward ? (pos < endPos) : (pos >= endPos)) {
1748 int indexDocument = 0;
1749 int indexSearch = 0;
1750 bool characterMatches = true;
1751 while (characterMatches &&
1752 ((pos + indexDocument) < limitPos) &&
1753 (indexSearch < lenSearch)) {
1754 char bytes[maxBytesCharacter + 1];
1755 bytes[0] = cb.CharAt(pos + indexDocument);
1756 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1757 if (widthChar == 2)
1758 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1759 if ((pos + indexDocument + widthChar) > limitPos)
1760 break;
1761 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1762 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1763 folded[lenFlat] = 0;
1764 // Does folded match the buffer
1765 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1766 indexDocument += widthChar;
1767 indexSearch += lenFlat;
1769 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1770 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1771 *length = indexDocument;
1772 return pos;
1775 if (!NextCharacter(pos, increment))
1776 break;
// Path 4: case insensitive with single byte characters - fold each document
// byte and compare it with the folded search text.
1778 } else {
1779 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1780 std::vector<char> searchThing(lengthFind + 1);
1781 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1782 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1783 bool found = (pos + lengthFind) <= limitPos;
1784 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1785 char ch = CharAt(pos + indexSearch);
1786 char folded[2];
1787 pcf->Fold(folded, sizeof(folded), &ch, 1);
1788 found = folded[0] == searchThing[indexSearch];
1790 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1791 return pos;
1793 if (!NextCharacter(pos, increment))
1794 break;
1798 //Platform::DebugPrintf("Not found\n");
1799 return -1;
1802 const char *Document::SubstituteByPosition(const char *text, int *length) {
1803 if (regex)
1804 return regex->SubstituteByPosition(this, text, length);
1805 else
1806 return 0;
1809 int Document::LinesTotal() const {
1810 return cb.Lines();
1813 void Document::SetDefaultCharClasses(bool includeWordClass) {
1814 charClass.SetDefaultCharClasses(includeWordClass);
1817 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1818 charClass.SetCharClasses(chars, newCharClass);
1821 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1822 return charClass.GetCharsOfClass(characterClass, buffer);
1825 void SCI_METHOD Document::StartStyling(int position, char) {
1826 endStyled = position;
1829 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1830 if (enteredStyling != 0) {
1831 return false;
1832 } else {
1833 enteredStyling++;
1834 int prevEndStyled = endStyled;
1835 if (cb.SetStyleFor(endStyled, length, style)) {
1836 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1837 prevEndStyled, length);
1838 NotifyModified(mh);
1840 endStyled += length;
1841 enteredStyling--;
1842 return true;
1846 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1847 if (enteredStyling != 0) {
1848 return false;
1849 } else {
1850 enteredStyling++;
1851 bool didChange = false;
1852 int startMod = 0;
1853 int endMod = 0;
1854 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1855 PLATFORM_ASSERT(endStyled < Length());
1856 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1857 if (!didChange) {
1858 startMod = endStyled;
1860 didChange = true;
1861 endMod = endStyled;
1864 if (didChange) {
1865 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1866 startMod, endMod - startMod + 1);
1867 NotifyModified(mh);
1869 enteredStyling--;
1870 return true;
1874 void Document::EnsureStyledTo(int pos) {
1875 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1876 IncrementStyleClock();
1877 if (pli && !pli->UseContainerLexing()) {
1878 int lineEndStyled = LineFromPosition(GetEndStyled());
1879 int endStyledTo = LineStart(lineEndStyled);
1880 pli->Colourise(endStyledTo, pos);
1881 } else {
1882 // Ask the watchers to style, and stop as soon as one responds.
1883 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1884 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1885 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1891 void Document::LexerChanged() {
1892 // Tell the watchers the lexer has changed.
1893 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1894 it->watcher->NotifyLexerChanged(this, it->userData);
1898 int SCI_METHOD Document::SetLineState(int line, int state) {
1899 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1900 if (state != statePrevious) {
1901 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1902 NotifyModified(mh);
1904 return statePrevious;
1907 int SCI_METHOD Document::GetLineState(int line) const {
1908 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1911 int Document::GetMaxLineState() {
1912 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1915 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1916 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1917 NotifyModified(mh);
1920 StyledText Document::MarginStyledText(int line) const {
1921 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1922 return StyledText(pla->Length(line), pla->Text(line),
1923 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1926 void Document::MarginSetText(int line, const char *text) {
1927 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1928 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1929 NotifyModified(mh);
1932 void Document::MarginSetStyle(int line, int style) {
1933 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1934 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1937 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1938 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1939 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1942 void Document::MarginClearAll() {
1943 int maxEditorLine = LinesTotal();
1944 for (int l=0; l<maxEditorLine; l++)
1945 MarginSetText(l, 0);
1946 // Free remaining data
1947 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1950 StyledText Document::AnnotationStyledText(int line) const {
1951 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1952 return StyledText(pla->Length(line), pla->Text(line),
1953 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1956 void Document::AnnotationSetText(int line, const char *text) {
1957 if (line >= 0 && line < LinesTotal()) {
1958 const int linesBefore = AnnotationLines(line);
1959 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1960 const int linesAfter = AnnotationLines(line);
1961 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1962 mh.annotationLinesAdded = linesAfter - linesBefore;
1963 NotifyModified(mh);
1967 void Document::AnnotationSetStyle(int line, int style) {
1968 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1969 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1970 NotifyModified(mh);
1973 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1974 if (line >= 0 && line < LinesTotal()) {
1975 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1979 int Document::AnnotationLines(int line) const {
1980 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1983 void Document::AnnotationClearAll() {
1984 int maxEditorLine = LinesTotal();
1985 for (int l=0; l<maxEditorLine; l++)
1986 AnnotationSetText(l, 0);
1987 // Free remaining data
1988 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1991 void Document::IncrementStyleClock() {
1992 styleClock = (styleClock + 1) % 0x100000;
1995 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1996 if (decorations.FillRange(position, value, fillLength)) {
1997 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1998 position, fillLength);
1999 NotifyModified(mh);
2003 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2004 WatcherWithUserData wwud(watcher, userData);
2005 std::vector<WatcherWithUserData>::iterator it =
2006 std::find(watchers.begin(), watchers.end(), wwud);
2007 if (it != watchers.end())
2008 return false;
2009 watchers.push_back(wwud);
2010 return true;
2013 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2014 std::vector<WatcherWithUserData>::iterator it =
2015 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2016 if (it != watchers.end()) {
2017 watchers.erase(it);
2018 return true;
2020 return false;
2023 void Document::NotifyModifyAttempt() {
2024 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2025 it->watcher->NotifyModifyAttempt(this, it->userData);
2029 void Document::NotifySavePoint(bool atSavePoint) {
2030 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2031 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2035 void Document::NotifyModified(DocModification mh) {
2036 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2037 decorations.InsertSpace(mh.position, mh.length);
2038 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2039 decorations.DeleteRange(mh.position, mh.length);
2041 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2042 it->watcher->NotifyModified(this, mh, it->userData);
2046 bool Document::IsWordPartSeparator(char ch) const {
2047 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2050 int Document::WordPartLeft(int pos) {
2051 if (pos > 0) {
2052 --pos;
2053 char startChar = cb.CharAt(pos);
2054 if (IsWordPartSeparator(startChar)) {
2055 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2056 --pos;
2059 if (pos > 0) {
2060 startChar = cb.CharAt(pos);
2061 --pos;
2062 if (IsLowerCase(startChar)) {
2063 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2064 --pos;
2065 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2066 ++pos;
2067 } else if (IsUpperCase(startChar)) {
2068 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2069 --pos;
2070 if (!IsUpperCase(cb.CharAt(pos)))
2071 ++pos;
2072 } else if (IsADigit(startChar)) {
2073 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2074 --pos;
2075 if (!IsADigit(cb.CharAt(pos)))
2076 ++pos;
2077 } else if (IsPunctuation(startChar)) {
2078 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2079 --pos;
2080 if (!IsPunctuation(cb.CharAt(pos)))
2081 ++pos;
2082 } else if (isspacechar(startChar)) {
2083 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2084 --pos;
2085 if (!isspacechar(cb.CharAt(pos)))
2086 ++pos;
2087 } else if (!IsASCII(startChar)) {
2088 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2089 --pos;
2090 if (IsASCII(cb.CharAt(pos)))
2091 ++pos;
2092 } else {
2093 ++pos;
2097 return pos;
2100 int Document::WordPartRight(int pos) {
2101 char startChar = cb.CharAt(pos);
2102 int length = Length();
2103 if (IsWordPartSeparator(startChar)) {
2104 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2105 ++pos;
2106 startChar = cb.CharAt(pos);
2108 if (!IsASCII(startChar)) {
2109 while (pos < length && !IsASCII(cb.CharAt(pos)))
2110 ++pos;
2111 } else if (IsLowerCase(startChar)) {
2112 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2113 ++pos;
2114 } else if (IsUpperCase(startChar)) {
2115 if (IsLowerCase(cb.CharAt(pos + 1))) {
2116 ++pos;
2117 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2118 ++pos;
2119 } else {
2120 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2121 ++pos;
2123 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2124 --pos;
2125 } else if (IsADigit(startChar)) {
2126 while (pos < length && IsADigit(cb.CharAt(pos)))
2127 ++pos;
2128 } else if (IsPunctuation(startChar)) {
2129 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2130 ++pos;
2131 } else if (isspacechar(startChar)) {
2132 while (pos < length && isspacechar(cb.CharAt(pos)))
2133 ++pos;
2134 } else {
2135 ++pos;
2137 return pos;
// True for the two characters that can form a line end: LF and CR.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2144 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2145 int sStart = cb.StyleAt(pos);
2146 if (delta < 0) {
2147 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2148 pos--;
2149 pos++;
2150 } else {
2151 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2152 pos++;
2154 return pos;
// Return the partner of a brace character: ')' for '(' and so on for
// (), [], {} and <>. Any other character yields '\0'.
static char BraceOpposite(char ch) {
	// Stored as opener, closer, opener, closer, ...
	static const char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
	const int pairCount = static_cast<int>(sizeof(bracePairs) / 2);
	for (int pair = 0; pair < pairCount; pair++) {
		const char opener = bracePairs[pair * 2];
		const char closer = bracePairs[pair * 2 + 1];
		if (ch == opener)
			return closer;
		if (ch == closer)
			return opener;
	}
	return '\0';
}
2180 // TODO: should be able to extend styled region to find matching brace
2181 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2182 char chBrace = CharAt(position);
2183 char chSeek = BraceOpposite(chBrace);
2184 if (chSeek == '\0')
2185 return - 1;
2186 char styBrace = static_cast<char>(StyleAt(position));
2187 int direction = -1;
2188 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2189 direction = 1;
2190 int depth = 1;
2191 position = NextPosition(position, direction);
2192 while ((position >= 0) && (position < Length())) {
2193 char chAtPos = CharAt(position);
2194 char styAtPos = static_cast<char>(StyleAt(position));
2195 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2196 if (chAtPos == chBrace)
2197 depth++;
2198 if (chAtPos == chSeek)
2199 depth--;
2200 if (depth == 0)
2201 return position;
2203 int positionBeforeMove = position;
2204 position = NextPosition(position, direction);
2205 if (position == positionBeforeMove)
2206 break;
2208 return - 1;
2212 * Implementation of RegexSearchBase for the default built-in regular expression engine
2214 class BuiltinRegex : public RegexSearchBase {
2215 public:
2216 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2218 virtual ~BuiltinRegex() {
2221 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2222 bool caseSensitive, bool word, bool wordStart, int flags,
2223 int *length);
2225 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2227 private:
2228 RESearch search;
2229 std::string substituted;
2232 namespace {
2235 * RESearchRange keeps track of search range.
2237 class RESearchRange {
2238 public:
2239 const Document *doc;
2240 int increment;
2241 int startPos;
2242 int endPos;
2243 int lineRangeStart;
2244 int lineRangeEnd;
2245 int lineRangeBreak;
2246 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2247 increment = (minPos <= maxPos) ? 1 : -1;
2249 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2250 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2251 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2253 lineRangeStart = doc->LineFromPosition(startPos);
2254 lineRangeEnd = doc->LineFromPosition(endPos);
2255 if ((increment == 1) &&
2256 (startPos >= doc->LineEnd(lineRangeStart)) &&
2257 (lineRangeStart < lineRangeEnd)) {
2258 // the start position is at end of line or between line end characters.
2259 lineRangeStart++;
2260 startPos = doc->LineStart(lineRangeStart);
2261 } else if ((increment == -1) &&
2262 (startPos <= doc->LineStart(lineRangeStart)) &&
2263 (lineRangeStart > lineRangeEnd)) {
2264 // the start position is at beginning of line.
2265 lineRangeStart--;
2266 startPos = doc->LineEnd(lineRangeStart);
2268 lineRangeBreak = lineRangeEnd + increment;
2270 Range LineRange(int line) const {
2271 Range range(doc->LineStart(line), doc->LineEnd(line));
2272 if (increment == 1) {
2273 if (line == lineRangeStart)
2274 range.start = startPos;
2275 if (line == lineRangeEnd)
2276 range.end = endPos;
2277 } else {
2278 if (line == lineRangeEnd)
2279 range.start = endPos;
2280 if (line == lineRangeStart)
2281 range.end = startPos;
2283 return range;
2287 // Define a way for the Regular Expression code to access the document
2288 class DocumentIndexer : public CharacterIndexer {
2289 Document *pdoc;
2290 int end;
2291 public:
2292 DocumentIndexer(Document *pdoc_, int end_) :
2293 pdoc(pdoc_), end(end_) {
2296 virtual ~DocumentIndexer() {
2299 virtual char CharAt(int index) {
2300 if (index < 0 || index >= end)
2301 return 0;
2302 else
2303 return pdoc->CharAt(index);
2307 #ifdef CXX11_REGEX
2309 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2310 public:
2311 const Document *doc;
2312 Position position;
2313 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2315 ByteIterator(const ByteIterator &other) {
2316 doc = other.doc;
2317 position = other.position;
2319 ByteIterator &operator=(const ByteIterator &other) {
2320 if (this != &other) {
2321 doc = other.doc;
2322 position = other.position;
2324 return *this;
2326 char operator*() const {
2327 return doc->CharAt(position);
2329 ByteIterator &operator++() {
2330 position++;
2331 return *this;
2333 ByteIterator operator++(int) {
2334 ByteIterator retVal(*this);
2335 position++;
2336 return retVal;
2338 ByteIterator &operator--() {
2339 position--;
2340 return *this;
2342 bool operator==(const ByteIterator &other) const {
2343 return doc == other.doc && position == other.position;
2345 bool operator!=(const ByteIterator &other) const {
2346 return doc != other.doc || position != other.position;
2348 int Pos() const {
2349 return position;
2351 int PosRoundUp() const {
2352 return position;
2356 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2357 // Would be better to use sizeof(wchar_t) or similar to differentiate
2358 // but easier for now to hard-code platforms.
2359 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2360 // appear to allow specializing basic_regex over these.
2362 #ifdef _WIN32
2363 #define WCHAR_T_IS_16 1
2364 #else
2365 #define WCHAR_T_IS_16 0
2366 #endif
2368 #if WCHAR_T_IS_16
2370 // On Windows, report non-BMP characters as 2 separate surrogates as that
2371 // matches wregex since it is based on wchar_t.
2372 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2373 // These 3 fields determine the iterator position and are used for comparisons
2374 const Document *doc;
2375 Position position;
2376 size_t characterIndex;
2377 // Remaining fields are derived from the determining fields so are excluded in comparisons
2378 unsigned int lenBytes;
2379 size_t lenCharacters;
2380 wchar_t buffered[2];
2381 public:
2382 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2383 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2384 buffered[0] = 0;
2385 buffered[1] = 0;
2386 if (doc) {
2387 ReadCharacter();
2390 UTF8Iterator(const UTF8Iterator &other) {
2391 doc = other.doc;
2392 position = other.position;
2393 characterIndex = other.characterIndex;
2394 lenBytes = other.lenBytes;
2395 lenCharacters = other.lenCharacters;
2396 buffered[0] = other.buffered[0];
2397 buffered[1] = other.buffered[1];
2399 UTF8Iterator &operator=(const UTF8Iterator &other) {
2400 if (this != &other) {
2401 doc = other.doc;
2402 position = other.position;
2403 characterIndex = other.characterIndex;
2404 lenBytes = other.lenBytes;
2405 lenCharacters = other.lenCharacters;
2406 buffered[0] = other.buffered[0];
2407 buffered[1] = other.buffered[1];
2409 return *this;
2411 wchar_t operator*() const {
2412 assert(lenCharacters != 0);
2413 return buffered[characterIndex];
2415 UTF8Iterator &operator++() {
2416 if ((characterIndex + 1) < (lenCharacters)) {
2417 characterIndex++;
2418 } else {
2419 position += lenBytes;
2420 ReadCharacter();
2421 characterIndex = 0;
2423 return *this;
2425 UTF8Iterator operator++(int) {
2426 UTF8Iterator retVal(*this);
2427 if ((characterIndex + 1) < (lenCharacters)) {
2428 characterIndex++;
2429 } else {
2430 position += lenBytes;
2431 ReadCharacter();
2432 characterIndex = 0;
2434 return retVal;
2436 UTF8Iterator &operator--() {
2437 if (characterIndex) {
2438 characterIndex--;
2439 } else {
2440 position = doc->NextPosition(position, -1);
2441 ReadCharacter();
2442 characterIndex = lenCharacters - 1;
2444 return *this;
2446 bool operator==(const UTF8Iterator &other) const {
2447 // Only test the determining fields, not the character widths and values derived from this
2448 return doc == other.doc &&
2449 position == other.position &&
2450 characterIndex == other.characterIndex;
2452 bool operator!=(const UTF8Iterator &other) const {
2453 // Only test the determining fields, not the character widths and values derived from this
2454 return doc != other.doc ||
2455 position != other.position ||
2456 characterIndex != other.characterIndex;
2458 int Pos() const {
2459 return position;
2461 int PosRoundUp() const {
2462 if (characterIndex)
2463 return position + lenBytes; // Force to end of character
2464 else
2465 return position;
2467 private:
2468 void ReadCharacter() {
2469 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2470 lenBytes = charExtracted.widthBytes;
2471 if (charExtracted.character == unicodeReplacementChar) {
2472 lenCharacters = 1;
2473 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2474 } else {
2475 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2480 #else
2482 // On Unix, report non-BMP characters as single characters
2484 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2485 const Document *doc;
2486 Position position;
2487 public:
2488 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2490 UTF8Iterator(const UTF8Iterator &other) {
2491 doc = other.doc;
2492 position = other.position;
2494 UTF8Iterator &operator=(const UTF8Iterator &other) {
2495 if (this != &other) {
2496 doc = other.doc;
2497 position = other.position;
2499 return *this;
2501 wchar_t operator*() const {
2502 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2503 return charExtracted.character;
2505 UTF8Iterator &operator++() {
2506 position = doc->NextPosition(position, 1);
2507 return *this;
2509 UTF8Iterator operator++(int) {
2510 UTF8Iterator retVal(*this);
2511 position = doc->NextPosition(position, 1);
2512 return retVal;
2514 UTF8Iterator &operator--() {
2515 position = doc->NextPosition(position, -1);
2516 return *this;
2518 bool operator==(const UTF8Iterator &other) const {
2519 return doc == other.doc && position == other.position;
2521 bool operator!=(const UTF8Iterator &other) const {
2522 return doc != other.doc || position != other.position;
2524 int Pos() const {
2525 return position;
2527 int PosRoundUp() const {
2528 return position;
2532 #endif
2534 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2535 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2536 if (!doc->IsLineStartPosition(startPos))
2537 flagsMatch |= std::regex_constants::match_not_bol;
2538 if (!doc->IsLineEndPosition(endPos))
2539 flagsMatch |= std::regex_constants::match_not_eol;
2540 return flagsMatch;
2543 template<typename Iterator, typename Regex>
2544 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2545 bool matched = false;
2546 std::match_results<Iterator> match;
2548 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2549 // If they didn't then the line by line iteration could be removed for the forwards
2550 // case and replaced with the following 4 lines:
2551 // Iterator uiStart(doc, startPos);
2552 // Iterator uiEnd(doc, endPos);
2553 // flagsMatch = MatchFlags(doc, startPos, endPos);
2554 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2556 // Line by line.
2557 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2558 const Range lineRange = resr.LineRange(line);
2559 Iterator itStart(doc, lineRange.start);
2560 Iterator itEnd(doc, lineRange.end);
2561 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2562 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2563 // Check for the last match on this line.
2564 if (matched) {
2565 if (resr.increment == -1) {
2566 while (matched) {
2567 Iterator itNext(doc, match[0].second.PosRoundUp());
2568 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2569 std::match_results<Iterator> matchNext;
2570 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2571 if (matched) {
2572 if (match[0].first == match[0].second) {
2573 // Empty match means failure so exit
2574 return false;
2576 match = matchNext;
2579 matched = true;
2581 break;
2584 if (matched) {
2585 for (size_t co = 0; co < match.size(); co++) {
2586 search.bopat[co] = match[co].first.Pos();
2587 search.eopat[co] = match[co].second.PosRoundUp();
2588 size_t lenMatch = search.eopat[co] - search.bopat[co];
2589 search.pat[co].resize(lenMatch);
2590 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2591 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2595 return matched;
2598 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2599 bool caseSensitive, int *length, RESearch &search) {
2600 const RESearchRange resr(doc, minPos, maxPos);
2601 try {
2602 //ElapsedTime et;
2603 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2604 // Flags that apper to have no effect:
2605 // | std::regex::collate | std::regex::extended;
2606 if (!caseSensitive)
2607 flagsRe = flagsRe | std::regex::icase;
2609 // Clear the RESearch so can fill in matches
2610 search.Clear();
2612 bool matched = false;
2613 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2614 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2615 std::vector<wchar_t> ws(lenS + 1);
2616 #if WCHAR_T_IS_16
2617 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2618 #else
2619 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2620 #endif
2621 ws[outLen] = 0;
2622 std::wregex regexp;
2623 #if defined(__APPLE__)
2624 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2625 // is one byte not one character.
2626 // However, on OS X this makes wregex act as Unicode
2627 std::locale localeU("en_US.UTF-8");
2628 regexp.imbue(localeU);
2629 #endif
2630 regexp.assign(&ws[0], flagsRe);
2631 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2633 } else {
2634 std::regex regexp;
2635 regexp.assign(s, flagsRe);
2636 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2639 int posMatch = -1;
2640 if (matched) {
2641 posMatch = search.bopat[0];
2642 *length = search.eopat[0] - search.bopat[0];
2644 // Example - search in doc/ScintillaHistory.html for
2645 // [[:upper:]]eta[[:space:]]
2646 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2647 //double durSearch = et.Duration(true);
2648 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2649 return posMatch;
2650 } catch (std::regex_error &) {
2651 // Failed to create regular expression
2652 throw RegexError();
2653 } catch (...) {
2654 // Failed in some other way
2655 return -1;
2659 #endif
2663 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2664 bool caseSensitive, bool, bool, int flags,
2665 int *length) {
2667 #ifdef CXX11_REGEX
2668 if (flags & SCFIND_CXX11REGEX) {
2669 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2670 caseSensitive, length, search);
2672 #endif
2674 const RESearchRange resr(doc, minPos, maxPos);
2676 const bool posix = (flags & SCFIND_POSIX) != 0;
2678 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2679 if (errmsg) {
2680 return -1;
2682 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2683 // Replace first '.' with '-' in each property file variable reference:
2684 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2685 // Replace: $(\1-\2)
2686 int pos = -1;
2687 int lenRet = 0;
2688 const char searchEnd = s[*length - 1];
2689 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2690 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2691 int startOfLine = doc->LineStart(line);
2692 int endOfLine = doc->LineEnd(line);
2693 if (resr.increment == 1) {
2694 if (line == resr.lineRangeStart) {
2695 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2696 continue; // Can't match start of line if start position after start of line
2697 startOfLine = resr.startPos;
2699 if (line == resr.lineRangeEnd) {
2700 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2701 continue; // Can't match end of line if end position before end of line
2702 endOfLine = resr.endPos;
2704 } else {
2705 if (line == resr.lineRangeEnd) {
2706 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2707 continue; // Can't match start of line if end position after start of line
2708 startOfLine = resr.endPos;
2710 if (line == resr.lineRangeStart) {
2711 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2712 continue; // Can't match end of line if start position before end of line
2713 endOfLine = resr.startPos;
2717 DocumentIndexer di(doc, endOfLine);
2718 int success = search.Execute(di, startOfLine, endOfLine);
2719 if (success) {
2720 pos = search.bopat[0];
2721 // Ensure only whole characters selected
2722 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2723 lenRet = search.eopat[0] - search.bopat[0];
2724 // There can be only one start of a line, so no need to look for last match in line
2725 if ((resr.increment == -1) && (s[0] != '^')) {
2726 // Check for the last match on this line.
2727 int repetitions = 1000; // Break out of infinite loop
2728 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2729 success = search.Execute(di, pos+1, endOfLine);
2730 if (success) {
2731 if (search.eopat[0] <= minPos) {
2732 pos = search.bopat[0];
2733 lenRet = search.eopat[0] - search.bopat[0];
2734 } else {
2735 success = 0;
2740 break;
2743 *length = lenRet;
2744 return pos;
2747 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2748 substituted.clear();
2749 DocumentIndexer di(doc, doc->Length());
2750 search.GrabMatches(di);
2751 for (int j = 0; j < *length; j++) {
2752 if (text[j] == '\\') {
2753 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2754 unsigned int patNum = text[j + 1] - '0';
2755 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2756 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2757 substituted.append(search.pat[patNum].c_str(), len);
2758 j++;
2759 } else {
2760 j++;
2761 switch (text[j]) {
2762 case 'a':
2763 substituted.push_back('\a');
2764 break;
2765 case 'b':
2766 substituted.push_back('\b');
2767 break;
2768 case 'f':
2769 substituted.push_back('\f');
2770 break;
2771 case 'n':
2772 substituted.push_back('\n');
2773 break;
2774 case 'r':
2775 substituted.push_back('\r');
2776 break;
2777 case 't':
2778 substituted.push_back('\t');
2779 break;
2780 case 'v':
2781 substituted.push_back('\v');
2782 break;
2783 case '\\':
2784 substituted.push_back('\\');
2785 break;
2786 default:
2787 substituted.push_back('\\');
2788 j--;
2791 } else {
2792 substituted.push_back(text[j]);
2795 *length = static_cast<int>(substituted.length());
2796 return substituted.c_str();
2799 #ifndef SCI_OWNREGEX
2801 #ifdef SCI_NAMESPACE
2803 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2804 return new BuiltinRegex(charClassTable);
2807 #else
2809 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2810 return new BuiltinRegex(charClassTable);
2813 #endif
2815 #endif