Update Scintilla to version 3.5.2
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blob8bb1b7ae64e5352be497cad1f7a3eedbc67d949c
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "CellBuffer.h"
33 #include "PerLine.h"
34 #include "CharClassify.h"
35 #include "Decoration.h"
36 #include "CaseFolder.h"
37 #include "Document.h"
38 #include "RESearch.h"
39 #include "UniConversion.h"
41 #ifdef SCI_NAMESPACE
42 using namespace Scintilla;
43 #endif
45 static inline bool IsPunctuation(char ch) {
46 return IsASCII(ch) && ispunct(ch);
49 void LexInterface::Colourise(int start, int end) {
50 if (pdoc && instance && !performingStyle) {
51 // Protect against reentrance, which may occur, for example, when
52 // fold points are discovered while performing styling and the folding
53 // code looks for child lines which may trigger styling.
54 performingStyle = true;
56 int lengthDoc = pdoc->Length();
57 if (end == -1)
58 end = lengthDoc;
59 int len = end - start;
61 PLATFORM_ASSERT(len >= 0);
62 PLATFORM_ASSERT(start + len <= lengthDoc);
64 int styleStart = 0;
65 if (start > 0)
66 styleStart = pdoc->StyleAt(start - 1);
68 if (len > 0) {
69 instance->Lex(start, len, styleStart, pdoc);
70 instance->Fold(start, len, styleStart, pdoc);
73 performingStyle = false;
77 int LexInterface::LineEndTypesSupported() {
78 if (instance) {
79 int interfaceVersion = instance->Version();
80 if (interfaceVersion >= lvSubStyles) {
81 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
82 return ssinstance->LineEndTypesSupported();
85 return 0;
88 Document::Document() {
89 refCount = 0;
90 pcf = NULL;
91 #ifdef _WIN32
92 eolMode = SC_EOL_CRLF;
93 #else
94 eolMode = SC_EOL_LF;
95 #endif
96 dbcsCodePage = 0;
97 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
98 endStyled = 0;
99 styleClock = 0;
100 enteredModification = 0;
101 enteredStyling = 0;
102 enteredReadOnlyCount = 0;
103 insertionSet = false;
104 tabInChars = 8;
105 indentInChars = 0;
106 actualIndentInChars = 8;
107 useTabs = true;
108 tabIndents = true;
109 backspaceUnindents = false;
111 matchesValid = false;
112 regex = 0;
114 UTF8BytesOfLeadInitialise();
116 perLineData[ldMarkers] = new LineMarkers();
117 perLineData[ldLevels] = new LineLevels();
118 perLineData[ldState] = new LineState();
119 perLineData[ldMargin] = new LineAnnotation();
120 perLineData[ldAnnotation] = new LineAnnotation();
122 cb.SetPerLine(this);
124 pli = 0;
127 Document::~Document() {
128 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
129 it->watcher->NotifyDeleted(this, it->userData);
131 for (int j=0; j<ldSize; j++) {
132 delete perLineData[j];
133 perLineData[j] = 0;
135 delete regex;
136 regex = 0;
137 delete pli;
138 pli = 0;
139 delete pcf;
140 pcf = 0;
143 void Document::Init() {
144 for (int j=0; j<ldSize; j++) {
145 if (perLineData[j])
146 perLineData[j]->Init();
150 int Document::LineEndTypesSupported() const {
151 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
152 return pli->LineEndTypesSupported();
153 else
154 return 0;
157 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
158 if (dbcsCodePage != dbcsCodePage_) {
159 dbcsCodePage = dbcsCodePage_;
160 SetCaseFolder(NULL);
161 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
162 return true;
163 } else {
164 return false;
168 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
169 if (lineEndBitSet != lineEndBitSet_) {
170 lineEndBitSet = lineEndBitSet_;
171 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
172 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
173 ModifiedAt(0);
174 cb.SetLineEndTypes(lineEndBitSetActive);
175 return true;
176 } else {
177 return false;
179 } else {
180 return false;
184 void Document::InsertLine(int line) {
185 for (int j=0; j<ldSize; j++) {
186 if (perLineData[j])
187 perLineData[j]->InsertLine(line);
191 void Document::RemoveLine(int line) {
192 for (int j=0; j<ldSize; j++) {
193 if (perLineData[j])
194 perLineData[j]->RemoveLine(line);
198 // Increase reference count and return its previous value.
199 int Document::AddRef() {
200 return refCount++;
203 // Decrease reference count and return its previous value.
204 // Delete the document if reference count reaches zero.
205 int SCI_METHOD Document::Release() {
206 int curRefCount = --refCount;
207 if (curRefCount == 0)
208 delete this;
209 return curRefCount;
212 void Document::SetSavePoint() {
213 cb.SetSavePoint();
214 NotifySavePoint(true);
217 void Document::TentativeUndo() {
218 CheckReadOnly();
219 if (enteredModification == 0) {
220 enteredModification++;
221 if (!cb.IsReadOnly()) {
222 bool startSavePoint = cb.IsSavePoint();
223 bool multiLine = false;
224 int steps = cb.TentativeSteps();
225 //Platform::DebugPrintf("Steps=%d\n", steps);
226 for (int step = 0; step < steps; step++) {
227 const int prevLinesTotal = LinesTotal();
228 const Action &action = cb.GetUndoStep();
229 if (action.at == removeAction) {
230 NotifyModified(DocModification(
231 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
232 } else if (action.at == containerAction) {
233 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
234 dm.token = action.position;
235 NotifyModified(dm);
236 } else {
237 NotifyModified(DocModification(
238 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
240 cb.PerformUndoStep();
241 if (action.at != containerAction) {
242 ModifiedAt(action.position);
245 int modFlags = SC_PERFORMED_UNDO;
246 // With undo, an insertion action becomes a deletion notification
247 if (action.at == removeAction) {
248 modFlags |= SC_MOD_INSERTTEXT;
249 } else if (action.at == insertAction) {
250 modFlags |= SC_MOD_DELETETEXT;
252 if (steps > 1)
253 modFlags |= SC_MULTISTEPUNDOREDO;
254 const int linesAdded = LinesTotal() - prevLinesTotal;
255 if (linesAdded != 0)
256 multiLine = true;
257 if (step == steps - 1) {
258 modFlags |= SC_LASTSTEPINUNDOREDO;
259 if (multiLine)
260 modFlags |= SC_MULTILINEUNDOREDO;
262 NotifyModified(DocModification(modFlags, action.position, action.lenData,
263 linesAdded, action.data));
266 bool endSavePoint = cb.IsSavePoint();
267 if (startSavePoint != endSavePoint)
268 NotifySavePoint(endSavePoint);
270 cb.TentativeCommit();
272 enteredModification--;
276 int Document::GetMark(int line) {
277 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
280 int Document::MarkerNext(int lineStart, int mask) const {
281 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
284 int Document::AddMark(int line, int markerNum) {
285 if (line >= 0 && line <= LinesTotal()) {
286 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
287 AddMark(line, markerNum, LinesTotal());
288 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
289 NotifyModified(mh);
290 return prev;
291 } else {
292 return 0;
296 void Document::AddMarkSet(int line, int valueSet) {
297 if (line < 0 || line > LinesTotal()) {
298 return;
300 unsigned int m = valueSet;
301 for (int i = 0; m; i++, m >>= 1)
302 if (m & 1)
303 static_cast<LineMarkers *>(perLineData[ldMarkers])->
304 AddMark(line, i, LinesTotal());
305 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
306 NotifyModified(mh);
309 void Document::DeleteMark(int line, int markerNum) {
310 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
311 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
312 NotifyModified(mh);
315 void Document::DeleteMarkFromHandle(int markerHandle) {
316 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
317 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
318 mh.line = -1;
319 NotifyModified(mh);
322 void Document::DeleteAllMarks(int markerNum) {
323 bool someChanges = false;
324 for (int line = 0; line < LinesTotal(); line++) {
325 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
326 someChanges = true;
328 if (someChanges) {
329 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
330 mh.line = -1;
331 NotifyModified(mh);
335 int Document::LineFromHandle(int markerHandle) {
336 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
339 int SCI_METHOD Document::LineStart(int line) const {
340 return cb.LineStart(line);
343 bool Document::IsLineStartPosition(int position) const {
344 return LineStart(LineFromPosition(position)) == position;
347 int SCI_METHOD Document::LineEnd(int line) const {
348 if (line >= LinesTotal() - 1) {
349 return LineStart(line + 1);
350 } else {
351 int position = LineStart(line + 1);
352 if (SC_CP_UTF8 == dbcsCodePage) {
353 unsigned char bytes[] = {
354 static_cast<unsigned char>(cb.CharAt(position-3)),
355 static_cast<unsigned char>(cb.CharAt(position-2)),
356 static_cast<unsigned char>(cb.CharAt(position-1)),
358 if (UTF8IsSeparator(bytes)) {
359 return position - UTF8SeparatorLength;
361 if (UTF8IsNEL(bytes+1)) {
362 return position - UTF8NELLength;
365 position--; // Back over CR or LF
366 // When line terminator is CR+LF, may need to go back one more
367 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
368 position--;
370 return position;
374 void SCI_METHOD Document::SetErrorStatus(int status) {
375 // Tell the watchers an error has occurred.
376 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
377 it->watcher->NotifyErrorOccurred(this, it->userData, status);
381 int SCI_METHOD Document::LineFromPosition(int pos) const {
382 return cb.LineFromPosition(pos);
385 int Document::LineEndPosition(int position) const {
386 return LineEnd(LineFromPosition(position));
389 bool Document::IsLineEndPosition(int position) const {
390 return LineEnd(LineFromPosition(position)) == position;
393 bool Document::IsPositionInLineEnd(int position) const {
394 return position >= LineEnd(LineFromPosition(position));
397 int Document::VCHomePosition(int position) const {
398 int line = LineFromPosition(position);
399 int startPosition = LineStart(line);
400 int endLine = LineEnd(line);
401 int startText = startPosition;
402 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
403 startText++;
404 if (position == startText)
405 return startPosition;
406 else
407 return startText;
410 int SCI_METHOD Document::SetLevel(int line, int level) {
411 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
412 if (prev != level) {
413 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
414 LineStart(line), 0, 0, 0, line);
415 mh.foldLevelNow = level;
416 mh.foldLevelPrev = prev;
417 NotifyModified(mh);
419 return prev;
422 int SCI_METHOD Document::GetLevel(int line) const {
423 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
426 void Document::ClearLevels() {
427 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
430 static bool IsSubordinate(int levelStart, int levelTry) {
431 if (levelTry & SC_FOLDLEVELWHITEFLAG)
432 return true;
433 else
434 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
437 int Document::GetLastChild(int lineParent, int level, int lastLine) {
438 if (level == -1)
439 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
440 int maxLine = LinesTotal();
441 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
442 int lineMaxSubord = lineParent;
443 while (lineMaxSubord < maxLine - 1) {
444 EnsureStyledTo(LineStart(lineMaxSubord + 2));
445 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
446 break;
447 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
448 break;
449 lineMaxSubord++;
451 if (lineMaxSubord > lineParent) {
452 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
453 // Have chewed up some whitespace that belongs to a parent so seek back
454 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
455 lineMaxSubord--;
459 return lineMaxSubord;
462 int Document::GetFoldParent(int line) const {
463 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
464 int lineLook = line - 1;
465 while ((lineLook > 0) && (
466 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
467 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
469 lineLook--;
471 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
472 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
473 return lineLook;
474 } else {
475 return -1;
479 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
480 int level = GetLevel(line);
481 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
483 int lookLine = line;
484 int lookLineLevel = level;
485 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
486 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
487 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
488 lookLineLevel = GetLevel(--lookLine);
489 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
492 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
493 if (beginFoldBlock == -1) {
494 highlightDelimiter.Clear();
495 return;
498 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
499 int firstChangeableLineBefore = -1;
500 if (endFoldBlock < line) {
501 lookLine = beginFoldBlock - 1;
502 lookLineLevel = GetLevel(lookLine);
503 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
504 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
505 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
506 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
507 beginFoldBlock = lookLine;
508 endFoldBlock = line;
509 firstChangeableLineBefore = line - 1;
512 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
513 break;
514 lookLineLevel = GetLevel(--lookLine);
515 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
518 if (firstChangeableLineBefore == -1) {
519 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
520 lookLine >= beginFoldBlock;
521 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
522 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
523 firstChangeableLineBefore = lookLine;
524 break;
528 if (firstChangeableLineBefore == -1)
529 firstChangeableLineBefore = beginFoldBlock - 1;
531 int firstChangeableLineAfter = -1;
532 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
533 lookLine <= endFoldBlock;
534 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
535 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
536 firstChangeableLineAfter = lookLine;
537 break;
540 if (firstChangeableLineAfter == -1)
541 firstChangeableLineAfter = endFoldBlock + 1;
543 highlightDelimiter.beginFoldBlock = beginFoldBlock;
544 highlightDelimiter.endFoldBlock = endFoldBlock;
545 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
546 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
549 int Document::ClampPositionIntoDocument(int pos) const {
550 return Platform::Clamp(pos, 0, Length());
553 bool Document::IsCrLf(int pos) const {
554 if (pos < 0)
555 return false;
556 if (pos >= (Length() - 1))
557 return false;
558 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
561 int Document::LenChar(int pos) {
562 if (pos < 0) {
563 return 1;
564 } else if (IsCrLf(pos)) {
565 return 2;
566 } else if (SC_CP_UTF8 == dbcsCodePage) {
567 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
568 const int widthCharBytes = UTF8BytesOfLead[leadByte];
569 int lengthDoc = Length();
570 if ((pos + widthCharBytes) > lengthDoc)
571 return lengthDoc - pos;
572 else
573 return widthCharBytes;
574 } else if (dbcsCodePage) {
575 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
576 } else {
577 return 1;
581 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
582 int trail = pos;
583 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
584 trail--;
585 start = (trail > 0) ? trail-1 : trail;
587 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
588 const int widthCharBytes = UTF8BytesOfLead[leadByte];
589 if (widthCharBytes == 1) {
590 return false;
591 } else {
592 int trailBytes = widthCharBytes - 1;
593 int len = pos - start;
594 if (len > trailBytes)
595 // pos too far from lead
596 return false;
597 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
598 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
599 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
600 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
601 if (utf8status & UTF8MaskInvalid)
602 return false;
603 end = start + widthCharBytes;
604 return true;
608 // Normalise a position so that it is not halfway through a two byte character.
609 // This can occur in two situations -
610 // When lines are terminated with \r\n pairs which should be treated as one character.
611 // When displaying DBCS text such as Japanese.
612 // If moving, move the position in the indicated direction.
613 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
614 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
615 // If out of range, just return minimum/maximum value.
616 if (pos <= 0)
617 return 0;
618 if (pos >= Length())
619 return Length();
621 // PLATFORM_ASSERT(pos > 0 && pos < Length());
622 if (checkLineEnd && IsCrLf(pos - 1)) {
623 if (moveDir > 0)
624 return pos + 1;
625 else
626 return pos - 1;
629 if (dbcsCodePage) {
630 if (SC_CP_UTF8 == dbcsCodePage) {
631 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
632 // If ch is not a trail byte then pos is valid intercharacter position
633 if (UTF8IsTrailByte(ch)) {
634 int startUTF = pos;
635 int endUTF = pos;
636 if (InGoodUTF8(pos, startUTF, endUTF)) {
637 // ch is a trail byte within a UTF-8 character
638 if (moveDir > 0)
639 pos = endUTF;
640 else
641 pos = startUTF;
643 // Else invalid UTF-8 so return position of isolated trail byte
645 } else {
646 // Anchor DBCS calculations at start of line because start of line can
647 // not be a DBCS trail byte.
648 int posStartLine = LineStart(LineFromPosition(pos));
649 if (pos == posStartLine)
650 return pos;
652 // Step back until a non-lead-byte is found.
653 int posCheck = pos;
654 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
655 posCheck--;
657 // Check from known start of character.
658 while (posCheck < pos) {
659 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
660 if (posCheck + mbsize == pos) {
661 return pos;
662 } else if (posCheck + mbsize > pos) {
663 if (moveDir > 0) {
664 return posCheck + mbsize;
665 } else {
666 return posCheck;
669 posCheck += mbsize;
674 return pos;
677 // NextPosition moves between valid positions - it can not handle a position in the middle of a
678 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
679 // A \r\n pair is treated as two characters.
680 int Document::NextPosition(int pos, int moveDir) const {
681 // If out of range, just return minimum/maximum value.
682 int increment = (moveDir > 0) ? 1 : -1;
683 if (pos + increment <= 0)
684 return 0;
685 if (pos + increment >= Length())
686 return Length();
688 if (dbcsCodePage) {
689 if (SC_CP_UTF8 == dbcsCodePage) {
690 if (increment == 1) {
691 // Simple forward movement case so can avoid some checks
692 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
693 if (UTF8IsAscii(leadByte)) {
694 // Single byte character or invalid
695 pos++;
696 } else {
697 const int widthCharBytes = UTF8BytesOfLead[leadByte];
698 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
699 for (int b=1; b<widthCharBytes; b++)
700 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
701 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
702 if (utf8status & UTF8MaskInvalid)
703 pos++;
704 else
705 pos += utf8status & UTF8MaskWidth;
707 } else {
708 // Examine byte before position
709 pos--;
710 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
711 // If ch is not a trail byte then pos is valid intercharacter position
712 if (UTF8IsTrailByte(ch)) {
713 // If ch is a trail byte in a valid UTF-8 character then return start of character
714 int startUTF = pos;
715 int endUTF = pos;
716 if (InGoodUTF8(pos, startUTF, endUTF)) {
717 pos = startUTF;
719 // Else invalid UTF-8 so return position of isolated trail byte
722 } else {
723 if (moveDir > 0) {
724 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
725 pos += mbsize;
726 if (pos > Length())
727 pos = Length();
728 } else {
729 // Anchor DBCS calculations at start of line because start of line can
730 // not be a DBCS trail byte.
731 int posStartLine = LineStart(LineFromPosition(pos));
732 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
733 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
734 if ((pos - 1) <= posStartLine) {
735 return pos - 1;
736 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
737 // Must actually be trail byte
738 return pos - 2;
739 } else {
740 // Otherwise, step back until a non-lead-byte is found.
741 int posTemp = pos - 1;
742 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
744 // Now posTemp+1 must point to the beginning of a character,
745 // so figure out whether we went back an even or an odd
746 // number of bytes and go back 1 or 2 bytes, respectively.
747 return (pos - 1 - ((pos - posTemp) & 1));
751 } else {
752 pos += increment;
755 return pos;
758 bool Document::NextCharacter(int &pos, int moveDir) const {
759 // Returns true if pos changed
760 int posNext = NextPosition(pos, moveDir);
761 if (posNext == pos) {
762 return false;
763 } else {
764 pos = posNext;
765 return true;
// Decode the UTF-8 sequence starting at us into a code point.
// Only as many bytes as the lead byte implies are read; no validation is done.
// Lead bytes below 0xC2 or from 0xF5 upwards are returned unchanged.
static inline int UnicodeFromBytes(const unsigned char *us) {
	const unsigned char lead = us[0];
	if ((lead >= 0xC2) && (lead < 0xE0)) {
		// Two byte sequence: 5 + 6 payload bits.
		return ((lead & 0x1F) << 6) + (us[1] & 0x3F);
	}
	if ((lead >= 0xE0) && (lead < 0xF0)) {
		// Three byte sequence: 4 + 6 + 6 payload bits.
		return ((lead & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
	}
	if ((lead >= 0xF0) && (lead < 0xF5)) {
		// Four byte sequence: 3 + 6 + 6 + 6 payload bits.
		return ((lead & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
	}
	return lead;
}
782 // Return -1 on out-of-bounds
783 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
784 int pos = positionStart;
785 if (dbcsCodePage) {
786 const int increment = (characterOffset > 0) ? 1 : -1;
787 while (characterOffset != 0) {
788 const int posNext = NextPosition(pos, increment);
789 if (posNext == pos)
790 return INVALID_POSITION;
791 pos = posNext;
792 characterOffset -= increment;
794 } else {
795 pos = positionStart + characterOffset;
796 if ((pos < 0) || (pos > Length()))
797 return INVALID_POSITION;
799 return pos;
802 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
803 int character;
804 int bytesInCharacter = 1;
805 if (dbcsCodePage) {
806 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
807 if (SC_CP_UTF8 == dbcsCodePage) {
808 if (UTF8IsAscii(leadByte)) {
809 // Single byte character or invalid
810 character = leadByte;
811 } else {
812 const int widthCharBytes = UTF8BytesOfLead[leadByte];
813 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
814 for (int b=1; b<widthCharBytes; b++)
815 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
816 int utf8status = UTF8Classify(charBytes, widthCharBytes);
817 if (utf8status & UTF8MaskInvalid) {
818 // Report as singleton surrogate values which are invalid Unicode
819 character = 0xDC80 + leadByte;
820 } else {
821 bytesInCharacter = utf8status & UTF8MaskWidth;
822 character = UnicodeFromBytes(charBytes);
825 } else {
826 if (IsDBCSLeadByte(leadByte)) {
827 bytesInCharacter = 2;
828 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
829 } else {
830 character = leadByte;
833 } else {
834 character = cb.CharAt(position);
836 if (pWidth) {
837 *pWidth = bytesInCharacter;
839 return character;
842 int SCI_METHOD Document::CodePage() const {
843 return dbcsCodePage;
846 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
847 // Byte ranges found in Wikipedia articles with relevant search strings in each case
848 unsigned char uch = static_cast<unsigned char>(ch);
849 switch (dbcsCodePage) {
850 case 932:
851 // Shift_jis
852 return ((uch >= 0x81) && (uch <= 0x9F)) ||
853 ((uch >= 0xE0) && (uch <= 0xFC));
854 // Lead bytes F0 to FC may be a Microsoft addition.
855 case 936:
856 // GBK
857 return (uch >= 0x81) && (uch <= 0xFE);
858 case 949:
859 // Korean Wansung KS C-5601-1987
860 return (uch >= 0x81) && (uch <= 0xFE);
861 case 950:
862 // Big5
863 return (uch >= 0x81) && (uch <= 0xFE);
864 case 1361:
865 // Korean Johab KS C-5601-1992
866 return
867 ((uch >= 0x84) && (uch <= 0xD3)) ||
868 ((uch >= 0xD8) && (uch <= 0xDE)) ||
869 ((uch >= 0xE0) && (uch <= 0xF9));
871 return false;
// True for a space or a horizontal tab, false for every other value.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
878 // Need to break text into segments near lengthSegment but taking into
879 // account the encoding to not break inside a UTF-8 or DBCS character
880 // and also trying to avoid breaking inside a pair of combining characters.
881 // The segment length must always be long enough (more than 4 bytes)
882 // so that there will be at least one whole character to make a segment.
883 // For UTF-8, text must consist only of valid whole characters.
884 // In preference order from best to worst:
885 // 1) Break after space
886 // 2) Break before punctuation
887 // 3) Break after whole character
889 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
890 if (length <= lengthSegment)
891 return length;
892 int lastSpaceBreak = -1;
893 int lastPunctuationBreak = -1;
894 int lastEncodingAllowedBreak = 0;
895 for (int j=0; j < lengthSegment;) {
896 unsigned char ch = static_cast<unsigned char>(text[j]);
897 if (j > 0) {
898 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
899 lastSpaceBreak = j;
901 if (ch < 'A') {
902 lastPunctuationBreak = j;
905 lastEncodingAllowedBreak = j;
907 if (dbcsCodePage == SC_CP_UTF8) {
908 j += UTF8BytesOfLead[ch];
909 } else if (dbcsCodePage) {
910 j += IsDBCSLeadByte(ch) ? 2 : 1;
911 } else {
912 j++;
915 if (lastSpaceBreak >= 0) {
916 return lastSpaceBreak;
917 } else if (lastPunctuationBreak >= 0) {
918 return lastPunctuationBreak;
920 return lastEncodingAllowedBreak;
923 EncodingFamily Document::CodePageFamily() const {
924 if (SC_CP_UTF8 == dbcsCodePage)
925 return efUnicode;
926 else if (dbcsCodePage)
927 return efDBCS;
928 else
929 return efEightBit;
932 void Document::ModifiedAt(int pos) {
933 if (endStyled > pos)
934 endStyled = pos;
937 void Document::CheckReadOnly() {
938 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
939 enteredReadOnlyCount++;
940 NotifyModifyAttempt();
941 enteredReadOnlyCount--;
945 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
946 // SetStyleAt does not change the persistent state of a document
948 bool Document::DeleteChars(int pos, int len) {
949 if (pos < 0)
950 return false;
951 if (len <= 0)
952 return false;
953 if ((pos + len) > Length())
954 return false;
955 CheckReadOnly();
956 if (enteredModification != 0) {
957 return false;
958 } else {
959 enteredModification++;
960 if (!cb.IsReadOnly()) {
961 NotifyModified(
962 DocModification(
963 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
964 pos, len,
965 0, 0));
966 int prevLinesTotal = LinesTotal();
967 bool startSavePoint = cb.IsSavePoint();
968 bool startSequence = false;
969 const char *text = cb.DeleteChars(pos, len, startSequence);
970 if (startSavePoint && cb.IsCollectingUndo())
971 NotifySavePoint(!startSavePoint);
972 if ((pos < Length()) || (pos == 0))
973 ModifiedAt(pos);
974 else
975 ModifiedAt(pos-1);
976 NotifyModified(
977 DocModification(
978 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
979 pos, len,
980 LinesTotal() - prevLinesTotal, text));
982 enteredModification--;
984 return !cb.IsReadOnly();
988 * Insert a string with a length.
990 int Document::InsertString(int position, const char *s, int insertLength) {
991 if (insertLength <= 0) {
992 return 0;
994 CheckReadOnly(); // Application may change read only state here
995 if (cb.IsReadOnly()) {
996 return 0;
998 if (enteredModification != 0) {
999 return 0;
1001 enteredModification++;
1002 insertionSet = false;
1003 insertion.clear();
1004 NotifyModified(
1005 DocModification(
1006 SC_MOD_INSERTCHECK,
1007 position, insertLength,
1008 0, s));
1009 if (insertionSet) {
1010 s = insertion.c_str();
1011 insertLength = static_cast<int>(insertion.length());
1013 NotifyModified(
1014 DocModification(
1015 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1016 position, insertLength,
1017 0, s));
1018 int prevLinesTotal = LinesTotal();
1019 bool startSavePoint = cb.IsSavePoint();
1020 bool startSequence = false;
1021 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1022 if (startSavePoint && cb.IsCollectingUndo())
1023 NotifySavePoint(!startSavePoint);
1024 ModifiedAt(position);
1025 NotifyModified(
1026 DocModification(
1027 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1028 position, insertLength,
1029 LinesTotal() - prevLinesTotal, text));
1030 if (insertionSet) { // Free memory as could be large
1031 std::string().swap(insertion);
1033 enteredModification--;
1034 return insertLength;
1037 void Document::ChangeInsertion(const char *s, int length) {
1038 insertionSet = true;
1039 insertion.assign(s, length);
1042 int SCI_METHOD Document::AddData(char *data, int length) {
1043 try {
1044 int position = Length();
1045 InsertString(position, data, length);
1046 } catch (std::bad_alloc &) {
1047 return SC_STATUS_BADALLOC;
1048 } catch (...) {
1049 return SC_STATUS_FAILURE;
1051 return 0;
1054 void * SCI_METHOD Document::ConvertToDocument() {
1055 return this;
1058 int Document::Undo() {
1059 int newPos = -1;
1060 CheckReadOnly();
1061 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1062 enteredModification++;
1063 if (!cb.IsReadOnly()) {
1064 bool startSavePoint = cb.IsSavePoint();
1065 bool multiLine = false;
1066 int steps = cb.StartUndo();
1067 //Platform::DebugPrintf("Steps=%d\n", steps);
1068 int coalescedRemovePos = -1;
1069 int coalescedRemoveLen = 0;
1070 int prevRemoveActionPos = -1;
1071 int prevRemoveActionLen = 0;
1072 for (int step = 0; step < steps; step++) {
1073 const int prevLinesTotal = LinesTotal();
1074 const Action &action = cb.GetUndoStep();
1075 if (action.at == removeAction) {
1076 NotifyModified(DocModification(
1077 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1078 } else if (action.at == containerAction) {
1079 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1080 dm.token = action.position;
1081 NotifyModified(dm);
1082 if (!action.mayCoalesce) {
1083 coalescedRemovePos = -1;
1084 coalescedRemoveLen = 0;
1085 prevRemoveActionPos = -1;
1086 prevRemoveActionLen = 0;
1088 } else {
1089 NotifyModified(DocModification(
1090 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1092 cb.PerformUndoStep();
1093 if (action.at != containerAction) {
1094 ModifiedAt(action.position);
1095 newPos = action.position;
1098 int modFlags = SC_PERFORMED_UNDO;
1099 // With undo, an insertion action becomes a deletion notification
1100 if (action.at == removeAction) {
1101 newPos += action.lenData;
1102 modFlags |= SC_MOD_INSERTTEXT;
1103 if ((coalescedRemoveLen > 0) &&
1104 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1105 coalescedRemoveLen += action.lenData;
1106 newPos = coalescedRemovePos + coalescedRemoveLen;
1107 } else {
1108 coalescedRemovePos = action.position;
1109 coalescedRemoveLen = action.lenData;
1111 prevRemoveActionPos = action.position;
1112 prevRemoveActionLen = action.lenData;
1113 } else if (action.at == insertAction) {
1114 modFlags |= SC_MOD_DELETETEXT;
1115 coalescedRemovePos = -1;
1116 coalescedRemoveLen = 0;
1117 prevRemoveActionPos = -1;
1118 prevRemoveActionLen = 0;
1120 if (steps > 1)
1121 modFlags |= SC_MULTISTEPUNDOREDO;
1122 const int linesAdded = LinesTotal() - prevLinesTotal;
1123 if (linesAdded != 0)
1124 multiLine = true;
1125 if (step == steps - 1) {
1126 modFlags |= SC_LASTSTEPINUNDOREDO;
1127 if (multiLine)
1128 modFlags |= SC_MULTILINEUNDOREDO;
1130 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1131 linesAdded, action.data));
1134 bool endSavePoint = cb.IsSavePoint();
1135 if (startSavePoint != endSavePoint)
1136 NotifySavePoint(endSavePoint);
1138 enteredModification--;
1140 return newPos;
1143 int Document::Redo() {
1144 int newPos = -1;
1145 CheckReadOnly();
1146 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1147 enteredModification++;
1148 if (!cb.IsReadOnly()) {
1149 bool startSavePoint = cb.IsSavePoint();
1150 bool multiLine = false;
1151 int steps = cb.StartRedo();
1152 for (int step = 0; step < steps; step++) {
1153 const int prevLinesTotal = LinesTotal();
1154 const Action &action = cb.GetRedoStep();
1155 if (action.at == insertAction) {
1156 NotifyModified(DocModification(
1157 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1158 } else if (action.at == containerAction) {
1159 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1160 dm.token = action.position;
1161 NotifyModified(dm);
1162 } else {
1163 NotifyModified(DocModification(
1164 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1166 cb.PerformRedoStep();
1167 if (action.at != containerAction) {
1168 ModifiedAt(action.position);
1169 newPos = action.position;
1172 int modFlags = SC_PERFORMED_REDO;
1173 if (action.at == insertAction) {
1174 newPos += action.lenData;
1175 modFlags |= SC_MOD_INSERTTEXT;
1176 } else if (action.at == removeAction) {
1177 modFlags |= SC_MOD_DELETETEXT;
1179 if (steps > 1)
1180 modFlags |= SC_MULTISTEPUNDOREDO;
1181 const int linesAdded = LinesTotal() - prevLinesTotal;
1182 if (linesAdded != 0)
1183 multiLine = true;
1184 if (step == steps - 1) {
1185 modFlags |= SC_LASTSTEPINUNDOREDO;
1186 if (multiLine)
1187 modFlags |= SC_MULTILINEUNDOREDO;
1189 NotifyModified(
1190 DocModification(modFlags, action.position, action.lenData,
1191 linesAdded, action.data));
1194 bool endSavePoint = cb.IsSavePoint();
1195 if (startSavePoint != endSavePoint)
1196 NotifySavePoint(endSavePoint);
1198 enteredModification--;
1200 return newPos;
1203 void Document::DelChar(int pos) {
1204 DeleteChars(pos, LenChar(pos));
1207 void Document::DelCharBack(int pos) {
1208 if (pos <= 0) {
1209 return;
1210 } else if (IsCrLf(pos - 2)) {
1211 DeleteChars(pos - 2, 2);
1212 } else if (dbcsCodePage) {
1213 int startChar = NextPosition(pos, -1);
1214 DeleteChars(startChar, pos - startChar);
1215 } else {
1216 DeleteChars(pos - 1, 1);
/**
 * Return the first tab stop strictly after column @a pos.
 * @param pos Current column.
 * @param tabSize Distance between tab stops; must not be zero.
 * @return The next multiple of @a tabSize above the tab stop at or before @a pos.
 */
static int NextTab(int pos, int tabSize) {
	// pos - (pos % tabSize) is the tab stop at or before pos.
	const int currentStop = pos - (pos % tabSize);
	return currentStop + tabSize;
}
/**
 * Build the white space string that produces an indentation of @a indent columns.
 * @param indent Desired indentation in columns; values <= 0 yield an empty string.
 * @param tabSize Width of a tab in columns.
 * @param insertSpaces When true only spaces are used; otherwise as many tabs as
 *        fit are used, followed by spaces for the remainder.
 * @return The indentation string.
 *
 * A non-positive @a tabSize is treated as "tabs unavailable" and the result is
 * made of spaces only. The previous loop subtracted @a tabSize repeatedly and so
 * never terminated for such values.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<size_t>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<size_t>(indent), ' ');
	return indentation;
}
1239 int SCI_METHOD Document::GetLineIndentation(int line) {
1240 int indent = 0;
1241 if ((line >= 0) && (line < LinesTotal())) {
1242 int lineStart = LineStart(line);
1243 int length = Length();
1244 for (int i = lineStart; i < length; i++) {
1245 char ch = cb.CharAt(i);
1246 if (ch == ' ')
1247 indent++;
1248 else if (ch == '\t')
1249 indent = NextTab(indent, tabInChars);
1250 else
1251 return indent;
1254 return indent;
1257 int Document::SetLineIndentation(int line, int indent) {
1258 int indentOfLine = GetLineIndentation(line);
1259 if (indent < 0)
1260 indent = 0;
1261 if (indent != indentOfLine) {
1262 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1263 int thisLineStart = LineStart(line);
1264 int indentPos = GetLineIndentPosition(line);
1265 UndoGroup ug(this);
1266 DeleteChars(thisLineStart, indentPos - thisLineStart);
1267 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1268 static_cast<int>(linebuf.length()));
1269 } else {
1270 return GetLineIndentPosition(line);
1274 int Document::GetLineIndentPosition(int line) const {
1275 if (line < 0)
1276 return 0;
1277 int pos = LineStart(line);
1278 int length = Length();
1279 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1280 pos++;
1282 return pos;
1285 int Document::GetColumn(int pos) {
1286 int column = 0;
1287 int line = LineFromPosition(pos);
1288 if ((line >= 0) && (line < LinesTotal())) {
1289 for (int i = LineStart(line); i < pos;) {
1290 char ch = cb.CharAt(i);
1291 if (ch == '\t') {
1292 column = NextTab(column, tabInChars);
1293 i++;
1294 } else if (ch == '\r') {
1295 return column;
1296 } else if (ch == '\n') {
1297 return column;
1298 } else if (i >= Length()) {
1299 return column;
1300 } else {
1301 column++;
1302 i = NextPosition(i, 1);
1306 return column;
1309 int Document::CountCharacters(int startPos, int endPos) const {
1310 startPos = MovePositionOutsideChar(startPos, 1, false);
1311 endPos = MovePositionOutsideChar(endPos, -1, false);
1312 int count = 0;
1313 int i = startPos;
1314 while (i < endPos) {
1315 count++;
1316 if (IsCrLf(i))
1317 i++;
1318 i = NextPosition(i, 1);
1320 return count;
1323 int Document::FindColumn(int line, int column) {
1324 int position = LineStart(line);
1325 if ((line >= 0) && (line < LinesTotal())) {
1326 int columnCurrent = 0;
1327 while ((columnCurrent < column) && (position < Length())) {
1328 char ch = cb.CharAt(position);
1329 if (ch == '\t') {
1330 columnCurrent = NextTab(columnCurrent, tabInChars);
1331 if (columnCurrent > column)
1332 return position;
1333 position++;
1334 } else if (ch == '\r') {
1335 return position;
1336 } else if (ch == '\n') {
1337 return position;
1338 } else {
1339 columnCurrent++;
1340 position = NextPosition(position, 1);
1344 return position;
1347 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1348 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1349 for (int line = lineBottom; line >= lineTop; line--) {
1350 int indentOfLine = GetLineIndentation(line);
1351 if (forwards) {
1352 if (LineStart(line) < LineEnd(line)) {
1353 SetLineIndentation(line, indentOfLine + IndentSize());
1355 } else {
1356 SetLineIndentation(line, indentOfLine - IndentSize());
1361 // Convert line endings for a piece of text to a particular mode.
1362 // Stop at len or when a NUL is found.
1363 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1364 std::string dest;
1365 for (size_t i = 0; (i < len) && (s[i]); i++) {
1366 if (s[i] == '\n' || s[i] == '\r') {
1367 if (eolModeWanted == SC_EOL_CR) {
1368 dest.push_back('\r');
1369 } else if (eolModeWanted == SC_EOL_LF) {
1370 dest.push_back('\n');
1371 } else { // eolModeWanted == SC_EOL_CRLF
1372 dest.push_back('\r');
1373 dest.push_back('\n');
1375 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1376 i++;
1378 } else {
1379 dest.push_back(s[i]);
1382 return dest;
1385 void Document::ConvertLineEnds(int eolModeSet) {
1386 UndoGroup ug(this);
1388 for (int pos = 0; pos < Length(); pos++) {
1389 if (cb.CharAt(pos) == '\r') {
1390 if (cb.CharAt(pos + 1) == '\n') {
1391 // CRLF
1392 if (eolModeSet == SC_EOL_CR) {
1393 DeleteChars(pos + 1, 1); // Delete the LF
1394 } else if (eolModeSet == SC_EOL_LF) {
1395 DeleteChars(pos, 1); // Delete the CR
1396 } else {
1397 pos++;
1399 } else {
1400 // CR
1401 if (eolModeSet == SC_EOL_CRLF) {
1402 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1403 } else if (eolModeSet == SC_EOL_LF) {
1404 pos += InsertString(pos, "\n", 1); // Insert LF
1405 DeleteChars(pos, 1); // Delete CR
1406 pos--;
1409 } else if (cb.CharAt(pos) == '\n') {
1410 // LF
1411 if (eolModeSet == SC_EOL_CRLF) {
1412 pos += InsertString(pos, "\r", 1); // Insert CR
1413 } else if (eolModeSet == SC_EOL_CR) {
1414 pos += InsertString(pos, "\r", 1); // Insert CR
1415 DeleteChars(pos, 1); // Delete LF
1416 pos--;
1423 bool Document::IsWhiteLine(int line) const {
1424 int currentChar = LineStart(line);
1425 int endLine = LineEnd(line);
1426 while (currentChar < endLine) {
1427 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1428 return false;
1430 ++currentChar;
1432 return true;
1435 int Document::ParaUp(int pos) const {
1436 int line = LineFromPosition(pos);
1437 line--;
1438 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1439 line--;
1441 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1442 line--;
1444 line++;
1445 return LineStart(line);
1448 int Document::ParaDown(int pos) const {
1449 int line = LineFromPosition(pos);
1450 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1451 line++;
1453 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1454 line++;
1456 if (line < LinesTotal())
1457 return LineStart(line);
1458 else // end of a document
1459 return LineEnd(line-1);
1462 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1463 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1464 return CharClassify::ccWord;
1465 return charClass.GetClass(ch);
1469 * Used by commmands that want to select whole words.
1470 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1472 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1473 CharClassify::cc ccStart = CharClassify::ccWord;
1474 if (delta < 0) {
1475 if (!onlyWordCharacters)
1476 ccStart = WordCharClass(cb.CharAt(pos-1));
1477 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1478 pos--;
1479 } else {
1480 if (!onlyWordCharacters && pos < Length())
1481 ccStart = WordCharClass(cb.CharAt(pos));
1482 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1483 pos++;
1485 return MovePositionOutsideChar(pos, delta, true);
1489 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1490 * (delta < 0).
1491 * This is looking for a transition between character classes although there is also some
1492 * additional movement to transit white space.
1493 * Used by cursor movement by word commands.
1495 int Document::NextWordStart(int pos, int delta) {
1496 if (delta < 0) {
1497 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1498 pos--;
1499 if (pos > 0) {
1500 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1501 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1502 pos--;
1505 } else {
1506 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1507 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1508 pos++;
1509 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1510 pos++;
1512 return pos;
1516 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1517 * (delta < 0).
1518 * This is looking for a transition between character classes although there is also some
1519 * additional movement to transit white space.
1520 * Used by cursor movement by word commands.
1522 int Document::NextWordEnd(int pos, int delta) {
1523 if (delta < 0) {
1524 if (pos > 0) {
1525 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1526 if (ccStart != CharClassify::ccSpace) {
1527 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1528 pos--;
1531 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1532 pos--;
1535 } else {
1536 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1537 pos++;
1539 if (pos < Length()) {
1540 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1541 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1542 pos++;
1546 return pos;
1550 * Check that the character at the given position is a word or punctuation character and that
1551 * the previous character is of a different character class.
1553 bool Document::IsWordStartAt(int pos) const {
1554 if (pos > 0) {
1555 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1556 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1557 (ccPos != WordCharClass(CharAt(pos - 1)));
1559 return true;
1563 * Check that the character at the given position is a word or punctuation character and that
1564 * the next character is of a different character class.
1566 bool Document::IsWordEndAt(int pos) const {
1567 if (pos < Length()) {
1568 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1569 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1570 (ccPrev != WordCharClass(CharAt(pos)));
1572 return true;
1576 * Check that the given range is has transitions between character classes at both
1577 * ends and where the characters on the inside are word or punctuation characters.
1579 bool Document::IsWordAt(int start, int end) const {
1580 return IsWordStartAt(start) && IsWordEndAt(end);
1583 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1584 return (!word && !wordStart) ||
1585 (word && IsWordAt(pos, pos + length)) ||
1586 (wordStart && IsWordStartAt(pos));
1589 bool Document::HasCaseFolder(void) const {
1590 return pcf != 0;
1593 void Document::SetCaseFolder(CaseFolder *pcf_) {
1594 delete pcf;
1595 pcf = pcf_;
1598 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1599 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1600 if (UTF8IsAscii(leadByte)) {
1601 // Common case: ASCII character
1602 return CharacterExtracted(leadByte, 1);
1604 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1605 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1606 for (int b=1; b<widthCharBytes; b++)
1607 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1608 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1609 if (utf8status & UTF8MaskInvalid) {
1610 // Treat as invalid and use up just one byte
1611 return CharacterExtracted(unicodeReplacementChar, 1);
1612 } else {
1613 return CharacterExtracted(UnicodeFromBytes(charBytes), utf8status & UTF8MaskWidth);
1618 * Find text in document, supporting both forward and backward
1619 * searches (just pass minPos > maxPos to do a backward search)
1620 * Has not been tested with backwards DBCS searches yet.
1622 long Document::FindText(int minPos, int maxPos, const char *search,
1623 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1624 int *length) {
1625 if (*length <= 0)
1626 return minPos;
1627 if (regExp) {
1628 if (!regex)
1629 regex = CreateRegexSearch(&charClass);
1630 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1631 } else {
1633 const bool forward = minPos <= maxPos;
1634 const int increment = forward ? 1 : -1;
1636 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1637 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1638 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1640 // Compute actual search ranges needed
1641 const int lengthFind = *length;
1643 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1644 const int limitPos = Platform::Maximum(startPos, endPos);
1645 int pos = startPos;
1646 if (!forward) {
1647 // Back all of a character
1648 pos = NextPosition(pos, increment);
1650 if (caseSensitive) {
1651 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1652 const char charStartSearch = search[0];
1653 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1654 if (CharAt(pos) == charStartSearch) {
1655 bool found = (pos + lengthFind) <= limitPos;
1656 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1657 found = CharAt(pos + indexSearch) == search[indexSearch];
1659 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1660 return pos;
1663 if (!NextCharacter(pos, increment))
1664 break;
1666 } else if (SC_CP_UTF8 == dbcsCodePage) {
1667 const size_t maxFoldingExpansion = 4;
1668 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1669 const int lenSearch = static_cast<int>(
1670 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1671 char bytes[UTF8MaxBytes + 1];
1672 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1673 while (forward ? (pos < endPos) : (pos >= endPos)) {
1674 int widthFirstCharacter = 0;
1675 int posIndexDocument = pos;
1676 int indexSearch = 0;
1677 bool characterMatches = true;
1678 for (;;) {
1679 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1680 bytes[0] = leadByte;
1681 int widthChar = 1;
1682 if (!UTF8IsAscii(leadByte)) {
1683 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1684 for (int b=1; b<widthCharBytes; b++) {
1685 bytes[b] = cb.CharAt(posIndexDocument+b);
1687 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1689 if (!widthFirstCharacter)
1690 widthFirstCharacter = widthChar;
1691 if ((posIndexDocument + widthChar) > limitPos)
1692 break;
1693 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1694 folded[lenFlat] = 0;
1695 // Does folded match the buffer
1696 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1697 if (!characterMatches)
1698 break;
1699 posIndexDocument += widthChar;
1700 indexSearch += lenFlat;
1701 if (indexSearch >= lenSearch)
1702 break;
1704 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1705 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1706 *length = posIndexDocument - pos;
1707 return pos;
1710 if (forward) {
1711 pos += widthFirstCharacter;
1712 } else {
1713 if (!NextCharacter(pos, increment))
1714 break;
1717 } else if (dbcsCodePage) {
1718 const size_t maxBytesCharacter = 2;
1719 const size_t maxFoldingExpansion = 4;
1720 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1721 const int lenSearch = static_cast<int>(
1722 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1723 while (forward ? (pos < endPos) : (pos >= endPos)) {
1724 int indexDocument = 0;
1725 int indexSearch = 0;
1726 bool characterMatches = true;
1727 while (characterMatches &&
1728 ((pos + indexDocument) < limitPos) &&
1729 (indexSearch < lenSearch)) {
1730 char bytes[maxBytesCharacter + 1];
1731 bytes[0] = cb.CharAt(pos + indexDocument);
1732 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1733 if (widthChar == 2)
1734 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1735 if ((pos + indexDocument + widthChar) > limitPos)
1736 break;
1737 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1738 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1739 folded[lenFlat] = 0;
1740 // Does folded match the buffer
1741 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1742 indexDocument += widthChar;
1743 indexSearch += lenFlat;
1745 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1746 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1747 *length = indexDocument;
1748 return pos;
1751 if (!NextCharacter(pos, increment))
1752 break;
1754 } else {
1755 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1756 std::vector<char> searchThing(lengthFind + 1);
1757 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1758 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1759 bool found = (pos + lengthFind) <= limitPos;
1760 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1761 char ch = CharAt(pos + indexSearch);
1762 char folded[2];
1763 pcf->Fold(folded, sizeof(folded), &ch, 1);
1764 found = folded[0] == searchThing[indexSearch];
1766 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1767 return pos;
1769 if (!NextCharacter(pos, increment))
1770 break;
1774 //Platform::DebugPrintf("Not found\n");
1775 return -1;
1778 const char *Document::SubstituteByPosition(const char *text, int *length) {
1779 if (regex)
1780 return regex->SubstituteByPosition(this, text, length);
1781 else
1782 return 0;
1785 int Document::LinesTotal() const {
1786 return cb.Lines();
1789 void Document::SetDefaultCharClasses(bool includeWordClass) {
1790 charClass.SetDefaultCharClasses(includeWordClass);
1793 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1794 charClass.SetCharClasses(chars, newCharClass);
1797 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1798 return charClass.GetCharsOfClass(characterClass, buffer);
1801 void SCI_METHOD Document::StartStyling(int position, char) {
1802 endStyled = position;
1805 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1806 if (enteredStyling != 0) {
1807 return false;
1808 } else {
1809 enteredStyling++;
1810 int prevEndStyled = endStyled;
1811 if (cb.SetStyleFor(endStyled, length, style)) {
1812 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1813 prevEndStyled, length);
1814 NotifyModified(mh);
1816 endStyled += length;
1817 enteredStyling--;
1818 return true;
1822 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1823 if (enteredStyling != 0) {
1824 return false;
1825 } else {
1826 enteredStyling++;
1827 bool didChange = false;
1828 int startMod = 0;
1829 int endMod = 0;
1830 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1831 PLATFORM_ASSERT(endStyled < Length());
1832 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1833 if (!didChange) {
1834 startMod = endStyled;
1836 didChange = true;
1837 endMod = endStyled;
1840 if (didChange) {
1841 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1842 startMod, endMod - startMod + 1);
1843 NotifyModified(mh);
1845 enteredStyling--;
1846 return true;
1850 void Document::EnsureStyledTo(int pos) {
1851 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1852 IncrementStyleClock();
1853 if (pli && !pli->UseContainerLexing()) {
1854 int lineEndStyled = LineFromPosition(GetEndStyled());
1855 int endStyledTo = LineStart(lineEndStyled);
1856 pli->Colourise(endStyledTo, pos);
1857 } else {
1858 // Ask the watchers to style, and stop as soon as one responds.
1859 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1860 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1861 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1867 void Document::LexerChanged() {
1868 // Tell the watchers the lexer has changed.
1869 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1870 it->watcher->NotifyLexerChanged(this, it->userData);
1874 int SCI_METHOD Document::SetLineState(int line, int state) {
1875 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1876 if (state != statePrevious) {
1877 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1878 NotifyModified(mh);
1880 return statePrevious;
1883 int SCI_METHOD Document::GetLineState(int line) const {
1884 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1887 int Document::GetMaxLineState() {
1888 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1891 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1892 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1893 NotifyModified(mh);
1896 StyledText Document::MarginStyledText(int line) const {
1897 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1898 return StyledText(pla->Length(line), pla->Text(line),
1899 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1902 void Document::MarginSetText(int line, const char *text) {
1903 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1904 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1905 NotifyModified(mh);
1908 void Document::MarginSetStyle(int line, int style) {
1909 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1910 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1913 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1914 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1915 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1918 void Document::MarginClearAll() {
1919 int maxEditorLine = LinesTotal();
1920 for (int l=0; l<maxEditorLine; l++)
1921 MarginSetText(l, 0);
1922 // Free remaining data
1923 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1926 StyledText Document::AnnotationStyledText(int line) const {
1927 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1928 return StyledText(pla->Length(line), pla->Text(line),
1929 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1932 void Document::AnnotationSetText(int line, const char *text) {
1933 if (line >= 0 && line < LinesTotal()) {
1934 const int linesBefore = AnnotationLines(line);
1935 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1936 const int linesAfter = AnnotationLines(line);
1937 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1938 mh.annotationLinesAdded = linesAfter - linesBefore;
1939 NotifyModified(mh);
1943 void Document::AnnotationSetStyle(int line, int style) {
1944 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1945 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1946 NotifyModified(mh);
1949 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1950 if (line >= 0 && line < LinesTotal()) {
1951 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1955 int Document::AnnotationLines(int line) const {
1956 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1959 void Document::AnnotationClearAll() {
1960 int maxEditorLine = LinesTotal();
1961 for (int l=0; l<maxEditorLine; l++)
1962 AnnotationSetText(l, 0);
1963 // Free remaining data
1964 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1967 void Document::IncrementStyleClock() {
1968 styleClock = (styleClock + 1) % 0x100000;
1971 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1972 if (decorations.FillRange(position, value, fillLength)) {
1973 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1974 position, fillLength);
1975 NotifyModified(mh);
1979 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1980 WatcherWithUserData wwud(watcher, userData);
1981 std::vector<WatcherWithUserData>::iterator it =
1982 std::find(watchers.begin(), watchers.end(), wwud);
1983 if (it != watchers.end())
1984 return false;
1985 watchers.push_back(wwud);
1986 return true;
1989 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1990 std::vector<WatcherWithUserData>::iterator it =
1991 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1992 if (it != watchers.end()) {
1993 watchers.erase(it);
1994 return true;
1996 return false;
1999 void Document::NotifyModifyAttempt() {
2000 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2001 it->watcher->NotifyModifyAttempt(this, it->userData);
2005 void Document::NotifySavePoint(bool atSavePoint) {
2006 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2007 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2011 void Document::NotifyModified(DocModification mh) {
2012 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2013 decorations.InsertSpace(mh.position, mh.length);
2014 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2015 decorations.DeleteRange(mh.position, mh.length);
2017 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2018 it->watcher->NotifyModified(this, mh, it->userData);
2022 bool Document::IsWordPartSeparator(char ch) const {
2023 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2026 int Document::WordPartLeft(int pos) {
2027 if (pos > 0) {
2028 --pos;
2029 char startChar = cb.CharAt(pos);
2030 if (IsWordPartSeparator(startChar)) {
2031 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2032 --pos;
2035 if (pos > 0) {
2036 startChar = cb.CharAt(pos);
2037 --pos;
2038 if (IsLowerCase(startChar)) {
2039 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2040 --pos;
2041 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2042 ++pos;
2043 } else if (IsUpperCase(startChar)) {
2044 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2045 --pos;
2046 if (!IsUpperCase(cb.CharAt(pos)))
2047 ++pos;
2048 } else if (IsADigit(startChar)) {
2049 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2050 --pos;
2051 if (!IsADigit(cb.CharAt(pos)))
2052 ++pos;
2053 } else if (IsPunctuation(startChar)) {
2054 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2055 --pos;
2056 if (!IsPunctuation(cb.CharAt(pos)))
2057 ++pos;
2058 } else if (isspacechar(startChar)) {
2059 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2060 --pos;
2061 if (!isspacechar(cb.CharAt(pos)))
2062 ++pos;
2063 } else if (!IsASCII(startChar)) {
2064 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2065 --pos;
2066 if (IsASCII(cb.CharAt(pos)))
2067 ++pos;
2068 } else {
2069 ++pos;
2073 return pos;
2076 int Document::WordPartRight(int pos) {
2077 char startChar = cb.CharAt(pos);
2078 int length = Length();
2079 if (IsWordPartSeparator(startChar)) {
2080 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2081 ++pos;
2082 startChar = cb.CharAt(pos);
2084 if (!IsASCII(startChar)) {
2085 while (pos < length && !IsASCII(cb.CharAt(pos)))
2086 ++pos;
2087 } else if (IsLowerCase(startChar)) {
2088 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2089 ++pos;
2090 } else if (IsUpperCase(startChar)) {
2091 if (IsLowerCase(cb.CharAt(pos + 1))) {
2092 ++pos;
2093 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2094 ++pos;
2095 } else {
2096 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2097 ++pos;
2099 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2100 --pos;
2101 } else if (IsADigit(startChar)) {
2102 while (pos < length && IsADigit(cb.CharAt(pos)))
2103 ++pos;
2104 } else if (IsPunctuation(startChar)) {
2105 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2106 ++pos;
2107 } else if (isspacechar(startChar)) {
2108 while (pos < length && isspacechar(cb.CharAt(pos)))
2109 ++pos;
2110 } else {
2111 ++pos;
2113 return pos;
/// Return true when @a c is a line feed or a carriage return.
bool IsLineEndChar(char c) {
	return (c == '\n' || c == '\r');
}
2120 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2121 int sStart = cb.StyleAt(pos);
2122 if (delta < 0) {
2123 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2124 pos--;
2125 pos++;
2126 } else {
2127 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2128 pos++;
2130 return pos;
/**
 * Return the brace that pairs with @a ch.
 * Handles (), [], {} and <> in both directions; any other character
 * yields '\0'.
 */
static char BraceOpposite(char ch) {
	switch (ch) {
	case '(':
		return ')';
	case ')':
		return '(';
	case '[':
		return ']';
	case ']':
		return '[';
	case '{':
		return '}';
	case '}':
		return '{';
	case '<':
		return '>';
	case '>':
		return '<';
	default:
		return '\0';
	}
}
2156 // TODO: should be able to extend styled region to find matching brace
2157 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2158 char chBrace = CharAt(position);
2159 char chSeek = BraceOpposite(chBrace);
2160 if (chSeek == '\0')
2161 return - 1;
2162 char styBrace = static_cast<char>(StyleAt(position));
2163 int direction = -1;
2164 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2165 direction = 1;
2166 int depth = 1;
2167 position = NextPosition(position, direction);
2168 while ((position >= 0) && (position < Length())) {
2169 char chAtPos = CharAt(position);
2170 char styAtPos = static_cast<char>(StyleAt(position));
2171 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2172 if (chAtPos == chBrace)
2173 depth++;
2174 if (chAtPos == chSeek)
2175 depth--;
2176 if (depth == 0)
2177 return position;
2179 int positionBeforeMove = position;
2180 position = NextPosition(position, direction);
2181 if (position == positionBeforeMove)
2182 break;
2184 return - 1;
2188 * Implementation of RegexSearchBase for the default built-in regular expression engine
2190 class BuiltinRegex : public RegexSearchBase {
2191 public:
2192 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2194 virtual ~BuiltinRegex() {
2197 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2198 bool caseSensitive, bool word, bool wordStart, int flags,
2199 int *length);
2201 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2203 private:
2204 RESearch search;
2205 std::string substituted;
2208 namespace {
2211 * RESearchRange keeps track of search range.
2213 class RESearchRange {
2214 public:
2215 const Document *doc;
2216 int increment;
2217 int startPos;
2218 int endPos;
2219 int lineRangeStart;
2220 int lineRangeEnd;
2221 int lineRangeBreak;
2222 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2223 increment = (minPos <= maxPos) ? 1 : -1;
2225 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2226 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2227 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2229 lineRangeStart = doc->LineFromPosition(startPos);
2230 lineRangeEnd = doc->LineFromPosition(endPos);
2231 if ((increment == 1) &&
2232 (startPos >= doc->LineEnd(lineRangeStart)) &&
2233 (lineRangeStart < lineRangeEnd)) {
2234 // the start position is at end of line or between line end characters.
2235 lineRangeStart++;
2236 startPos = doc->LineStart(lineRangeStart);
2237 } else if ((increment == -1) &&
2238 (startPos <= doc->LineStart(lineRangeStart)) &&
2239 (lineRangeStart > lineRangeEnd)) {
2240 // the start position is at beginning of line.
2241 lineRangeStart--;
2242 startPos = doc->LineEnd(lineRangeStart);
2244 lineRangeBreak = lineRangeEnd + increment;
2246 Range LineRange(int line) const {
2247 Range range(doc->LineStart(line), doc->LineEnd(line));
2248 if (increment == 1) {
2249 if (line == lineRangeStart)
2250 range.start = startPos;
2251 if (line == lineRangeEnd)
2252 range.end = endPos;
2253 } else {
2254 if (line == lineRangeEnd)
2255 range.start = endPos;
2256 if (line == lineRangeStart)
2257 range.end = startPos;
2259 return range;
2263 // Define a way for the Regular Expression code to access the document
2264 class DocumentIndexer : public CharacterIndexer {
2265 Document *pdoc;
2266 int end;
2267 public:
2268 DocumentIndexer(Document *pdoc_, int end_) :
2269 pdoc(pdoc_), end(end_) {
2272 virtual ~DocumentIndexer() {
2275 virtual char CharAt(int index) {
2276 if (index < 0 || index >= end)
2277 return 0;
2278 else
2279 return pdoc->CharAt(index);
2283 #ifdef CXX11_REGEX
2285 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2286 public:
2287 const Document *doc;
2288 Position position;
2289 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2291 ByteIterator(const ByteIterator &other) {
2292 doc = other.doc;
2293 position = other.position;
2295 ByteIterator &operator=(const ByteIterator &other) {
2296 if (this != &other) {
2297 doc = other.doc;
2298 position = other.position;
2300 return *this;
2302 char operator*() const {
2303 return doc->CharAt(position);
2305 ByteIterator &operator++() {
2306 position++;
2307 return *this;
2309 ByteIterator operator++(int) {
2310 ByteIterator retVal(*this);
2311 position++;
2312 return retVal;
2314 ByteIterator &operator--() {
2315 position--;
2316 return *this;
2318 bool operator==(const ByteIterator &other) const {
2319 return doc == other.doc && position == other.position;
2321 bool operator!=(const ByteIterator &other) const {
2322 return doc != other.doc || position != other.position;
2324 int Pos() const {
2325 return position;
2327 int PosRoundUp() const {
2328 return position;
// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
// Would be better to use sizeof(wchar_t) or similar to differentiate
// but easier for now to hard-code platforms.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

#ifdef _WIN32
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
2344 #if WCHAR_T_IS_16
2346 // On Windows, report non-BMP characters as 2 separate surrogates as that
2347 // matches wregex since it is based on wchar_t.
2348 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2349 // These 3 fields determine the iterator position and are used for comparisons
2350 const Document *doc;
2351 Position position;
2352 size_t characterIndex;
2353 // Remaining fields are derived from the determining fields so are excluded in comparisons
2354 unsigned int lenBytes;
2355 size_t lenCharacters;
2356 wchar_t buffered[2];
2357 public:
2358 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2359 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2360 buffered[0] = 0;
2361 buffered[1] = 0;
2363 UTF8Iterator(const UTF8Iterator &other) {
2364 doc = other.doc;
2365 position = other.position;
2366 characterIndex = other.characterIndex;
2367 lenBytes = other.lenBytes;
2368 lenCharacters = other.lenCharacters;
2369 buffered[0] = other.buffered[0];
2370 buffered[1] = other.buffered[1];
2372 UTF8Iterator &operator=(const UTF8Iterator &other) {
2373 if (this != &other) {
2374 doc = other.doc;
2375 position = other.position;
2376 characterIndex = other.characterIndex;
2377 lenBytes = other.lenBytes;
2378 lenCharacters = other.lenCharacters;
2379 buffered[0] = other.buffered[0];
2380 buffered[1] = other.buffered[1];
2382 return *this;
2384 wchar_t operator*() {
2385 if (lenCharacters == 0) {
2386 ReadCharacter();
2388 return buffered[characterIndex];
2390 UTF8Iterator &operator++() {
2391 if ((characterIndex + 1) < (lenCharacters)) {
2392 characterIndex++;
2393 } else {
2394 position += lenBytes;
2395 ReadCharacter();
2396 characterIndex = 0;
2398 return *this;
2400 UTF8Iterator operator++(int) {
2401 UTF8Iterator retVal(*this);
2402 if ((characterIndex + 1) < (lenCharacters)) {
2403 characterIndex++;
2404 } else {
2405 position += lenBytes;
2406 ReadCharacter();
2407 characterIndex = 0;
2409 return retVal;
2411 UTF8Iterator &operator--() {
2412 if (characterIndex) {
2413 characterIndex--;
2414 } else {
2415 position = doc->NextPosition(position, -1);
2416 ReadCharacter();
2417 characterIndex = lenCharacters - 1;
2419 return *this;
2421 bool operator==(const UTF8Iterator &other) const {
2422 // Only test the determining fields, not the character widths and values derived from this
2423 return doc == other.doc &&
2424 position == other.position &&
2425 characterIndex == other.characterIndex;
2427 bool operator!=(const UTF8Iterator &other) const {
2428 // Only test the determining fields, not the character widths and values derived from this
2429 return doc != other.doc ||
2430 position != other.position ||
2431 characterIndex != other.characterIndex;
2433 int Pos() const {
2434 return position;
2436 int PosRoundUp() const {
2437 if (characterIndex)
2438 return position + lenBytes; // Force to end of character
2439 else
2440 return position;
2442 private:
2443 void ReadCharacter() {
2444 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2445 lenBytes = charExtracted.widthBytes;
2446 if (charExtracted.character == unicodeReplacementChar) {
2447 lenCharacters = 1;
2448 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2449 } else {
2450 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2455 #else
2457 // On Unix, report non-BMP characters as single characters
2459 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2460 const Document *doc;
2461 Position position;
2462 public:
2463 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2465 UTF8Iterator(const UTF8Iterator &other) {
2466 doc = other.doc;
2467 position = other.position;
2469 UTF8Iterator &operator=(const UTF8Iterator &other) {
2470 if (this != &other) {
2471 doc = other.doc;
2472 position = other.position;
2474 return *this;
2476 wchar_t operator*() const {
2477 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2478 return charExtracted.character;
2480 UTF8Iterator &operator++() {
2481 position = doc->NextPosition(position, 1);
2482 return *this;
2484 UTF8Iterator operator++(int) {
2485 UTF8Iterator retVal(*this);
2486 position = doc->NextPosition(position, 1);
2487 return retVal;
2489 UTF8Iterator &operator--() {
2490 position = doc->NextPosition(position, -1);
2491 return *this;
2493 bool operator==(const UTF8Iterator &other) const {
2494 return doc == other.doc && position == other.position;
2496 bool operator!=(const UTF8Iterator &other) const {
2497 return doc != other.doc || position != other.position;
2499 int Pos() const {
2500 return position;
2502 int PosRoundUp() const {
2503 return position;
2507 #endif
2509 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2510 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2511 if (!doc->IsLineStartPosition(startPos))
2512 flagsMatch |= std::regex_constants::match_not_bol;
2513 if (!doc->IsLineEndPosition(endPos))
2514 flagsMatch |= std::regex_constants::match_not_eol;
2515 return flagsMatch;
2518 template<typename Iterator, typename Regex>
2519 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2520 bool matched = false;
2521 std::match_results<Iterator> match;
2523 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2524 // If they didn't then the line by line iteration could be removed for the forwards
2525 // case and replaced with the following 4 lines:
2526 // Iterator uiStart(doc, startPos);
2527 // Iterator uiEnd(doc, endPos);
2528 // flagsMatch = MatchFlags(doc, startPos, endPos);
2529 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2531 // Line by line.
2532 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2533 const Range lineRange = resr.LineRange(line);
2534 Iterator itStart(doc, lineRange.start);
2535 Iterator itEnd(doc, lineRange.end);
2536 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2537 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2538 // Check for the last match on this line.
2539 if (matched) {
2540 if (resr.increment == -1) {
2541 while (matched) {
2542 Iterator itNext(doc, match[0].second.PosRoundUp());
2543 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2544 std::match_results<Iterator> matchNext;
2545 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2546 if (matched) {
2547 if (match[0].first == match[0].second) {
2548 // Empty match means failure so exit
2549 return false;
2551 match = matchNext;
2554 matched = true;
2556 break;
2559 if (matched) {
2560 for (size_t co = 0; co < match.size(); co++) {
2561 search.bopat[co] = match[co].first.Pos();
2562 search.eopat[co] = match[co].second.PosRoundUp();
2563 size_t lenMatch = search.eopat[co] - search.bopat[co];
2564 search.pat[co].resize(lenMatch);
2565 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2566 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2570 return matched;
2573 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2574 bool caseSensitive, int *length, RESearch &search) {
2575 const RESearchRange resr(doc, minPos, maxPos);
2576 try {
2577 //ElapsedTime et;
2578 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2579 // Flags that apper to have no effect:
2580 // | std::regex::collate | std::regex::extended;
2581 if (!caseSensitive)
2582 flagsRe = flagsRe | std::regex::icase;
2584 // Clear the RESearch so can fill in matches
2585 search.Clear();
2587 bool matched = false;
2588 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2589 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2590 std::vector<wchar_t> ws(lenS + 1);
2591 #if WCHAR_T_IS_16
2592 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2593 #else
2594 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2595 #endif
2596 ws[outLen] = 0;
2597 std::wregex regexp;
2598 #if defined(__APPLE__)
2599 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2600 // is one byte not one character.
2601 // However, on OS X this makes wregex act as Unicode
2602 std::locale localeU("en_US.UTF-8");
2603 regexp.imbue(localeU);
2604 #endif
2605 regexp.assign(&ws[0], flagsRe);
2606 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2608 } else {
2609 std::regex regexp;
2610 regexp.assign(s, flagsRe);
2611 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2614 int posMatch = -1;
2615 if (matched) {
2616 posMatch = search.bopat[0];
2617 *length = search.eopat[0] - search.bopat[0];
2619 // Example - search in doc/ScintillaHistory.html for
2620 // [[:upper:]]eta[[:space:]]
2621 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2622 //double durSearch = et.Duration(true);
2623 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2624 return posMatch;
2625 } catch (std::regex_error &) {
2626 // Failed to create regular expression
2627 throw RegexError();
2628 } catch (...) {
2629 // Failed in some other way
2630 return -1;
2634 #endif
2638 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2639 bool caseSensitive, bool, bool, int flags,
2640 int *length) {
2642 #ifdef CXX11_REGEX
2643 if (flags & SCFIND_CXX11REGEX) {
2644 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2645 caseSensitive, length, search);
2647 #endif
2649 const RESearchRange resr(doc, minPos, maxPos);
2651 const bool posix = (flags & SCFIND_POSIX) != 0;
2653 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2654 if (errmsg) {
2655 return -1;
2657 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2658 // Replace first '.' with '-' in each property file variable reference:
2659 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2660 // Replace: $(\1-\2)
2661 int pos = -1;
2662 int lenRet = 0;
2663 const char searchEnd = s[*length - 1];
2664 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2665 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2666 int startOfLine = doc->LineStart(line);
2667 int endOfLine = doc->LineEnd(line);
2668 if (resr.increment == 1) {
2669 if (line == resr.lineRangeStart) {
2670 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2671 continue; // Can't match start of line if start position after start of line
2672 startOfLine = resr.startPos;
2674 if (line == resr.lineRangeEnd) {
2675 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2676 continue; // Can't match end of line if end position before end of line
2677 endOfLine = resr.endPos;
2679 } else {
2680 if (line == resr.lineRangeEnd) {
2681 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2682 continue; // Can't match start of line if end position after start of line
2683 startOfLine = resr.endPos;
2685 if (line == resr.lineRangeStart) {
2686 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2687 continue; // Can't match end of line if start position before end of line
2688 endOfLine = resr.startPos;
2692 DocumentIndexer di(doc, endOfLine);
2693 int success = search.Execute(di, startOfLine, endOfLine);
2694 if (success) {
2695 pos = search.bopat[0];
2696 // Ensure only whole characters selected
2697 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2698 lenRet = search.eopat[0] - search.bopat[0];
2699 // There can be only one start of a line, so no need to look for last match in line
2700 if ((resr.increment == -1) && (s[0] != '^')) {
2701 // Check for the last match on this line.
2702 int repetitions = 1000; // Break out of infinite loop
2703 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2704 success = search.Execute(di, pos+1, endOfLine);
2705 if (success) {
2706 if (search.eopat[0] <= minPos) {
2707 pos = search.bopat[0];
2708 lenRet = search.eopat[0] - search.bopat[0];
2709 } else {
2710 success = 0;
2715 break;
2718 *length = lenRet;
2719 return pos;
2722 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2723 substituted.clear();
2724 DocumentIndexer di(doc, doc->Length());
2725 search.GrabMatches(di);
2726 for (int j = 0; j < *length; j++) {
2727 if (text[j] == '\\') {
2728 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2729 unsigned int patNum = text[j + 1] - '0';
2730 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2731 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2732 substituted.append(search.pat[patNum].c_str(), len);
2733 j++;
2734 } else {
2735 j++;
2736 switch (text[j]) {
2737 case 'a':
2738 substituted.push_back('\a');
2739 break;
2740 case 'b':
2741 substituted.push_back('\b');
2742 break;
2743 case 'f':
2744 substituted.push_back('\f');
2745 break;
2746 case 'n':
2747 substituted.push_back('\n');
2748 break;
2749 case 'r':
2750 substituted.push_back('\r');
2751 break;
2752 case 't':
2753 substituted.push_back('\t');
2754 break;
2755 case 'v':
2756 substituted.push_back('\v');
2757 break;
2758 case '\\':
2759 substituted.push_back('\\');
2760 break;
2761 default:
2762 substituted.push_back('\\');
2763 j--;
2766 } else {
2767 substituted.push_back(text[j]);
2770 *length = static_cast<int>(substituted.length());
2771 return substituted.c_str();
2774 #ifndef SCI_OWNREGEX
2776 #ifdef SCI_NAMESPACE
2778 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2779 return new BuiltinRegex(charClassTable);
2782 #else
2784 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2785 return new BuiltinRegex(charClassTable);
2788 #endif
2790 #endif