Update Scintilla to 3.5.6 pre-release
[geany-mirror.git] / scintilla / src / Document.cxx
blob3f365fdf4531ad33f0ef12cc8e41b89e7a50ed2c
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "CellBuffer.h"
33 #include "PerLine.h"
34 #include "CharClassify.h"
35 #include "Decoration.h"
36 #include "CaseFolder.h"
37 #include "Document.h"
38 #include "RESearch.h"
39 #include "UniConversion.h"
40 #include "UnicodeFromUTF8.h"
42 #ifdef SCI_NAMESPACE
43 using namespace Scintilla;
44 #endif
46 static inline bool IsPunctuation(char ch) {
47 return IsASCII(ch) && ispunct(ch);
50 void LexInterface::Colourise(int start, int end) {
51 if (pdoc && instance && !performingStyle) {
52 // Protect against reentrance, which may occur, for example, when
53 // fold points are discovered while performing styling and the folding
54 // code looks for child lines which may trigger styling.
55 performingStyle = true;
57 int lengthDoc = pdoc->Length();
58 if (end == -1)
59 end = lengthDoc;
60 int len = end - start;
62 PLATFORM_ASSERT(len >= 0);
63 PLATFORM_ASSERT(start + len <= lengthDoc);
65 int styleStart = 0;
66 if (start > 0)
67 styleStart = pdoc->StyleAt(start - 1);
69 if (len > 0) {
70 instance->Lex(start, len, styleStart, pdoc);
71 instance->Fold(start, len, styleStart, pdoc);
74 performingStyle = false;
78 int LexInterface::LineEndTypesSupported() {
79 if (instance) {
80 int interfaceVersion = instance->Version();
81 if (interfaceVersion >= lvSubStyles) {
82 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
83 return ssinstance->LineEndTypesSupported();
86 return 0;
// Construct an empty document: zero reference count, no case folder, regex
// searcher or lexer interface, default tab/indent settings, and one freshly
// allocated store for each per-line data slot.
89 Document::Document() {
90 refCount = 0;
91 pcf = NULL;
// The initial end-of-line mode depends on the build platform.
92 #ifdef _WIN32
93 eolMode = SC_EOL_CRLF;
94 #else
95 eolMode = SC_EOL_LF;
96 #endif
97 dbcsCodePage = 0;
98 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
99 endStyled = 0;
100 styleClock = 0;
// Reentrancy counters consulted by the modification, styling and read-only paths.
101 enteredModification = 0;
102 enteredStyling = 0;
103 enteredReadOnlyCount = 0;
104 insertionSet = false;
// Tab and indentation defaults.
105 tabInChars = 8;
106 indentInChars = 0;
107 actualIndentInChars = 8;
108 useTabs = true;
109 tabIndents = true;
110 backspaceUnindents = false;
112 matchesValid = false;
113 regex = 0;
115 UTF8BytesOfLeadInitialise();
// One store per per-line data slot; margin and annotation text both use
// LineAnnotation. The destructor deletes every slot.
117 perLineData[ldMarkers] = new LineMarkers();
118 perLineData[ldLevels] = new LineLevels();
119 perLineData[ldState] = new LineState();
120 perLineData[ldMargin] = new LineAnnotation();
121 perLineData[ldAnnotation] = new LineAnnotation();
// Register this document with the cell buffer as its per-line handler.
123 cb.SetPerLine(this);
125 pli = 0;
128 Document::~Document() {
129 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
130 it->watcher->NotifyDeleted(this, it->userData);
132 for (int j=0; j<ldSize; j++) {
133 delete perLineData[j];
134 perLineData[j] = 0;
136 delete regex;
137 regex = 0;
138 delete pli;
139 pli = 0;
140 delete pcf;
141 pcf = 0;
144 void Document::Init() {
145 for (int j=0; j<ldSize; j++) {
146 if (perLineData[j])
147 perLineData[j]->Init();
151 int Document::LineEndTypesSupported() const {
152 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
153 return pli->LineEndTypesSupported();
154 else
155 return 0;
158 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
159 if (dbcsCodePage != dbcsCodePage_) {
160 dbcsCodePage = dbcsCodePage_;
161 SetCaseFolder(NULL);
162 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
163 return true;
164 } else {
165 return false;
169 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
170 if (lineEndBitSet != lineEndBitSet_) {
171 lineEndBitSet = lineEndBitSet_;
172 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
173 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
174 ModifiedAt(0);
175 cb.SetLineEndTypes(lineEndBitSetActive);
176 return true;
177 } else {
178 return false;
180 } else {
181 return false;
185 void Document::InsertLine(int line) {
186 for (int j=0; j<ldSize; j++) {
187 if (perLineData[j])
188 perLineData[j]->InsertLine(line);
192 void Document::RemoveLine(int line) {
193 for (int j=0; j<ldSize; j++) {
194 if (perLineData[j])
195 perLineData[j]->RemoveLine(line);
199 // Increase reference count and return its previous value.
// The post-increment hands the caller the count as it was before this call.
200 int Document::AddRef() {
201 return refCount++;
204 // Decrease reference count and return its previous value.
205 // Delete the document if reference count reaches zero.
206 int SCI_METHOD Document::Release() {
207 int curRefCount = --refCount;
208 if (curRefCount == 0)
209 delete this;
210 return curRefCount;
// Mark the current cell buffer state as the save point and report
// "at save point" (true) through NotifySavePoint.
213 void Document::SetSavePoint() {
214 cb.SetSavePoint();
215 NotifySavePoint(true);
// Undo every step recorded in the cell buffer's tentative sequence and then
// commit the tentative state. Does nothing when no tentative sequence is
// active. For each step a "before" notification is sent, the step is undone,
// and a matching "after" notification is sent with undo flags.
218 void Document::TentativeUndo() {
219 if (!TentativeActive())
220 return;
221 CheckReadOnly();
// Skip entirely when called from inside another modification.
222 if (enteredModification == 0) {
223 enteredModification++;
224 if (!cb.IsReadOnly()) {
225 bool startSavePoint = cb.IsSavePoint();
226 bool multiLine = false;
227 int steps = cb.TentativeSteps();
228 //Platform::DebugPrintf("Steps=%d\n", steps);
229 for (int step = 0; step < steps; step++) {
230 const int prevLinesTotal = LinesTotal();
231 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal re-inserts text, undoing an
// insertion deletes it, and container actions are passed through with their token.
232 if (action.at == removeAction) {
233 NotifyModified(DocModification(
234 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
235 } else if (action.at == containerAction) {
236 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
237 dm.token = action.position;
238 NotifyModified(dm);
239 } else {
240 NotifyModified(DocModification(
241 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
243 cb.PerformUndoStep();
// Text changed at action.position, so styling from there on is stale.
244 if (action.at != containerAction) {
245 ModifiedAt(action.position);
248 int modFlags = SC_PERFORMED_UNDO;
249 // With undo, an insertion action becomes a deletion notification
250 if (action.at == removeAction) {
251 modFlags |= SC_MOD_INSERTTEXT;
252 } else if (action.at == insertAction) {
253 modFlags |= SC_MOD_DELETETEXT;
255 if (steps > 1)
256 modFlags |= SC_MULTISTEPUNDOREDO;
257 const int linesAdded = LinesTotal() - prevLinesTotal;
// multiLine stays set once any step has changed the line count.
258 if (linesAdded != 0)
259 multiLine = true;
// Only the final step carries the last-step flag and, if any step changed
// the line count, the multi-line flag.
260 if (step == steps - 1) {
261 modFlags |= SC_LASTSTEPINUNDOREDO;
262 if (multiLine)
263 modFlags |= SC_MULTILINEUNDOREDO;
265 NotifyModified(DocModification(modFlags, action.position, action.lenData,
266 linesAdded, action.data));
// Report a save point transition only if the undo sequence caused one.
269 bool endSavePoint = cb.IsSavePoint();
270 if (startSavePoint != endSavePoint)
271 NotifySavePoint(endSavePoint);
273 cb.TentativeCommit();
275 enteredModification--;
// Return LineMarkers::MarkValue(line) from the marker per-line store.
279 int Document::GetMark(int line) {
280 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
// Forward to LineMarkers::MarkerNext(lineStart, mask) on the marker per-line
// store and return its result.
283 int Document::MarkerNext(int lineStart, int mask) const {
284 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
287 int Document::AddMark(int line, int markerNum) {
288 if (line >= 0 && line <= LinesTotal()) {
289 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
290 AddMark(line, markerNum, LinesTotal());
291 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
292 NotifyModified(mh);
293 return prev;
294 } else {
295 return 0;
299 void Document::AddMarkSet(int line, int valueSet) {
300 if (line < 0 || line > LinesTotal()) {
301 return;
303 unsigned int m = valueSet;
304 for (int i = 0; m; i++, m >>= 1)
305 if (m & 1)
306 static_cast<LineMarkers *>(perLineData[ldMarkers])->
307 AddMark(line, i, LinesTotal());
308 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
309 NotifyModified(mh);
// Ask the marker store to delete markerNum from `line`, then send an
// SC_MOD_CHANGEMARKER notification for that line. The notification is sent
// regardless of what the store's DeleteMark returns.
// NOTE(review): the third argument `false` presumably means "do not remove all
// instances" - confirm against LineMarkers::DeleteMark.
312 void Document::DeleteMark(int line, int markerNum) {
313 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
314 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
315 NotifyModified(mh);
// Delete the marker identified by markerHandle and send an
// SC_MOD_CHANGEMARKER notification whose line field is set to -1.
318 void Document::DeleteMarkFromHandle(int markerHandle) {
319 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
320 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
// The affected line is not looked up here, so -1 is reported instead.
321 mh.line = -1;
322 NotifyModified(mh);
325 void Document::DeleteAllMarks(int markerNum) {
326 bool someChanges = false;
327 for (int line = 0; line < LinesTotal(); line++) {
328 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
329 someChanges = true;
331 if (someChanges) {
332 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
333 mh.line = -1;
334 NotifyModified(mh);
// Return LineMarkers::LineFromHandle(markerHandle) from the marker per-line store.
338 int Document::LineFromHandle(int markerHandle) {
339 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
// Return the start position of `line` as reported by the cell buffer.
342 int SCI_METHOD Document::LineStart(int line) const {
343 return cb.LineStart(line);
// True when `position` is exactly the start of the line that contains it.
346 bool Document::IsLineStartPosition(int position) const {
347 return LineStart(LineFromPosition(position)) == position;
350 int SCI_METHOD Document::LineEnd(int line) const {
351 if (line >= LinesTotal() - 1) {
352 return LineStart(line + 1);
353 } else {
354 int position = LineStart(line + 1);
355 if (SC_CP_UTF8 == dbcsCodePage) {
356 unsigned char bytes[] = {
357 static_cast<unsigned char>(cb.CharAt(position-3)),
358 static_cast<unsigned char>(cb.CharAt(position-2)),
359 static_cast<unsigned char>(cb.CharAt(position-1)),
361 if (UTF8IsSeparator(bytes)) {
362 return position - UTF8SeparatorLength;
364 if (UTF8IsNEL(bytes+1)) {
365 return position - UTF8NELLength;
368 position--; // Back over CR or LF
369 // When line terminator is CR+LF, may need to go back one more
370 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
371 position--;
373 return position;
// Pass `status` to NotifyErrorOccurred on every registered watcher, together
// with this document and the watcher's own user data.
377 void SCI_METHOD Document::SetErrorStatus(int status) {
378 // Tell the watchers an error has occurred.
379 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
380 it->watcher->NotifyErrorOccurred(this, it->userData, status);
// Return the line that contains `pos`, as reported by the cell buffer.
384 int SCI_METHOD Document::LineFromPosition(int pos) const {
385 return cb.LineFromPosition(pos);
// Return LineEnd() of the line that contains `position`.
388 int Document::LineEndPosition(int position) const {
389 return LineEnd(LineFromPosition(position));
// True when `position` is exactly LineEnd() of the line that contains it.
392 bool Document::IsLineEndPosition(int position) const {
393 return LineEnd(LineFromPosition(position)) == position;
// True when `position` is at or after LineEnd() of the line that contains it,
// i.e. at the end of the line text or inside the line terminator.
396 bool Document::IsPositionInLineEnd(int position) const {
397 return position >= LineEnd(LineFromPosition(position));
400 int Document::VCHomePosition(int position) const {
401 int line = LineFromPosition(position);
402 int startPosition = LineStart(line);
403 int endLine = LineEnd(line);
404 int startText = startPosition;
405 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
406 startText++;
407 if (position == startText)
408 return startPosition;
409 else
410 return startText;
413 int SCI_METHOD Document::SetLevel(int line, int level) {
414 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
415 if (prev != level) {
416 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
417 LineStart(line), 0, 0, 0, line);
418 mh.foldLevelNow = level;
419 mh.foldLevelPrev = prev;
420 NotifyModified(mh);
422 return prev;
// Return LineLevels::GetLevel(line) from the fold level per-line store.
425 int SCI_METHOD Document::GetLevel(int line) const {
426 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
// Call ClearLevels() on the fold level per-line store. No notification is sent here.
429 void Document::ClearLevels() {
430 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
433 static bool IsSubordinate(int levelStart, int levelTry) {
434 if (levelTry & SC_FOLDLEVELWHITEFLAG)
435 return true;
436 else
437 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
440 int Document::GetLastChild(int lineParent, int level, int lastLine) {
441 if (level == -1)
442 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
443 int maxLine = LinesTotal();
444 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
445 int lineMaxSubord = lineParent;
446 while (lineMaxSubord < maxLine - 1) {
447 EnsureStyledTo(LineStart(lineMaxSubord + 2));
448 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
449 break;
450 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
451 break;
452 lineMaxSubord++;
454 if (lineMaxSubord > lineParent) {
455 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
456 // Have chewed up some whitespace that belongs to a parent so seek back
457 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
458 lineMaxSubord--;
462 return lineMaxSubord;
465 int Document::GetFoldParent(int line) const {
466 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
467 int lineLook = line - 1;
468 while ((lineLook > 0) && (
469 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
470 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
472 lineLook--;
474 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
475 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
476 return lineLook;
477 } else {
478 return -1;
// Compute the fold block around `line` and store it in highlightDelimiter:
// beginFoldBlock, endFoldBlock, and the nearest "changeable" lines before and
// after `line`. lastLine bounds how far GetLastChild is asked to look.
// highlightDelimiter is cleared when no enclosing fold block is found.
482 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
483 int level = GetLevel(line);
484 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
486 int lookLine = line;
487 int lookLineLevel = level;
488 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Walk upwards over white-flagged lines and over headers whose level number
// is not below that of the line following them.
489 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
490 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
491 lookLineLevel = GetLevel(--lookLine);
492 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// The block starts at the line reached if it is a header, otherwise at its fold parent.
495 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
496 if (beginFoldBlock == -1) {
497 highlightDelimiter.Clear();
498 return;
501 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
502 int firstChangeableLineBefore = -1;
// The block found ends before `line`: search earlier headers for one whose
// last child is exactly `line`.
503 if (endFoldBlock < line) {
504 lookLine = beginFoldBlock - 1;
505 lookLineLevel = GetLevel(lookLine);
506 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
507 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
508 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
509 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
510 beginFoldBlock = lookLine;
511 endFoldBlock = line;
512 firstChangeableLineBefore = line - 1;
// Stop at a base-level line whose predecessor has a higher level number.
515 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
516 break;
517 lookLineLevel = GetLevel(--lookLine);
518 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Otherwise scan back from `line` to the block start for the nearest line that
// is white-flagged or has a higher level number than `line`.
521 if (firstChangeableLineBefore == -1) {
522 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
523 lookLine >= beginFoldBlock;
524 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
525 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
526 firstChangeableLineBefore = lookLine;
527 break;
// Nothing found inside the block: use the line just above it.
531 if (firstChangeableLineBefore == -1)
532 firstChangeableLineBefore = beginFoldBlock - 1;
// Scan forward from `line` to the block end for the first header whose level
// number is below that of the line following it.
534 int firstChangeableLineAfter = -1;
535 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
536 lookLine <= endFoldBlock;
537 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
538 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
539 firstChangeableLineAfter = lookLine;
540 break;
// Nothing found inside the block: use the line just below it.
543 if (firstChangeableLineAfter == -1)
544 firstChangeableLineAfter = endFoldBlock + 1;
546 highlightDelimiter.beginFoldBlock = beginFoldBlock;
547 highlightDelimiter.endFoldBlock = endFoldBlock;
548 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
549 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
// Return `pos` clamped to the range 0..Length().
552 int Document::ClampPositionIntoDocument(int pos) const {
553 return Platform::Clamp(pos, 0, Length());
556 bool Document::IsCrLf(int pos) const {
557 if (pos < 0)
558 return false;
559 if (pos >= (Length() - 1))
560 return false;
561 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
564 int Document::LenChar(int pos) {
565 if (pos < 0) {
566 return 1;
567 } else if (IsCrLf(pos)) {
568 return 2;
569 } else if (SC_CP_UTF8 == dbcsCodePage) {
570 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
571 const int widthCharBytes = UTF8BytesOfLead[leadByte];
572 int lengthDoc = Length();
573 if ((pos + widthCharBytes) > lengthDoc)
574 return lengthDoc - pos;
575 else
576 return widthCharBytes;
577 } else if (dbcsCodePage) {
578 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
579 } else {
580 return 1;
584 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
585 int trail = pos;
586 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
587 trail--;
588 start = (trail > 0) ? trail-1 : trail;
590 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
591 const int widthCharBytes = UTF8BytesOfLead[leadByte];
592 if (widthCharBytes == 1) {
593 return false;
594 } else {
595 int trailBytes = widthCharBytes - 1;
596 int len = pos - start;
597 if (len > trailBytes)
598 // pos too far from lead
599 return false;
600 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
601 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
602 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
603 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
604 if (utf8status & UTF8MaskInvalid)
605 return false;
606 end = start + widthCharBytes;
607 return true;
611 // Normalise a position so that it is not halfway through a two byte character.
612 // This can occur in two situations -
613 // When lines are terminated with \r\n pairs which should be treated as one character.
614 // When displaying DBCS text such as Japanese.
615 // If moving, move the position in the indicated direction.
// moveDir > 0 moves to the end of the enclosing character; any other value
// moves to its start. The CR+LF check is only made when checkLineEnd is true.
616 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
617 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
618 // If out of range, just return minimum/maximum value.
619 if (pos <= 0)
620 return 0;
621 if (pos >= Length())
622 return Length();
624 // PLATFORM_ASSERT(pos > 0 && pos < Length());
// pos sits between the '\r' and the '\n' of a CR+LF pair.
625 if (checkLineEnd && IsCrLf(pos - 1)) {
626 if (moveDir > 0)
627 return pos + 1;
628 else
629 return pos - 1;
632 if (dbcsCodePage) {
633 if (SC_CP_UTF8 == dbcsCodePage) {
634 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
635 // If ch is not a trail byte then pos is valid intercharacter position
636 if (UTF8IsTrailByte(ch)) {
637 int startUTF = pos;
638 int endUTF = pos;
639 if (InGoodUTF8(pos, startUTF, endUTF)) {
640 // ch is a trail byte within a UTF-8 character
641 if (moveDir > 0)
642 pos = endUTF;
643 else
644 pos = startUTF;
646 // Else invalid UTF-8 so return position of isolated trail byte
648 } else {
649 // Anchor DBCS calculations at start of line because start of line can
650 // not be a DBCS trail byte.
651 int posStartLine = LineStart(LineFromPosition(pos));
652 if (pos == posStartLine)
653 return pos;
655 // Step back until a non-lead-byte is found.
656 int posCheck = pos;
657 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
658 posCheck--;
660 // Check from known start of character.
661 while (posCheck < pos) {
662 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
// A character ending exactly at pos means pos is already on a boundary.
663 if (posCheck + mbsize == pos) {
664 return pos;
// A character straddling pos: snap to its end or its start.
665 } else if (posCheck + mbsize > pos) {
666 if (moveDir > 0) {
667 return posCheck + mbsize;
668 } else {
669 return posCheck;
672 posCheck += mbsize;
677 return pos;
680 // NextPosition moves between valid positions - it can not handle a position in the middle of a
681 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
682 // A \r\n pair is treated as two characters.
// moveDir > 0 moves forward by one character; any other value moves backward.
// The result is clamped to 0 and Length() at the ends of the document.
683 int Document::NextPosition(int pos, int moveDir) const {
684 // If out of range, just return minimum/maximum value.
685 int increment = (moveDir > 0) ? 1 : -1;
686 if (pos + increment <= 0)
687 return 0;
688 if (pos + increment >= Length())
689 return Length();
691 if (dbcsCodePage) {
692 if (SC_CP_UTF8 == dbcsCodePage) {
693 if (increment == 1) {
694 // Simple forward movement case so can avoid some checks
695 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
696 if (UTF8IsAscii(leadByte)) {
697 // Single byte character or invalid
698 pos++;
699 } else {
700 const int widthCharBytes = UTF8BytesOfLead[leadByte];
701 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
702 for (int b=1; b<widthCharBytes; b++)
703 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
704 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
// An invalid sequence is stepped over one byte at a time.
705 if (utf8status & UTF8MaskInvalid)
706 pos++;
707 else
708 pos += utf8status & UTF8MaskWidth;
710 } else {
711 // Examine byte before position
712 pos--;
713 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
714 // If ch is not a trail byte then pos is valid intercharacter position
715 if (UTF8IsTrailByte(ch)) {
716 // If ch is a trail byte in a valid UTF-8 character then return start of character
717 int startUTF = pos;
718 int endUTF = pos;
719 if (InGoodUTF8(pos, startUTF, endUTF)) {
720 pos = startUTF;
722 // Else invalid UTF-8 so return position of isolated trail byte
725 } else {
726 if (moveDir > 0) {
727 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
728 pos += mbsize;
729 if (pos > Length())
730 pos = Length();
731 } else {
732 // Anchor DBCS calculations at start of line because start of line can
733 // not be a DBCS trail byte.
734 int posStartLine = LineStart(LineFromPosition(pos));
735 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
736 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
737 if ((pos - 1) <= posStartLine) {
738 return pos - 1;
739 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
740 // Must actually be trail byte
741 return pos - 2;
742 } else {
743 // Otherwise, step back until a non-lead-byte is found.
744 int posTemp = pos - 1;
// All of the stepping happens in the loop condition (--posTemp).
745 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
747 // Now posTemp+1 must point to the beginning of a character,
748 // so figure out whether we went back an even or an odd
749 // number of bytes and go back 1 or 2 bytes, respectively.
750 return (pos - 1 - ((pos - posTemp) & 1));
754 } else {
// Single-byte encoding: every byte is a character.
755 pos += increment;
758 return pos;
761 bool Document::NextCharacter(int &pos, int moveDir) const {
762 // Returns true if pos changed
763 int posNext = NextPosition(pos, moveDir);
764 if (posNext == pos) {
765 return false;
766 } else {
767 pos = posNext;
768 return true;
772 // Return -1 on out-of-bounds
773 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
774 int pos = positionStart;
775 if (dbcsCodePage) {
776 const int increment = (characterOffset > 0) ? 1 : -1;
777 while (characterOffset != 0) {
778 const int posNext = NextPosition(pos, increment);
779 if (posNext == pos)
780 return INVALID_POSITION;
781 pos = posNext;
782 characterOffset -= increment;
784 } else {
785 pos = positionStart + characterOffset;
786 if ((pos < 0) || (pos > Length()))
787 return INVALID_POSITION;
789 return pos;
792 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
793 int pos = positionStart;
794 if (dbcsCodePage) {
795 const int increment = (characterOffset > 0) ? 1 : -1;
796 while (characterOffset != 0) {
797 const int posNext = NextPosition(pos, increment);
798 if (posNext == pos)
799 return INVALID_POSITION;
800 if (abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
801 characterOffset -= increment;
802 pos = posNext;
803 characterOffset -= increment;
805 } else {
806 pos = positionStart + characterOffset;
807 if ((pos < 0) || (pos > Length()))
808 return INVALID_POSITION;
810 return pos;
813 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
814 int character;
815 int bytesInCharacter = 1;
816 if (dbcsCodePage) {
817 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
818 if (SC_CP_UTF8 == dbcsCodePage) {
819 if (UTF8IsAscii(leadByte)) {
820 // Single byte character or invalid
821 character = leadByte;
822 } else {
823 const int widthCharBytes = UTF8BytesOfLead[leadByte];
824 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
825 for (int b=1; b<widthCharBytes; b++)
826 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
827 int utf8status = UTF8Classify(charBytes, widthCharBytes);
828 if (utf8status & UTF8MaskInvalid) {
829 // Report as singleton surrogate values which are invalid Unicode
830 character = 0xDC80 + leadByte;
831 } else {
832 bytesInCharacter = utf8status & UTF8MaskWidth;
833 character = UnicodeFromUTF8(charBytes);
836 } else {
837 if (IsDBCSLeadByte(leadByte)) {
838 bytesInCharacter = 2;
839 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
840 } else {
841 character = leadByte;
844 } else {
845 character = cb.CharAt(position);
847 if (pWidth) {
848 *pWidth = bytesInCharacter;
850 return character;
// Return the current value of dbcsCodePage (0 is treated as a single-byte
// encoding elsewhere in this class).
853 int SCI_METHOD Document::CodePage() const {
854 return dbcsCodePage;
857 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
858 // Byte ranges found in Wikipedia articles with relevant search strings in each case
859 unsigned char uch = static_cast<unsigned char>(ch);
860 switch (dbcsCodePage) {
861 case 932:
862 // Shift_jis
863 return ((uch >= 0x81) && (uch <= 0x9F)) ||
864 ((uch >= 0xE0) && (uch <= 0xFC));
865 // Lead bytes F0 to FC may be a Microsoft addition.
866 case 936:
867 // GBK
868 return (uch >= 0x81) && (uch <= 0xFE);
869 case 949:
870 // Korean Wansung KS C-5601-1987
871 return (uch >= 0x81) && (uch <= 0xFE);
872 case 950:
873 // Big5
874 return (uch >= 0x81) && (uch <= 0xFE);
875 case 1361:
876 // Korean Johab KS C-5601-1992
877 return
878 ((uch >= 0x84) && (uch <= 0xD3)) ||
879 ((uch >= 0xD8) && (uch <= 0xDE)) ||
880 ((uch >= 0xE0) && (uch <= 0xF9));
882 return false;
// True when ch is a space or a horizontal tab.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
889 // Need to break text into segments near lengthSegment but taking into
890 // account the encoding to not break inside a UTF-8 or DBCS character
891 // and also trying to avoid breaking inside a pair of combining characters.
892 // The segment length must always be long enough (more than 4 bytes)
893 // so that there will be at least one whole character to make a segment.
894 // For UTF-8, text must consist only of valid whole characters.
895 // In preference order from best to worst:
896 // 1) Break after space
897 // 2) Break before punctuation
898 // 3) Break after whole character
900 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
901 if (length <= lengthSegment)
902 return length;
903 int lastSpaceBreak = -1;
904 int lastPunctuationBreak = -1;
905 int lastEncodingAllowedBreak = 0;
906 for (int j=0; j < lengthSegment;) {
907 unsigned char ch = static_cast<unsigned char>(text[j]);
908 if (j > 0) {
909 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
910 lastSpaceBreak = j;
912 if (ch < 'A') {
913 lastPunctuationBreak = j;
916 lastEncodingAllowedBreak = j;
918 if (dbcsCodePage == SC_CP_UTF8) {
919 j += UTF8BytesOfLead[ch];
920 } else if (dbcsCodePage) {
921 j += IsDBCSLeadByte(ch) ? 2 : 1;
922 } else {
923 j++;
926 if (lastSpaceBreak >= 0) {
927 return lastSpaceBreak;
928 } else if (lastPunctuationBreak >= 0) {
929 return lastPunctuationBreak;
931 return lastEncodingAllowedBreak;
934 EncodingFamily Document::CodePageFamily() const {
935 if (SC_CP_UTF8 == dbcsCodePage)
936 return efUnicode;
937 else if (dbcsCodePage)
938 return efDBCS;
939 else
940 return efEightBit;
943 void Document::ModifiedAt(int pos) {
944 if (endStyled > pos)
945 endStyled = pos;
948 void Document::CheckReadOnly() {
949 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
950 enteredReadOnlyCount++;
951 NotifyModifyAttempt();
952 enteredReadOnlyCount--;
956 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
957 // SetStyleAt does not change the persistent state of a document
959 bool Document::DeleteChars(int pos, int len) {
960 if (pos < 0)
961 return false;
962 if (len <= 0)
963 return false;
964 if ((pos + len) > Length())
965 return false;
966 CheckReadOnly();
967 if (enteredModification != 0) {
968 return false;
969 } else {
970 enteredModification++;
971 if (!cb.IsReadOnly()) {
972 NotifyModified(
973 DocModification(
974 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
975 pos, len,
976 0, 0));
977 int prevLinesTotal = LinesTotal();
978 bool startSavePoint = cb.IsSavePoint();
979 bool startSequence = false;
980 const char *text = cb.DeleteChars(pos, len, startSequence);
981 if (startSavePoint && cb.IsCollectingUndo())
982 NotifySavePoint(!startSavePoint);
983 if ((pos < Length()) || (pos == 0))
984 ModifiedAt(pos);
985 else
986 ModifiedAt(pos-1);
987 NotifyModified(
988 DocModification(
989 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
990 pos, len,
991 LinesTotal() - prevLinesTotal, text));
993 enteredModification--;
995 return !cb.IsReadOnly();
999 * Insert a string with a length.
1001 int Document::InsertString(int position, const char *s, int insertLength) {
1002 if (insertLength <= 0) {
1003 return 0;
1005 CheckReadOnly(); // Application may change read only state here
1006 if (cb.IsReadOnly()) {
1007 return 0;
1009 if (enteredModification != 0) {
1010 return 0;
1012 enteredModification++;
1013 insertionSet = false;
1014 insertion.clear();
1015 NotifyModified(
1016 DocModification(
1017 SC_MOD_INSERTCHECK,
1018 position, insertLength,
1019 0, s));
1020 if (insertionSet) {
1021 s = insertion.c_str();
1022 insertLength = static_cast<int>(insertion.length());
1024 NotifyModified(
1025 DocModification(
1026 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1027 position, insertLength,
1028 0, s));
1029 int prevLinesTotal = LinesTotal();
1030 bool startSavePoint = cb.IsSavePoint();
1031 bool startSequence = false;
1032 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1033 if (startSavePoint && cb.IsCollectingUndo())
1034 NotifySavePoint(!startSavePoint);
1035 ModifiedAt(position);
1036 NotifyModified(
1037 DocModification(
1038 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1039 position, insertLength,
1040 LinesTotal() - prevLinesTotal, text));
1041 if (insertionSet) { // Free memory as could be large
1042 std::string().swap(insertion);
1044 enteredModification--;
1045 return insertLength;
// Replace the text of an insertion that is in progress: stores `length`
// bytes of `s` in `insertion` and sets insertionSet, which InsertString
// checks after sending its SC_MOD_INSERTCHECK notification.
1048 void Document::ChangeInsertion(const char *s, int length) {
1049 insertionSet = true;
1050 insertion.assign(s, length);
1053 int SCI_METHOD Document::AddData(char *data, int length) {
1054 try {
1055 int position = Length();
1056 InsertString(position, data, length);
1057 } catch (std::bad_alloc &) {
1058 return SC_STATUS_BADALLOC;
1059 } catch (...) {
1060 return SC_STATUS_FAILURE;
1062 return 0;
// Return this object as an untyped pointer.
1065 void * SCI_METHOD Document::ConvertToDocument() {
1066 return this;
// Undo the steps reported by cb.StartUndo(), notifying watchers before and
// after each step is performed.
// Nothing is undone when a modification is already in progress, when undo
// collection is off, or when the buffer is read-only.
// Returns newPos: -1 if no text step was performed; otherwise the position of
// the last text step, advanced past the text when that step re-inserted text.
// Consecutive re-insertions that touch each other are coalesced so the result
// is the end of the whole run.
1069 int Document::Undo() {
1070 int newPos = -1;
1071 CheckReadOnly();
1072 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1073 enteredModification++;
1074 if (!cb.IsReadOnly()) {
1075 bool startSavePoint = cb.IsSavePoint();
1076 bool multiLine = false;
1077 int steps = cb.StartUndo();
1078 //Platform::DebugPrintf("Steps=%d\n", steps);
// State used to merge adjacent re-insertions into one run for newPos
1079 int coalescedRemovePos = -1;
1080 int coalescedRemoveLen = 0;
1081 int prevRemoveActionPos = -1;
1082 int prevRemoveActionLen = 0;
1083 for (int step = 0; step < steps; step++) {
1084 const int prevLinesTotal = LinesTotal();
1085 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal inserts text, a container action
// is only reported, and anything else is reported as an upcoming deletion
1086 if (action.at == removeAction) {
1087 NotifyModified(DocModification(
1088 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1089 } else if (action.at == containerAction) {
1090 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1091 dm.token = action.position;
1092 NotifyModified(dm);
// A container action that may not coalesce ends the current run
1093 if (!action.mayCoalesce) {
1094 coalescedRemovePos = -1;
1095 coalescedRemoveLen = 0;
1096 prevRemoveActionPos = -1;
1097 prevRemoveActionLen = 0;
1099 } else {
1100 NotifyModified(DocModification(
1101 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1103 cb.PerformUndoStep();
1104 if (action.at != containerAction) {
1105 ModifiedAt(action.position);
1106 newPos = action.position;
1109 int modFlags = SC_PERFORMED_UNDO;
1110 // With undo, an insertion action becomes a deletion notification
1111 if (action.at == removeAction) {
1112 newPos += action.lenData;
1113 modFlags |= SC_MOD_INSERTTEXT;
// Extend the run when this step starts at the previous step's start or end
1114 if ((coalescedRemoveLen > 0) &&
1115 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1116 coalescedRemoveLen += action.lenData;
1117 newPos = coalescedRemovePos + coalescedRemoveLen;
1118 } else {
1119 coalescedRemovePos = action.position;
1120 coalescedRemoveLen = action.lenData;
1122 prevRemoveActionPos = action.position;
1123 prevRemoveActionLen = action.lenData;
1124 } else if (action.at == insertAction) {
1125 modFlags |= SC_MOD_DELETETEXT;
1126 coalescedRemovePos = -1;
1127 coalescedRemoveLen = 0;
1128 prevRemoveActionPos = -1;
1129 prevRemoveActionLen = 0;
1131 if (steps > 1)
1132 modFlags |= SC_MULTISTEPUNDOREDO;
1133 const int linesAdded = LinesTotal() - prevLinesTotal;
1134 if (linesAdded != 0)
1135 multiLine = true;
// The final step also reports whether any step changed the line count
1136 if (step == steps - 1) {
1137 modFlags |= SC_LASTSTEPINUNDOREDO;
1138 if (multiLine)
1139 modFlags |= SC_MULTILINEUNDOREDO;
1141 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1142 linesAdded, action.data));
// Tell watchers only if the save point state changed across the whole undo
1145 bool endSavePoint = cb.IsSavePoint();
1146 if (startSavePoint != endSavePoint)
1147 NotifySavePoint(endSavePoint);
1149 enteredModification--;
1151 return newPos;
// Redo the steps reported by cb.StartRedo(), notifying watchers before and
// after each step is performed.
// Nothing is redone when a modification is already in progress, when undo
// collection is off, or when the buffer is read-only.
// Returns newPos: -1 if no text step was performed; otherwise the position of
// the last text step, advanced past the text when that step inserted text.
1154 int Document::Redo() {
1155 int newPos = -1;
1156 CheckReadOnly();
1157 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1158 enteredModification++;
1159 if (!cb.IsReadOnly()) {
1160 bool startSavePoint = cb.IsSavePoint();
1161 bool multiLine = false;
1162 int steps = cb.StartRedo();
1163 for (int step = 0; step < steps; step++) {
1164 const int prevLinesTotal = LinesTotal();
1165 const Action &action = cb.GetRedoStep();
// "Before" notification matching the kind of action about to be replayed
1166 if (action.at == insertAction) {
1167 NotifyModified(DocModification(
1168 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1169 } else if (action.at == containerAction) {
1170 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1171 dm.token = action.position;
1172 NotifyModified(dm);
1173 } else {
1174 NotifyModified(DocModification(
1175 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1177 cb.PerformRedoStep();
1178 if (action.at != containerAction) {
1179 ModifiedAt(action.position);
1180 newPos = action.position;
1183 int modFlags = SC_PERFORMED_REDO;
1184 if (action.at == insertAction) {
1185 newPos += action.lenData;
1186 modFlags |= SC_MOD_INSERTTEXT;
1187 } else if (action.at == removeAction) {
1188 modFlags |= SC_MOD_DELETETEXT;
1190 if (steps > 1)
1191 modFlags |= SC_MULTISTEPUNDOREDO;
1192 const int linesAdded = LinesTotal() - prevLinesTotal;
1193 if (linesAdded != 0)
1194 multiLine = true;
// The final step also reports whether any step changed the line count
1195 if (step == steps - 1) {
1196 modFlags |= SC_LASTSTEPINUNDOREDO;
1197 if (multiLine)
1198 modFlags |= SC_MULTILINEUNDOREDO;
1200 NotifyModified(
1201 DocModification(modFlags, action.position, action.lenData,
1202 linesAdded, action.data));
// Tell watchers only if the save point state changed across the whole redo
1205 bool endSavePoint = cb.IsSavePoint();
1206 if (startSavePoint != endSavePoint)
1207 NotifySavePoint(endSavePoint);
1209 enteredModification--;
1211 return newPos;
1214 void Document::DelChar(int pos) {
1215 DeleteChars(pos, LenChar(pos));
1218 void Document::DelCharBack(int pos) {
1219 if (pos <= 0) {
1220 return;
1221 } else if (IsCrLf(pos - 2)) {
1222 DeleteChars(pos - 2, 2);
1223 } else if (dbcsCodePage) {
1224 int startChar = NextPosition(pos, -1);
1225 DeleteChars(startChar, pos - startChar);
1226 } else {
1227 DeleteChars(pos - 1, 1);
/** Return the first multiple of tabSize that is strictly greater than pos. */
static int NextTab(int pos, int tabSize) {
	const int tabStopsPassed = pos / tabSize;
	return (tabStopsPassed + 1) * tabSize;
}
/**
 * Build the white space needed to reach a given indentation.
 * @param indent Width of the indentation in columns; values <= 0 give "".
 * @param tabSize Width of a tab in columns.
 * @param insertSpaces When true only spaces are used; otherwise as many tabs
 *  as fit are used, followed by spaces for the remainder.
 * @return The indentation string.
 * A non-positive tabSize cannot be expressed with tabs (and would never
 * terminate the tab loop), so spaces alone are used in that case.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (!insertSpaces && (tabSize > 0)) {
		while (indent >= tabSize) {
			indentation += '\t';
			indent -= tabSize;
		}
	}
	while (indent > 0) {
		indentation += ' ';
		indent--;
	}
	return indentation;
}
1250 int SCI_METHOD Document::GetLineIndentation(int line) {
1251 int indent = 0;
1252 if ((line >= 0) && (line < LinesTotal())) {
1253 int lineStart = LineStart(line);
1254 int length = Length();
1255 for (int i = lineStart; i < length; i++) {
1256 char ch = cb.CharAt(i);
1257 if (ch == ' ')
1258 indent++;
1259 else if (ch == '\t')
1260 indent = NextTab(indent, tabInChars);
1261 else
1262 return indent;
1265 return indent;
1268 int Document::SetLineIndentation(int line, int indent) {
1269 int indentOfLine = GetLineIndentation(line);
1270 if (indent < 0)
1271 indent = 0;
1272 if (indent != indentOfLine) {
1273 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1274 int thisLineStart = LineStart(line);
1275 int indentPos = GetLineIndentPosition(line);
1276 UndoGroup ug(this);
1277 DeleteChars(thisLineStart, indentPos - thisLineStart);
1278 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1279 static_cast<int>(linebuf.length()));
1280 } else {
1281 return GetLineIndentPosition(line);
1285 int Document::GetLineIndentPosition(int line) const {
1286 if (line < 0)
1287 return 0;
1288 int pos = LineStart(line);
1289 int length = Length();
1290 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1291 pos++;
1293 return pos;
1296 int Document::GetColumn(int pos) {
1297 int column = 0;
1298 int line = LineFromPosition(pos);
1299 if ((line >= 0) && (line < LinesTotal())) {
1300 for (int i = LineStart(line); i < pos;) {
1301 char ch = cb.CharAt(i);
1302 if (ch == '\t') {
1303 column = NextTab(column, tabInChars);
1304 i++;
1305 } else if (ch == '\r') {
1306 return column;
1307 } else if (ch == '\n') {
1308 return column;
1309 } else if (i >= Length()) {
1310 return column;
1311 } else {
1312 column++;
1313 i = NextPosition(i, 1);
1317 return column;
1320 int Document::CountCharacters(int startPos, int endPos) const {
1321 startPos = MovePositionOutsideChar(startPos, 1, false);
1322 endPos = MovePositionOutsideChar(endPos, -1, false);
1323 int count = 0;
1324 int i = startPos;
1325 while (i < endPos) {
1326 count++;
1327 if (IsCrLf(i))
1328 i++;
1329 i = NextPosition(i, 1);
1331 return count;
1334 int Document::CountUTF16(int startPos, int endPos) const {
1335 startPos = MovePositionOutsideChar(startPos, 1, false);
1336 endPos = MovePositionOutsideChar(endPos, -1, false);
1337 int count = 0;
1338 int i = startPos;
1339 while (i < endPos) {
1340 count++;
1341 const int next = NextPosition(i, 1);
1342 if ((next - i) > 3)
1343 count++;
1344 i = next;
1346 return count;
1349 int Document::FindColumn(int line, int column) {
1350 int position = LineStart(line);
1351 if ((line >= 0) && (line < LinesTotal())) {
1352 int columnCurrent = 0;
1353 while ((columnCurrent < column) && (position < Length())) {
1354 char ch = cb.CharAt(position);
1355 if (ch == '\t') {
1356 columnCurrent = NextTab(columnCurrent, tabInChars);
1357 if (columnCurrent > column)
1358 return position;
1359 position++;
1360 } else if (ch == '\r') {
1361 return position;
1362 } else if (ch == '\n') {
1363 return position;
1364 } else {
1365 columnCurrent++;
1366 position = NextPosition(position, 1);
1370 return position;
1373 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1374 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1375 for (int line = lineBottom; line >= lineTop; line--) {
1376 int indentOfLine = GetLineIndentation(line);
1377 if (forwards) {
1378 if (LineStart(line) < LineEnd(line)) {
1379 SetLineIndentation(line, indentOfLine + IndentSize());
1381 } else {
1382 SetLineIndentation(line, indentOfLine - IndentSize());
1387 // Convert line endings for a piece of text to a particular mode.
1388 // Stop at len or when a NUL is found.
1389 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1390 std::string dest;
1391 for (size_t i = 0; (i < len) && (s[i]); i++) {
1392 if (s[i] == '\n' || s[i] == '\r') {
1393 if (eolModeWanted == SC_EOL_CR) {
1394 dest.push_back('\r');
1395 } else if (eolModeWanted == SC_EOL_LF) {
1396 dest.push_back('\n');
1397 } else { // eolModeWanted == SC_EOL_CRLF
1398 dest.push_back('\r');
1399 dest.push_back('\n');
1401 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1402 i++;
1404 } else {
1405 dest.push_back(s[i]);
1408 return dest;
1411 void Document::ConvertLineEnds(int eolModeSet) {
1412 UndoGroup ug(this);
1414 for (int pos = 0; pos < Length(); pos++) {
1415 if (cb.CharAt(pos) == '\r') {
1416 if (cb.CharAt(pos + 1) == '\n') {
1417 // CRLF
1418 if (eolModeSet == SC_EOL_CR) {
1419 DeleteChars(pos + 1, 1); // Delete the LF
1420 } else if (eolModeSet == SC_EOL_LF) {
1421 DeleteChars(pos, 1); // Delete the CR
1422 } else {
1423 pos++;
1425 } else {
1426 // CR
1427 if (eolModeSet == SC_EOL_CRLF) {
1428 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1429 } else if (eolModeSet == SC_EOL_LF) {
1430 pos += InsertString(pos, "\n", 1); // Insert LF
1431 DeleteChars(pos, 1); // Delete CR
1432 pos--;
1435 } else if (cb.CharAt(pos) == '\n') {
1436 // LF
1437 if (eolModeSet == SC_EOL_CRLF) {
1438 pos += InsertString(pos, "\r", 1); // Insert CR
1439 } else if (eolModeSet == SC_EOL_CR) {
1440 pos += InsertString(pos, "\r", 1); // Insert CR
1441 DeleteChars(pos, 1); // Delete LF
1442 pos--;
1449 bool Document::IsWhiteLine(int line) const {
1450 int currentChar = LineStart(line);
1451 int endLine = LineEnd(line);
1452 while (currentChar < endLine) {
1453 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1454 return false;
1456 ++currentChar;
1458 return true;
1461 int Document::ParaUp(int pos) const {
1462 int line = LineFromPosition(pos);
1463 line--;
1464 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1465 line--;
1467 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1468 line--;
1470 line++;
1471 return LineStart(line);
1474 int Document::ParaDown(int pos) const {
1475 int line = LineFromPosition(pos);
1476 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1477 line++;
1479 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1480 line++;
1482 if (line < LinesTotal())
1483 return LineStart(line);
1484 else // end of a document
1485 return LineEnd(line-1);
1488 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1489 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1490 return CharClassify::ccWord;
1491 return charClass.GetClass(ch);
1495 * Used by commmands that want to select whole words.
1496 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1498 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1499 CharClassify::cc ccStart = CharClassify::ccWord;
1500 if (delta < 0) {
1501 if (!onlyWordCharacters)
1502 ccStart = WordCharClass(cb.CharAt(pos-1));
1503 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1504 pos--;
1505 } else {
1506 if (!onlyWordCharacters && pos < Length())
1507 ccStart = WordCharClass(cb.CharAt(pos));
1508 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1509 pos++;
1511 return MovePositionOutsideChar(pos, delta, true);
1515 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1516 * (delta < 0).
1517 * This is looking for a transition between character classes although there is also some
1518 * additional movement to transit white space.
1519 * Used by cursor movement by word commands.
1521 int Document::NextWordStart(int pos, int delta) {
1522 if (delta < 0) {
1523 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1524 pos--;
1525 if (pos > 0) {
1526 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1527 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1528 pos--;
1531 } else {
1532 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1533 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1534 pos++;
1535 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1536 pos++;
1538 return pos;
1542 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1543 * (delta < 0).
1544 * This is looking for a transition between character classes although there is also some
1545 * additional movement to transit white space.
1546 * Used by cursor movement by word commands.
1548 int Document::NextWordEnd(int pos, int delta) {
1549 if (delta < 0) {
1550 if (pos > 0) {
1551 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1552 if (ccStart != CharClassify::ccSpace) {
1553 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1554 pos--;
1557 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1558 pos--;
1561 } else {
1562 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1563 pos++;
1565 if (pos < Length()) {
1566 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1567 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1568 pos++;
1572 return pos;
1576 * Check that the character at the given position is a word or punctuation character and that
1577 * the previous character is of a different character class.
1579 bool Document::IsWordStartAt(int pos) const {
1580 if (pos > 0) {
1581 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1582 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1583 (ccPos != WordCharClass(CharAt(pos - 1)));
1585 return true;
1589 * Check that the character at the given position is a word or punctuation character and that
1590 * the next character is of a different character class.
1592 bool Document::IsWordEndAt(int pos) const {
1593 if (pos < Length()) {
1594 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1595 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1596 (ccPrev != WordCharClass(CharAt(pos)));
1598 return true;
1602 * Check that the given range is has transitions between character classes at both
1603 * ends and where the characters on the inside are word or punctuation characters.
1605 bool Document::IsWordAt(int start, int end) const {
1606 return IsWordStartAt(start) && IsWordEndAt(end);
1609 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1610 return (!word && !wordStart) ||
1611 (word && IsWordAt(pos, pos + length)) ||
1612 (wordStart && IsWordStartAt(pos));
1615 bool Document::HasCaseFolder(void) const {
1616 return pcf != 0;
1619 void Document::SetCaseFolder(CaseFolder *pcf_) {
1620 delete pcf;
1621 pcf = pcf_;
1624 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1625 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1626 if (UTF8IsAscii(leadByte)) {
1627 // Common case: ASCII character
1628 return CharacterExtracted(leadByte, 1);
1630 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1631 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1632 for (int b=1; b<widthCharBytes; b++)
1633 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1634 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1635 if (utf8status & UTF8MaskInvalid) {
1636 // Treat as invalid and use up just one byte
1637 return CharacterExtracted(unicodeReplacementChar, 1);
1638 } else {
1639 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1644 * Find text in document, supporting both forward and backward
1645 * searches (just pass minPos > maxPos to do a backward search)
1646 * Has not been tested with backwards DBCS searches yet.
// Search for text between minPos and maxPos; minPos > maxPos searches backwards.
// search/*length give the text to find and its length in bytes.
// caseSensitive, word and wordStart select the matching rules; regExp hands
// the whole search to the regex object, which also receives flags.
// Returns the position of the match, -1 when there is none, or minPos when
// *length is not positive. In the UTF-8 and DBCS case-insensitive branches
// *length is updated to the byte length of the matched document text.
// NOTE(review): the case-insensitive branches dereference pcf without a null
// check; presumably callers install a case folder first - confirm.
1648 long Document::FindText(int minPos, int maxPos, const char *search,
1649 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1650 int *length) {
1651 if (*length <= 0)
1652 return minPos;
1653 if (regExp) {
// The regex searcher is created on first use
1654 if (!regex)
1655 regex = CreateRegexSearch(&charClass);
1656 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1657 } else {
1659 const bool forward = minPos <= maxPos;
1660 const int increment = forward ? 1 : -1;
1662 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1663 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1664 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1666 // Compute actual search ranges needed
1667 const int lengthFind = *length;
1669 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the upper end of the range whichever direction is searched
1670 const int limitPos = Platform::Maximum(startPos, endPos);
1671 int pos = startPos;
1672 if (!forward) {
1673 // Back all of a character
1674 pos = NextPosition(pos, increment);
1676 if (caseSensitive) {
// Case sensitive: compare bytes directly
1677 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1678 const char charStartSearch = search[0];
1679 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1680 if (CharAt(pos) == charStartSearch) {
1681 bool found = (pos + lengthFind) <= limitPos;
1682 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1683 found = CharAt(pos + indexSearch) == search[indexSearch];
1685 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1686 return pos;
1689 if (!NextCharacter(pos, increment))
1690 break;
1692 } else if (SC_CP_UTF8 == dbcsCodePage) {
// Case insensitive, UTF-8: fold the search text once, then fold the document
// one character at a time and compare the folded bytes
1693 const size_t maxFoldingExpansion = 4;
1694 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1695 const int lenSearch = static_cast<int>(
1696 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1697 char bytes[UTF8MaxBytes + 1];
1698 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1699 while (forward ? (pos < endPos) : (pos >= endPos)) {
1700 int widthFirstCharacter = 0;
1701 int posIndexDocument = pos;
1702 int indexSearch = 0;
1703 bool characterMatches = true;
1704 for (;;) {
1705 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1706 bytes[0] = leadByte;
1707 int widthChar = 1;
1708 if (!UTF8IsAscii(leadByte)) {
1709 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1710 for (int b=1; b<widthCharBytes; b++) {
1711 bytes[b] = cb.CharAt(posIndexDocument+b);
1713 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the first character so a forward search can step over it
1715 if (!widthFirstCharacter)
1716 widthFirstCharacter = widthChar;
1717 if ((posIndexDocument + widthChar) > limitPos)
1718 break;
1719 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1720 folded[lenFlat] = 0;
1721 // Does folded match the buffer
1722 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1723 if (!characterMatches)
1724 break;
1725 posIndexDocument += widthChar;
1726 indexSearch += lenFlat;
1727 if (indexSearch >= lenSearch)
1728 break;
1730 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1731 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
// Report the matched length in document bytes
1732 *length = posIndexDocument - pos;
1733 return pos;
1736 if (forward) {
1737 pos += widthFirstCharacter;
1738 } else {
1739 if (!NextCharacter(pos, increment))
1740 break;
1743 } else if (dbcsCodePage) {
// Case insensitive, DBCS: same approach with characters of 1 or 2 bytes
1744 const size_t maxBytesCharacter = 2;
1745 const size_t maxFoldingExpansion = 4;
1746 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1747 const int lenSearch = static_cast<int>(
1748 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1749 while (forward ? (pos < endPos) : (pos >= endPos)) {
1750 int indexDocument = 0;
1751 int indexSearch = 0;
1752 bool characterMatches = true;
1753 while (characterMatches &&
1754 ((pos + indexDocument) < limitPos) &&
1755 (indexSearch < lenSearch)) {
1756 char bytes[maxBytesCharacter + 1];
1757 bytes[0] = cb.CharAt(pos + indexDocument);
1758 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1759 if (widthChar == 2)
1760 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1761 if ((pos + indexDocument + widthChar) > limitPos)
1762 break;
1763 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1764 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1765 folded[lenFlat] = 0;
1766 // Does folded match the buffer
1767 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1768 indexDocument += widthChar;
1769 indexSearch += lenFlat;
1771 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1772 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
// Report the matched length in document bytes
1773 *length = indexDocument;
1774 return pos;
1777 if (!NextCharacter(pos, increment))
1778 break;
1780 } else {
// Case insensitive, single byte: fold each document byte and compare
1781 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1782 std::vector<char> searchThing(lengthFind + 1);
1783 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1784 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1785 bool found = (pos + lengthFind) <= limitPos;
1786 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1787 char ch = CharAt(pos + indexSearch);
1788 char folded[2];
1789 pcf->Fold(folded, sizeof(folded), &ch, 1);
1790 found = folded[0] == searchThing[indexSearch];
1792 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1793 return pos;
1795 if (!NextCharacter(pos, increment))
1796 break;
1800 //Platform::DebugPrintf("Not found\n");
1801 return -1;
1804 const char *Document::SubstituteByPosition(const char *text, int *length) {
1805 if (regex)
1806 return regex->SubstituteByPosition(this, text, length);
1807 else
1808 return 0;
1811 int Document::LinesTotal() const {
1812 return cb.Lines();
1815 void Document::SetDefaultCharClasses(bool includeWordClass) {
1816 charClass.SetDefaultCharClasses(includeWordClass);
1819 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1820 charClass.SetCharClasses(chars, newCharClass);
1823 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1824 return charClass.GetCharsOfClass(characterClass, buffer);
1827 void SCI_METHOD Document::StartStyling(int position, char) {
1828 endStyled = position;
1831 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1832 if (enteredStyling != 0) {
1833 return false;
1834 } else {
1835 enteredStyling++;
1836 int prevEndStyled = endStyled;
1837 if (cb.SetStyleFor(endStyled, length, style)) {
1838 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1839 prevEndStyled, length);
1840 NotifyModified(mh);
1842 endStyled += length;
1843 enteredStyling--;
1844 return true;
1848 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1849 if (enteredStyling != 0) {
1850 return false;
1851 } else {
1852 enteredStyling++;
1853 bool didChange = false;
1854 int startMod = 0;
1855 int endMod = 0;
1856 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1857 PLATFORM_ASSERT(endStyled < Length());
1858 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1859 if (!didChange) {
1860 startMod = endStyled;
1862 didChange = true;
1863 endMod = endStyled;
1866 if (didChange) {
1867 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1868 startMod, endMod - startMod + 1);
1869 NotifyModified(mh);
1871 enteredStyling--;
1872 return true;
1876 void Document::EnsureStyledTo(int pos) {
1877 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1878 IncrementStyleClock();
1879 if (pli && !pli->UseContainerLexing()) {
1880 int lineEndStyled = LineFromPosition(GetEndStyled());
1881 int endStyledTo = LineStart(lineEndStyled);
1882 pli->Colourise(endStyledTo, pos);
1883 } else {
1884 // Ask the watchers to style, and stop as soon as one responds.
1885 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1886 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1887 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1893 void Document::LexerChanged() {
1894 // Tell the watchers the lexer has changed.
1895 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1896 it->watcher->NotifyLexerChanged(this, it->userData);
1900 int SCI_METHOD Document::SetLineState(int line, int state) {
1901 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1902 if (state != statePrevious) {
1903 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1904 NotifyModified(mh);
1906 return statePrevious;
1909 int SCI_METHOD Document::GetLineState(int line) const {
1910 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1913 int Document::GetMaxLineState() {
1914 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1917 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1918 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1919 NotifyModified(mh);
1922 StyledText Document::MarginStyledText(int line) const {
1923 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1924 return StyledText(pla->Length(line), pla->Text(line),
1925 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1928 void Document::MarginSetText(int line, const char *text) {
1929 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1930 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1931 NotifyModified(mh);
1934 void Document::MarginSetStyle(int line, int style) {
1935 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1936 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1939 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1940 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1941 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1944 void Document::MarginClearAll() {
1945 int maxEditorLine = LinesTotal();
1946 for (int l=0; l<maxEditorLine; l++)
1947 MarginSetText(l, 0);
1948 // Free remaining data
1949 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1952 StyledText Document::AnnotationStyledText(int line) const {
1953 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1954 return StyledText(pla->Length(line), pla->Text(line),
1955 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1958 void Document::AnnotationSetText(int line, const char *text) {
1959 if (line >= 0 && line < LinesTotal()) {
1960 const int linesBefore = AnnotationLines(line);
1961 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1962 const int linesAfter = AnnotationLines(line);
1963 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1964 mh.annotationLinesAdded = linesAfter - linesBefore;
1965 NotifyModified(mh);
1969 void Document::AnnotationSetStyle(int line, int style) {
1970 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1971 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1972 NotifyModified(mh);
1975 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1976 if (line >= 0 && line < LinesTotal()) {
1977 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1981 int Document::AnnotationLines(int line) const {
1982 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1985 void Document::AnnotationClearAll() {
1986 int maxEditorLine = LinesTotal();
1987 for (int l=0; l<maxEditorLine; l++)
1988 AnnotationSetText(l, 0);
1989 // Free remaining data
1990 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1993 void Document::IncrementStyleClock() {
1994 styleClock = (styleClock + 1) % 0x100000;
1997 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1998 if (decorations.FillRange(position, value, fillLength)) {
1999 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2000 position, fillLength);
2001 NotifyModified(mh);
2005 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2006 WatcherWithUserData wwud(watcher, userData);
2007 std::vector<WatcherWithUserData>::iterator it =
2008 std::find(watchers.begin(), watchers.end(), wwud);
2009 if (it != watchers.end())
2010 return false;
2011 watchers.push_back(wwud);
2012 return true;
2015 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2016 std::vector<WatcherWithUserData>::iterator it =
2017 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2018 if (it != watchers.end()) {
2019 watchers.erase(it);
2020 return true;
2022 return false;
2025 void Document::NotifyModifyAttempt() {
2026 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2027 it->watcher->NotifyModifyAttempt(this, it->userData);
2031 void Document::NotifySavePoint(bool atSavePoint) {
2032 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2033 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2037 void Document::NotifyModified(DocModification mh) {
2038 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2039 decorations.InsertSpace(mh.position, mh.length);
2040 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2041 decorations.DeleteRange(mh.position, mh.length);
2043 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2044 it->watcher->NotifyModified(this, mh, it->userData);
2048 bool Document::IsWordPartSeparator(char ch) const {
2049 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2052 int Document::WordPartLeft(int pos) {
2053 if (pos > 0) {
2054 --pos;
2055 char startChar = cb.CharAt(pos);
2056 if (IsWordPartSeparator(startChar)) {
2057 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2058 --pos;
2061 if (pos > 0) {
2062 startChar = cb.CharAt(pos);
2063 --pos;
2064 if (IsLowerCase(startChar)) {
2065 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2066 --pos;
2067 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2068 ++pos;
2069 } else if (IsUpperCase(startChar)) {
2070 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2071 --pos;
2072 if (!IsUpperCase(cb.CharAt(pos)))
2073 ++pos;
2074 } else if (IsADigit(startChar)) {
2075 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2076 --pos;
2077 if (!IsADigit(cb.CharAt(pos)))
2078 ++pos;
2079 } else if (IsPunctuation(startChar)) {
2080 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2081 --pos;
2082 if (!IsPunctuation(cb.CharAt(pos)))
2083 ++pos;
2084 } else if (isspacechar(startChar)) {
2085 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2086 --pos;
2087 if (!isspacechar(cb.CharAt(pos)))
2088 ++pos;
2089 } else if (!IsASCII(startChar)) {
2090 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2091 --pos;
2092 if (IsASCII(cb.CharAt(pos)))
2093 ++pos;
2094 } else {
2095 ++pos;
2099 return pos;
2102 int Document::WordPartRight(int pos) {
2103 char startChar = cb.CharAt(pos);
2104 int length = Length();
2105 if (IsWordPartSeparator(startChar)) {
2106 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2107 ++pos;
2108 startChar = cb.CharAt(pos);
2110 if (!IsASCII(startChar)) {
2111 while (pos < length && !IsASCII(cb.CharAt(pos)))
2112 ++pos;
2113 } else if (IsLowerCase(startChar)) {
2114 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2115 ++pos;
2116 } else if (IsUpperCase(startChar)) {
2117 if (IsLowerCase(cb.CharAt(pos + 1))) {
2118 ++pos;
2119 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2120 ++pos;
2121 } else {
2122 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2123 ++pos;
2125 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2126 --pos;
2127 } else if (IsADigit(startChar)) {
2128 while (pos < length && IsADigit(cb.CharAt(pos)))
2129 ++pos;
2130 } else if (IsPunctuation(startChar)) {
2131 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2132 ++pos;
2133 } else if (isspacechar(startChar)) {
2134 while (pos < length && isspacechar(cb.CharAt(pos)))
2135 ++pos;
2136 } else {
2137 ++pos;
2139 return pos;
/**
 * @return true when @a c is a line feed or a carriage return.
 */
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2146 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2147 int sStart = cb.StyleAt(pos);
2148 if (delta < 0) {
2149 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2150 pos--;
2151 pos++;
2152 } else {
2153 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2154 pos++;
2156 return pos;
/**
 * @return the brace that pairs with @a ch for the four brace kinds
 *         (), [], {} and <>, or '\0' when @a ch is not a brace.
 */
static char BraceOpposite(char ch) {
	// Partners sit next to each other, so flipping the low bit of the
	// index gives the opposite brace.
	static const char bracePairs[] = "()[]{}<>";
	const int pairedCount = 8;
	for (int i = 0; i < pairedCount; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1];
	}
	return '\0';
}
2182 // TODO: should be able to extend styled region to find matching brace
2183 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2184 char chBrace = CharAt(position);
2185 char chSeek = BraceOpposite(chBrace);
2186 if (chSeek == '\0')
2187 return - 1;
2188 char styBrace = static_cast<char>(StyleAt(position));
2189 int direction = -1;
2190 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2191 direction = 1;
2192 int depth = 1;
2193 position = NextPosition(position, direction);
2194 while ((position >= 0) && (position < Length())) {
2195 char chAtPos = CharAt(position);
2196 char styAtPos = static_cast<char>(StyleAt(position));
2197 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2198 if (chAtPos == chBrace)
2199 depth++;
2200 if (chAtPos == chSeek)
2201 depth--;
2202 if (depth == 0)
2203 return position;
2205 int positionBeforeMove = position;
2206 position = NextPosition(position, direction);
2207 if (position == positionBeforeMove)
2208 break;
2210 return - 1;
2214 * Implementation of RegexSearchBase for the default built-in regular expression engine
2216 class BuiltinRegex : public RegexSearchBase {
2217 public:
2218 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2220 virtual ~BuiltinRegex() {
2223 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2224 bool caseSensitive, bool word, bool wordStart, int flags,
2225 int *length);
2227 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2229 private:
2230 RESearch search;
2231 std::string substituted;
2234 namespace {
2237 * RESearchRange keeps track of search range.
2239 class RESearchRange {
2240 public:
2241 const Document *doc;
2242 int increment;
2243 int startPos;
2244 int endPos;
2245 int lineRangeStart;
2246 int lineRangeEnd;
2247 int lineRangeBreak;
2248 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2249 increment = (minPos <= maxPos) ? 1 : -1;
2251 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2252 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2253 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2255 lineRangeStart = doc->LineFromPosition(startPos);
2256 lineRangeEnd = doc->LineFromPosition(endPos);
2257 if ((increment == 1) &&
2258 (startPos >= doc->LineEnd(lineRangeStart)) &&
2259 (lineRangeStart < lineRangeEnd)) {
2260 // the start position is at end of line or between line end characters.
2261 lineRangeStart++;
2262 startPos = doc->LineStart(lineRangeStart);
2263 } else if ((increment == -1) &&
2264 (startPos <= doc->LineStart(lineRangeStart)) &&
2265 (lineRangeStart > lineRangeEnd)) {
2266 // the start position is at beginning of line.
2267 lineRangeStart--;
2268 startPos = doc->LineEnd(lineRangeStart);
2270 lineRangeBreak = lineRangeEnd + increment;
2272 Range LineRange(int line) const {
2273 Range range(doc->LineStart(line), doc->LineEnd(line));
2274 if (increment == 1) {
2275 if (line == lineRangeStart)
2276 range.start = startPos;
2277 if (line == lineRangeEnd)
2278 range.end = endPos;
2279 } else {
2280 if (line == lineRangeEnd)
2281 range.start = endPos;
2282 if (line == lineRangeStart)
2283 range.end = startPos;
2285 return range;
2289 // Define a way for the Regular Expression code to access the document
2290 class DocumentIndexer : public CharacterIndexer {
2291 Document *pdoc;
2292 int end;
2293 public:
2294 DocumentIndexer(Document *pdoc_, int end_) :
2295 pdoc(pdoc_), end(end_) {
2298 virtual ~DocumentIndexer() {
2301 virtual char CharAt(int index) {
2302 if (index < 0 || index >= end)
2303 return 0;
2304 else
2305 return pdoc->CharAt(index);
2309 #ifdef CXX11_REGEX
2311 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2312 public:
2313 const Document *doc;
2314 Position position;
2315 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2317 ByteIterator(const ByteIterator &other) {
2318 doc = other.doc;
2319 position = other.position;
2321 ByteIterator &operator=(const ByteIterator &other) {
2322 if (this != &other) {
2323 doc = other.doc;
2324 position = other.position;
2326 return *this;
2328 char operator*() const {
2329 return doc->CharAt(position);
2331 ByteIterator &operator++() {
2332 position++;
2333 return *this;
2335 ByteIterator operator++(int) {
2336 ByteIterator retVal(*this);
2337 position++;
2338 return retVal;
2340 ByteIterator &operator--() {
2341 position--;
2342 return *this;
2344 bool operator==(const ByteIterator &other) const {
2345 return doc == other.doc && position == other.position;
2347 bool operator!=(const ByteIterator &other) const {
2348 return doc != other.doc || position != other.position;
2350 int Pos() const {
2351 return position;
2353 int PosRoundUp() const {
2354 return position;
2358 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2359 // Would be better to use sizeof(wchar_t) or similar to differentiate
2360 // but easier for now to hard-code platforms.
2361 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2362 // appear to allow specializing basic_regex over these.
2364 #ifdef _WIN32
2365 #define WCHAR_T_IS_16 1
2366 #else
2367 #define WCHAR_T_IS_16 0
2368 #endif
2370 #if WCHAR_T_IS_16
2372 // On Windows, report non-BMP characters as 2 separate surrogates as that
2373 // matches wregex since it is based on wchar_t.
2374 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2375 // These 3 fields determine the iterator position and are used for comparisons
2376 const Document *doc;
2377 Position position;
2378 size_t characterIndex;
2379 // Remaining fields are derived from the determining fields so are excluded in comparisons
2380 unsigned int lenBytes;
2381 size_t lenCharacters;
2382 wchar_t buffered[2];
2383 public:
2384 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2385 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2386 buffered[0] = 0;
2387 buffered[1] = 0;
2388 if (doc) {
2389 ReadCharacter();
2392 UTF8Iterator(const UTF8Iterator &other) {
2393 doc = other.doc;
2394 position = other.position;
2395 characterIndex = other.characterIndex;
2396 lenBytes = other.lenBytes;
2397 lenCharacters = other.lenCharacters;
2398 buffered[0] = other.buffered[0];
2399 buffered[1] = other.buffered[1];
2401 UTF8Iterator &operator=(const UTF8Iterator &other) {
2402 if (this != &other) {
2403 doc = other.doc;
2404 position = other.position;
2405 characterIndex = other.characterIndex;
2406 lenBytes = other.lenBytes;
2407 lenCharacters = other.lenCharacters;
2408 buffered[0] = other.buffered[0];
2409 buffered[1] = other.buffered[1];
2411 return *this;
2413 wchar_t operator*() const {
2414 assert(lenCharacters != 0);
2415 return buffered[characterIndex];
2417 UTF8Iterator &operator++() {
2418 if ((characterIndex + 1) < (lenCharacters)) {
2419 characterIndex++;
2420 } else {
2421 position += lenBytes;
2422 ReadCharacter();
2423 characterIndex = 0;
2425 return *this;
2427 UTF8Iterator operator++(int) {
2428 UTF8Iterator retVal(*this);
2429 if ((characterIndex + 1) < (lenCharacters)) {
2430 characterIndex++;
2431 } else {
2432 position += lenBytes;
2433 ReadCharacter();
2434 characterIndex = 0;
2436 return retVal;
2438 UTF8Iterator &operator--() {
2439 if (characterIndex) {
2440 characterIndex--;
2441 } else {
2442 position = doc->NextPosition(position, -1);
2443 ReadCharacter();
2444 characterIndex = lenCharacters - 1;
2446 return *this;
2448 bool operator==(const UTF8Iterator &other) const {
2449 // Only test the determining fields, not the character widths and values derived from this
2450 return doc == other.doc &&
2451 position == other.position &&
2452 characterIndex == other.characterIndex;
2454 bool operator!=(const UTF8Iterator &other) const {
2455 // Only test the determining fields, not the character widths and values derived from this
2456 return doc != other.doc ||
2457 position != other.position ||
2458 characterIndex != other.characterIndex;
2460 int Pos() const {
2461 return position;
2463 int PosRoundUp() const {
2464 if (characterIndex)
2465 return position + lenBytes; // Force to end of character
2466 else
2467 return position;
2469 private:
2470 void ReadCharacter() {
2471 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2472 lenBytes = charExtracted.widthBytes;
2473 if (charExtracted.character == unicodeReplacementChar) {
2474 lenCharacters = 1;
2475 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2476 } else {
2477 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2482 #else
2484 // On Unix, report non-BMP characters as single characters
2486 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2487 const Document *doc;
2488 Position position;
2489 public:
2490 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2492 UTF8Iterator(const UTF8Iterator &other) {
2493 doc = other.doc;
2494 position = other.position;
2496 UTF8Iterator &operator=(const UTF8Iterator &other) {
2497 if (this != &other) {
2498 doc = other.doc;
2499 position = other.position;
2501 return *this;
2503 wchar_t operator*() const {
2504 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2505 return charExtracted.character;
2507 UTF8Iterator &operator++() {
2508 position = doc->NextPosition(position, 1);
2509 return *this;
2511 UTF8Iterator operator++(int) {
2512 UTF8Iterator retVal(*this);
2513 position = doc->NextPosition(position, 1);
2514 return retVal;
2516 UTF8Iterator &operator--() {
2517 position = doc->NextPosition(position, -1);
2518 return *this;
2520 bool operator==(const UTF8Iterator &other) const {
2521 return doc == other.doc && position == other.position;
2523 bool operator!=(const UTF8Iterator &other) const {
2524 return doc != other.doc || position != other.position;
2526 int Pos() const {
2527 return position;
2529 int PosRoundUp() const {
2530 return position;
2534 #endif
2536 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2537 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2538 if (!doc->IsLineStartPosition(startPos))
2539 flagsMatch |= std::regex_constants::match_not_bol;
2540 if (!doc->IsLineEndPosition(endPos))
2541 flagsMatch |= std::regex_constants::match_not_eol;
2542 return flagsMatch;
2545 template<typename Iterator, typename Regex>
2546 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2547 bool matched = false;
2548 std::match_results<Iterator> match;
2550 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2551 // If they didn't then the line by line iteration could be removed for the forwards
2552 // case and replaced with the following 4 lines:
2553 // Iterator uiStart(doc, startPos);
2554 // Iterator uiEnd(doc, endPos);
2555 // flagsMatch = MatchFlags(doc, startPos, endPos);
2556 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2558 // Line by line.
2559 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2560 const Range lineRange = resr.LineRange(line);
2561 Iterator itStart(doc, lineRange.start);
2562 Iterator itEnd(doc, lineRange.end);
2563 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2564 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2565 // Check for the last match on this line.
2566 if (matched) {
2567 if (resr.increment == -1) {
2568 while (matched) {
2569 Iterator itNext(doc, match[0].second.PosRoundUp());
2570 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2571 std::match_results<Iterator> matchNext;
2572 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2573 if (matched) {
2574 if (match[0].first == match[0].second) {
2575 // Empty match means failure so exit
2576 return false;
2578 match = matchNext;
2581 matched = true;
2583 break;
2586 if (matched) {
2587 for (size_t co = 0; co < match.size(); co++) {
2588 search.bopat[co] = match[co].first.Pos();
2589 search.eopat[co] = match[co].second.PosRoundUp();
2590 size_t lenMatch = search.eopat[co] - search.bopat[co];
2591 search.pat[co].resize(lenMatch);
2592 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2593 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2597 return matched;
2600 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2601 bool caseSensitive, int *length, RESearch &search) {
2602 const RESearchRange resr(doc, minPos, maxPos);
2603 try {
2604 //ElapsedTime et;
2605 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2606 // Flags that apper to have no effect:
2607 // | std::regex::collate | std::regex::extended;
2608 if (!caseSensitive)
2609 flagsRe = flagsRe | std::regex::icase;
2611 // Clear the RESearch so can fill in matches
2612 search.Clear();
2614 bool matched = false;
2615 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2616 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2617 std::vector<wchar_t> ws(lenS + 1);
2618 #if WCHAR_T_IS_16
2619 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2620 #else
2621 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2622 #endif
2623 ws[outLen] = 0;
2624 std::wregex regexp;
2625 #if defined(__APPLE__)
2626 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2627 // is one byte not one character.
2628 // However, on OS X this makes wregex act as Unicode
2629 std::locale localeU("en_US.UTF-8");
2630 regexp.imbue(localeU);
2631 #endif
2632 regexp.assign(&ws[0], flagsRe);
2633 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2635 } else {
2636 std::regex regexp;
2637 regexp.assign(s, flagsRe);
2638 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2641 int posMatch = -1;
2642 if (matched) {
2643 posMatch = search.bopat[0];
2644 *length = search.eopat[0] - search.bopat[0];
2646 // Example - search in doc/ScintillaHistory.html for
2647 // [[:upper:]]eta[[:space:]]
2648 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2649 //double durSearch = et.Duration(true);
2650 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2651 return posMatch;
2652 } catch (std::regex_error &) {
2653 // Failed to create regular expression
2654 throw RegexError();
2655 } catch (...) {
2656 // Failed in some other way
2657 return -1;
2661 #endif
2665 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2666 bool caseSensitive, bool, bool, int flags,
2667 int *length) {
2669 #ifdef CXX11_REGEX
2670 if (flags & SCFIND_CXX11REGEX) {
2671 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2672 caseSensitive, length, search);
2674 #endif
2676 const RESearchRange resr(doc, minPos, maxPos);
2678 const bool posix = (flags & SCFIND_POSIX) != 0;
2680 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2681 if (errmsg) {
2682 return -1;
2684 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2685 // Replace first '.' with '-' in each property file variable reference:
2686 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2687 // Replace: $(\1-\2)
2688 int pos = -1;
2689 int lenRet = 0;
2690 const char searchEnd = s[*length - 1];
2691 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2692 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2693 int startOfLine = doc->LineStart(line);
2694 int endOfLine = doc->LineEnd(line);
2695 if (resr.increment == 1) {
2696 if (line == resr.lineRangeStart) {
2697 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2698 continue; // Can't match start of line if start position after start of line
2699 startOfLine = resr.startPos;
2701 if (line == resr.lineRangeEnd) {
2702 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2703 continue; // Can't match end of line if end position before end of line
2704 endOfLine = resr.endPos;
2706 } else {
2707 if (line == resr.lineRangeEnd) {
2708 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2709 continue; // Can't match start of line if end position after start of line
2710 startOfLine = resr.endPos;
2712 if (line == resr.lineRangeStart) {
2713 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2714 continue; // Can't match end of line if start position before end of line
2715 endOfLine = resr.startPos;
2719 DocumentIndexer di(doc, endOfLine);
2720 int success = search.Execute(di, startOfLine, endOfLine);
2721 if (success) {
2722 pos = search.bopat[0];
2723 // Ensure only whole characters selected
2724 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2725 lenRet = search.eopat[0] - search.bopat[0];
2726 // There can be only one start of a line, so no need to look for last match in line
2727 if ((resr.increment == -1) && (s[0] != '^')) {
2728 // Check for the last match on this line.
2729 int repetitions = 1000; // Break out of infinite loop
2730 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2731 success = search.Execute(di, pos+1, endOfLine);
2732 if (success) {
2733 if (search.eopat[0] <= minPos) {
2734 pos = search.bopat[0];
2735 lenRet = search.eopat[0] - search.bopat[0];
2736 } else {
2737 success = 0;
2742 break;
2745 *length = lenRet;
2746 return pos;
2749 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2750 substituted.clear();
2751 DocumentIndexer di(doc, doc->Length());
2752 search.GrabMatches(di);
2753 for (int j = 0; j < *length; j++) {
2754 if (text[j] == '\\') {
2755 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2756 unsigned int patNum = text[j + 1] - '0';
2757 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2758 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2759 substituted.append(search.pat[patNum].c_str(), len);
2760 j++;
2761 } else {
2762 j++;
2763 switch (text[j]) {
2764 case 'a':
2765 substituted.push_back('\a');
2766 break;
2767 case 'b':
2768 substituted.push_back('\b');
2769 break;
2770 case 'f':
2771 substituted.push_back('\f');
2772 break;
2773 case 'n':
2774 substituted.push_back('\n');
2775 break;
2776 case 'r':
2777 substituted.push_back('\r');
2778 break;
2779 case 't':
2780 substituted.push_back('\t');
2781 break;
2782 case 'v':
2783 substituted.push_back('\v');
2784 break;
2785 case '\\':
2786 substituted.push_back('\\');
2787 break;
2788 default:
2789 substituted.push_back('\\');
2790 j--;
2793 } else {
2794 substituted.push_back(text[j]);
2797 *length = static_cast<int>(substituted.length());
2798 return substituted.c_str();
2801 #ifndef SCI_OWNREGEX
2803 #ifdef SCI_NAMESPACE
2805 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2806 return new BuiltinRegex(charClassTable);
2809 #else
2811 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2812 return new BuiltinRegex(charClassTable);
2815 #endif
2817 #endif