Merge branch 'master' of https://github.com/konsolebox/geany into konsolebox-master
[geany-mirror.git] / scintilla / src / Document.cxx
blob9566641033e18e3d9af655ea9176ab1a58e77ff6
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "Position.h"
30 #include "SplitVector.h"
31 #include "Partitioning.h"
32 #include "RunStyles.h"
33 #include "CellBuffer.h"
34 #include "PerLine.h"
35 #include "CharClassify.h"
36 #include "Decoration.h"
37 #include "CaseFolder.h"
38 #include "Document.h"
39 #include "RESearch.h"
40 #include "UniConversion.h"
41 #include "UnicodeFromUTF8.h"
43 #ifdef SCI_NAMESPACE
44 using namespace Scintilla;
45 #endif
47 static inline bool IsPunctuation(char ch) {
48 return IsASCII(ch) && ispunct(ch);
51 void LexInterface::Colourise(int start, int end) {
52 if (pdoc && instance && !performingStyle) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle = true;
58 int lengthDoc = pdoc->Length();
59 if (end == -1)
60 end = lengthDoc;
61 int len = end - start;
63 PLATFORM_ASSERT(len >= 0);
64 PLATFORM_ASSERT(start + len <= lengthDoc);
66 int styleStart = 0;
67 if (start > 0)
68 styleStart = pdoc->StyleAt(start - 1);
70 if (len > 0) {
71 instance->Lex(start, len, styleStart, pdoc);
72 instance->Fold(start, len, styleStart, pdoc);
75 performingStyle = false;
79 int LexInterface::LineEndTypesSupported() {
80 if (instance) {
81 int interfaceVersion = instance->Version();
82 if (interfaceVersion >= lvSubStyles) {
83 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
84 return ssinstance->LineEndTypesSupported();
87 return 0;
90 Document::Document() {
91 refCount = 0;
92 pcf = NULL;
93 #ifdef _WIN32
94 eolMode = SC_EOL_CRLF;
95 #else
96 eolMode = SC_EOL_LF;
97 #endif
98 dbcsCodePage = 0;
99 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
100 endStyled = 0;
101 styleClock = 0;
102 enteredModification = 0;
103 enteredStyling = 0;
104 enteredReadOnlyCount = 0;
105 insertionSet = false;
106 tabInChars = 8;
107 indentInChars = 0;
108 actualIndentInChars = 8;
109 useTabs = true;
110 tabIndents = true;
111 backspaceUnindents = false;
113 matchesValid = false;
114 regex = 0;
116 UTF8BytesOfLeadInitialise();
118 perLineData[ldMarkers] = new LineMarkers();
119 perLineData[ldLevels] = new LineLevels();
120 perLineData[ldState] = new LineState();
121 perLineData[ldMargin] = new LineAnnotation();
122 perLineData[ldAnnotation] = new LineAnnotation();
124 cb.SetPerLine(this);
126 pli = 0;
129 Document::~Document() {
130 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
131 it->watcher->NotifyDeleted(this, it->userData);
133 for (int j=0; j<ldSize; j++) {
134 delete perLineData[j];
135 perLineData[j] = 0;
137 delete regex;
138 regex = 0;
139 delete pli;
140 pli = 0;
141 delete pcf;
142 pcf = 0;
145 void Document::Init() {
146 for (int j=0; j<ldSize; j++) {
147 if (perLineData[j])
148 perLineData[j]->Init();
152 int Document::LineEndTypesSupported() const {
153 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
154 return pli->LineEndTypesSupported();
155 else
156 return 0;
159 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
160 if (dbcsCodePage != dbcsCodePage_) {
161 dbcsCodePage = dbcsCodePage_;
162 SetCaseFolder(NULL);
163 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
164 return true;
165 } else {
166 return false;
170 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
171 if (lineEndBitSet != lineEndBitSet_) {
172 lineEndBitSet = lineEndBitSet_;
173 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
174 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
175 ModifiedAt(0);
176 cb.SetLineEndTypes(lineEndBitSetActive);
177 return true;
178 } else {
179 return false;
181 } else {
182 return false;
186 void Document::InsertLine(int line) {
187 for (int j=0; j<ldSize; j++) {
188 if (perLineData[j])
189 perLineData[j]->InsertLine(line);
193 void Document::RemoveLine(int line) {
194 for (int j=0; j<ldSize; j++) {
195 if (perLineData[j])
196 perLineData[j]->RemoveLine(line);
200 // Increase reference count and return its previous value.
201 int Document::AddRef() {
202 return refCount++;
205 // Decrease reference count and return its previous value.
206 // Delete the document if reference count reaches zero.
207 int SCI_METHOD Document::Release() {
208 int curRefCount = --refCount;
209 if (curRefCount == 0)
210 delete this;
211 return curRefCount;
214 void Document::SetSavePoint() {
215 cb.SetSavePoint();
216 NotifySavePoint(true);
// Undo every step of the tentative sequence reported by cb.TentativeSteps(),
// sending a "before" and an "after" modification notification around each
// step, and then finish with cb.TentativeCommit().
// Returns immediately when TentativeActive() is false. No undo work is done
// when a modification is already in progress (enteredModification != 0) or
// when the cell buffer is read-only.
219 void Document::TentativeUndo() {
220 if (!TentativeActive())
221 return;
// CheckReadOnly issues NotifyModifyAttempt when the buffer is read-only.
222 CheckReadOnly();
223 if (enteredModification == 0) {
// Guard against reentrant modification while notifications are delivered.
224 enteredModification++;
225 if (!cb.IsReadOnly()) {
226 bool startSavePoint = cb.IsSavePoint();
// Becomes true once any step changes the number of lines.
227 bool multiLine = false;
228 int steps = cb.TentativeSteps();
229 //Platform::DebugPrintf("Steps=%d\n", steps);
230 for (int step = 0; step < steps; step++) {
231 const int prevLinesTotal = LinesTotal();
232 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal announces an insertion,
// a container action only carries its token, anything else announces
// a deletion.
233 if (action.at == removeAction) {
234 NotifyModified(DocModification(
235 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
236 } else if (action.at == containerAction) {
237 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
238 dm.token = action.position;
239 NotifyModified(dm);
240 } else {
241 NotifyModified(DocModification(
242 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
244 cb.PerformUndoStep();
// Container actions do not touch text, so styling is only invalidated
// for the other action types.
245 if (action.at != containerAction) {
246 ModifiedAt(action.position);
// Build the flags for the "after" notification.
249 int modFlags = SC_PERFORMED_UNDO;
250 // With undo, an insertion action becomes a deletion notification
251 if (action.at == removeAction) {
252 modFlags |= SC_MOD_INSERTTEXT;
253 } else if (action.at == insertAction) {
254 modFlags |= SC_MOD_DELETETEXT;
256 if (steps > 1)
257 modFlags |= SC_MULTISTEPUNDOREDO;
258 const int linesAdded = LinesTotal() - prevLinesTotal;
259 if (linesAdded != 0)
260 multiLine = true;
// The final step is flagged, and also says whether any step of the
// sequence changed the line count.
261 if (step == steps - 1) {
262 modFlags |= SC_LASTSTEPINUNDOREDO;
263 if (multiLine)
264 modFlags |= SC_MULTILINEUNDOREDO;
266 NotifyModified(DocModification(modFlags, action.position, action.lenData,
267 linesAdded, action.data));
// Report a save point notification only when the save point state changed.
270 bool endSavePoint = cb.IsSavePoint();
271 if (startSavePoint != endSavePoint)
272 NotifySavePoint(endSavePoint);
274 cb.TentativeCommit();
276 enteredModification--;
280 int Document::GetMark(int line) {
281 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
284 int Document::MarkerNext(int lineStart, int mask) const {
285 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
288 int Document::AddMark(int line, int markerNum) {
289 if (line >= 0 && line <= LinesTotal()) {
290 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
291 AddMark(line, markerNum, LinesTotal());
292 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
293 NotifyModified(mh);
294 return prev;
295 } else {
296 return 0;
300 void Document::AddMarkSet(int line, int valueSet) {
301 if (line < 0 || line > LinesTotal()) {
302 return;
304 unsigned int m = valueSet;
305 for (int i = 0; m; i++, m >>= 1)
306 if (m & 1)
307 static_cast<LineMarkers *>(perLineData[ldMarkers])->
308 AddMark(line, i, LinesTotal());
309 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
310 NotifyModified(mh);
313 void Document::DeleteMark(int line, int markerNum) {
314 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
315 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
316 NotifyModified(mh);
319 void Document::DeleteMarkFromHandle(int markerHandle) {
320 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
321 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
322 mh.line = -1;
323 NotifyModified(mh);
326 void Document::DeleteAllMarks(int markerNum) {
327 bool someChanges = false;
328 for (int line = 0; line < LinesTotal(); line++) {
329 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
330 someChanges = true;
332 if (someChanges) {
333 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
334 mh.line = -1;
335 NotifyModified(mh);
339 int Document::LineFromHandle(int markerHandle) {
340 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
343 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
344 return cb.LineStart(line);
347 bool Document::IsLineStartPosition(int position) const {
348 return LineStart(LineFromPosition(position)) == position;
351 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
352 if (line >= LinesTotal() - 1) {
353 return LineStart(line + 1);
354 } else {
355 int position = LineStart(line + 1);
356 if (SC_CP_UTF8 == dbcsCodePage) {
357 unsigned char bytes[] = {
358 static_cast<unsigned char>(cb.CharAt(position-3)),
359 static_cast<unsigned char>(cb.CharAt(position-2)),
360 static_cast<unsigned char>(cb.CharAt(position-1)),
362 if (UTF8IsSeparator(bytes)) {
363 return position - UTF8SeparatorLength;
365 if (UTF8IsNEL(bytes+1)) {
366 return position - UTF8NELLength;
369 position--; // Back over CR or LF
370 // When line terminator is CR+LF, may need to go back one more
371 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
372 position--;
374 return position;
378 void SCI_METHOD Document::SetErrorStatus(int status) {
379 // Tell the watchers an error has occurred.
380 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
381 it->watcher->NotifyErrorOccurred(this, it->userData, status);
385 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
386 return cb.LineFromPosition(pos);
389 int Document::LineEndPosition(int position) const {
390 return LineEnd(LineFromPosition(position));
393 bool Document::IsLineEndPosition(int position) const {
394 return LineEnd(LineFromPosition(position)) == position;
397 bool Document::IsPositionInLineEnd(int position) const {
398 return position >= LineEnd(LineFromPosition(position));
401 int Document::VCHomePosition(int position) const {
402 int line = LineFromPosition(position);
403 int startPosition = LineStart(line);
404 int endLine = LineEnd(line);
405 int startText = startPosition;
406 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
407 startText++;
408 if (position == startText)
409 return startPosition;
410 else
411 return startText;
414 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
415 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
416 if (prev != level) {
417 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
418 LineStart(line), 0, 0, 0, line);
419 mh.foldLevelNow = level;
420 mh.foldLevelPrev = prev;
421 NotifyModified(mh);
423 return prev;
426 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
427 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
430 void Document::ClearLevels() {
431 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
434 static bool IsSubordinate(int levelStart, int levelTry) {
435 if (levelTry & SC_FOLDLEVELWHITEFLAG)
436 return true;
437 else
438 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
441 int Document::GetLastChild(int lineParent, int level, int lastLine) {
442 if (level == -1)
443 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
444 int maxLine = LinesTotal();
445 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
446 int lineMaxSubord = lineParent;
447 while (lineMaxSubord < maxLine - 1) {
448 EnsureStyledTo(LineStart(lineMaxSubord + 2));
449 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
450 break;
451 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
452 break;
453 lineMaxSubord++;
455 if (lineMaxSubord > lineParent) {
456 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
457 // Have chewed up some whitespace that belongs to a parent so seek back
458 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
459 lineMaxSubord--;
463 return lineMaxSubord;
466 int Document::GetFoldParent(int line) const {
467 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
468 int lineLook = line - 1;
469 while ((lineLook > 0) && (
470 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
471 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
473 lineLook--;
475 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
476 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
477 return lineLook;
478 } else {
479 return -1;
// Fill highlightDelimiter with four line numbers describing the fold block
// around `line`: beginFoldBlock, endFoldBlock, firstChangeableLineBefore and
// firstChangeableLineAfter. `lastLine` bounds how far GetLastChild looks.
// highlightDelimiter is cleared instead when no enclosing fold header is found.
483 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
484 int level = GetLevel(line);
485 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
487 int lookLine = line;
488 int lookLineLevel = level;
489 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Walk upwards while the current line is white-flagged, or is a header whose
// level number is not below the level number of the line that follows it.
490 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
491 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
492 lookLineLevel = GetLevel(--lookLine);
493 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// The block starts at the line reached if it is a header, otherwise at its
// fold parent.
496 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
497 if (beginFoldBlock == -1) {
498 highlightDelimiter.Clear();
499 return;
502 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
503 int firstChangeableLineBefore = -1;
// The block found ends above `line`: look at earlier headers for one whose
// last child is exactly `line`.
504 if (endFoldBlock < line) {
505 lookLine = beginFoldBlock - 1;
506 lookLineLevel = GetLevel(lookLine);
507 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
508 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
509 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
510 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
511 beginFoldBlock = lookLine;
512 endFoldBlock = line;
513 firstChangeableLineBefore = line - 1;
// Stop at a base-level line that is preceded by a deeper line.
516 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
517 break;
518 lookLineLevel = GetLevel(--lookLine);
519 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Not decided yet: scan from the line above `line` back to the block start
// for the nearest line that is white-flagged or deeper than `line`.
522 if (firstChangeableLineBefore == -1) {
523 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
524 lookLine >= beginFoldBlock;
525 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
526 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
527 firstChangeableLineBefore = lookLine;
528 break;
// Still nothing found: fall back to the line just above the block.
532 if (firstChangeableLineBefore == -1)
533 firstChangeableLineBefore = beginFoldBlock - 1;
// Scan downwards from the line after `line` to the block end for the first
// header whose level number is below that of the line following it.
535 int firstChangeableLineAfter = -1;
536 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
537 lookLine <= endFoldBlock;
538 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
539 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
540 firstChangeableLineAfter = lookLine;
541 break;
// None found: fall back to the line just below the block.
544 if (firstChangeableLineAfter == -1)
545 firstChangeableLineAfter = endFoldBlock + 1;
547 highlightDelimiter.beginFoldBlock = beginFoldBlock;
548 highlightDelimiter.endFoldBlock = endFoldBlock;
549 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
550 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
553 int Document::ClampPositionIntoDocument(int pos) const {
554 return Platform::Clamp(pos, 0, Length());
557 bool Document::IsCrLf(int pos) const {
558 if (pos < 0)
559 return false;
560 if (pos >= (Length() - 1))
561 return false;
562 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
565 int Document::LenChar(int pos) {
566 if (pos < 0) {
567 return 1;
568 } else if (IsCrLf(pos)) {
569 return 2;
570 } else if (SC_CP_UTF8 == dbcsCodePage) {
571 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
572 const int widthCharBytes = UTF8BytesOfLead[leadByte];
573 int lengthDoc = Length();
574 if ((pos + widthCharBytes) > lengthDoc)
575 return lengthDoc - pos;
576 else
577 return widthCharBytes;
578 } else if (dbcsCodePage) {
579 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
580 } else {
581 return 1;
585 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
586 int trail = pos;
587 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
588 trail--;
589 start = (trail > 0) ? trail-1 : trail;
591 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
592 const int widthCharBytes = UTF8BytesOfLead[leadByte];
593 if (widthCharBytes == 1) {
594 return false;
595 } else {
596 int trailBytes = widthCharBytes - 1;
597 int len = pos - start;
598 if (len > trailBytes)
599 // pos too far from lead
600 return false;
601 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
602 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
603 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
604 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
605 if (utf8status & UTF8MaskInvalid)
606 return false;
607 end = start + widthCharBytes;
608 return true;
612 // Normalise a position so that it is not halfway through a two byte character.
613 // This can occur in two situations -
614 // When lines are terminated with \r\n pairs which should be treated as one character.
615 // When displaying DBCS text such as Japanese.
616 // If moving, move the position in the indicated direction.
// pos: position to normalise; values outside the document are clamped to 0
//      or Length().
// moveDir: when pos is inside a character, > 0 returns the position after the
//      character and any other value returns the position of its start.
// checkLineEnd: when true, a position between CR and LF is also moved.
// Returns pos unchanged when it is already on a character boundary.
617 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
618 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
619 // If out of range, just return minimum/maximum value.
620 if (pos <= 0)
621 return 0;
622 if (pos >= Length())
623 return Length();
625 // PLATFORM_ASSERT(pos > 0 && pos < Length());
// pos sits between the CR and the LF of a CR+LF pair.
626 if (checkLineEnd && IsCrLf(pos - 1)) {
627 if (moveDir > 0)
628 return pos + 1;
629 else
630 return pos - 1;
// Single byte encodings (dbcsCodePage == 0) need no further adjustment.
633 if (dbcsCodePage) {
634 if (SC_CP_UTF8 == dbcsCodePage) {
635 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
636 // If ch is not a trail byte then pos is valid intercharacter position
637 if (UTF8IsTrailByte(ch)) {
638 int startUTF = pos;
639 int endUTF = pos;
640 if (InGoodUTF8(pos, startUTF, endUTF)) {
641 // ch is a trail byte within a UTF-8 character
642 if (moveDir > 0)
643 pos = endUTF;
644 else
645 pos = startUTF;
647 // Else invalid UTF-8 so return position of isolated trail byte
649 } else {
650 // Anchor DBCS calculations at start of line because start of line can
651 // not be a DBCS trail byte.
652 int posStartLine = LineStart(LineFromPosition(pos));
653 if (pos == posStartLine)
654 return pos;
656 // Step back until a non-lead-byte is found.
657 int posCheck = pos;
658 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
659 posCheck--;
661 // Check from known start of character.
// Walk forward one character at a time; if a character straddles pos,
// return its end or start according to moveDir.
662 while (posCheck < pos) {
663 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
664 if (posCheck + mbsize == pos) {
665 return pos;
666 } else if (posCheck + mbsize > pos) {
667 if (moveDir > 0) {
668 return posCheck + mbsize;
669 } else {
670 return posCheck;
673 posCheck += mbsize;
678 return pos;
681 // NextPosition moves between valid positions - it can not handle a position in the middle of a
682 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
683 // A \r\n pair is treated as two characters.
// pos: a valid character boundary to move from.
// moveDir: > 0 moves forward one character, any other value moves backward.
// Returns the new position, clamped to 0 and Length().
684 int Document::NextPosition(int pos, int moveDir) const {
685 // If out of range, just return minimum/maximum value.
686 int increment = (moveDir > 0) ? 1 : -1;
687 if (pos + increment <= 0)
688 return 0;
689 if (pos + increment >= Length())
690 return Length();
692 if (dbcsCodePage) {
693 if (SC_CP_UTF8 == dbcsCodePage) {
694 if (increment == 1) {
695 // Simple forward movement case so can avoid some checks
696 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
697 if (UTF8IsAscii(leadByte)) {
698 // Single byte character or invalid
699 pos++;
700 } else {
// Collect the bytes implied by the lead byte and classify them; an
// invalid sequence advances by one byte only.
701 const int widthCharBytes = UTF8BytesOfLead[leadByte];
702 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
703 for (int b=1; b<widthCharBytes; b++)
704 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
705 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
706 if (utf8status & UTF8MaskInvalid)
707 pos++;
708 else
709 pos += utf8status & UTF8MaskWidth;
711 } else {
712 // Examine byte before position
713 pos--;
714 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
715 // If ch is not a trail byte then pos is valid intercharacter position
716 if (UTF8IsTrailByte(ch)) {
717 // If ch is a trail byte in a valid UTF-8 character then return start of character
718 int startUTF = pos;
719 int endUTF = pos;
720 if (InGoodUTF8(pos, startUTF, endUTF)) {
721 pos = startUTF;
723 // Else invalid UTF-8 so return position of isolated trail byte
726 } else {
// DBCS code pages.
727 if (moveDir > 0) {
728 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
729 pos += mbsize;
730 if (pos > Length())
731 pos = Length();
732 } else {
733 // Anchor DBCS calculations at start of line because start of line can
734 // not be a DBCS trail byte.
735 int posStartLine = LineStart(LineFromPosition(pos));
736 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
737 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
738 if ((pos - 1) <= posStartLine) {
739 return pos - 1;
740 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
741 // Must actually be trail byte
742 return pos - 2;
743 } else {
744 // Otherwise, step back until a non-lead-byte is found.
745 int posTemp = pos - 1;
// NOTE(review): this loop appears to have had an empty body (a lone ';')
// on a line missing from this copy of the source - confirm against the
// upstream file.
746 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
748 // Now posTemp+1 must point to the beginning of a character,
749 // so figure out whether we went back an even or an odd
750 // number of bytes and go back 1 or 2 bytes, respectively.
751 return (pos - 1 - ((pos - posTemp) & 1));
755 } else {
// Single byte encoding: every byte is a character.
756 pos += increment;
759 return pos;
762 bool Document::NextCharacter(int &pos, int moveDir) const {
763 // Returns true if pos changed
764 int posNext = NextPosition(pos, moveDir);
765 if (posNext == pos) {
766 return false;
767 } else {
768 pos = posNext;
769 return true;
773 // Return -1 on out-of-bounds
774 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
775 int pos = positionStart;
776 if (dbcsCodePage) {
777 const int increment = (characterOffset > 0) ? 1 : -1;
778 while (characterOffset != 0) {
779 const int posNext = NextPosition(pos, increment);
780 if (posNext == pos)
781 return INVALID_POSITION;
782 pos = posNext;
783 characterOffset -= increment;
785 } else {
786 pos = positionStart + characterOffset;
787 if ((pos < 0) || (pos > Length()))
788 return INVALID_POSITION;
790 return pos;
793 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
794 int pos = positionStart;
795 if (dbcsCodePage) {
796 const int increment = (characterOffset > 0) ? 1 : -1;
797 while (characterOffset != 0) {
798 const int posNext = NextPosition(pos, increment);
799 if (posNext == pos)
800 return INVALID_POSITION;
801 if (abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
802 characterOffset -= increment;
803 pos = posNext;
804 characterOffset -= increment;
806 } else {
807 pos = positionStart + characterOffset;
808 if ((pos < 0) || (pos > Length()))
809 return INVALID_POSITION;
811 return pos;
814 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
815 int character;
816 int bytesInCharacter = 1;
817 if (dbcsCodePage) {
818 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
819 if (SC_CP_UTF8 == dbcsCodePage) {
820 if (UTF8IsAscii(leadByte)) {
821 // Single byte character or invalid
822 character = leadByte;
823 } else {
824 const int widthCharBytes = UTF8BytesOfLead[leadByte];
825 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
826 for (int b=1; b<widthCharBytes; b++)
827 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
828 int utf8status = UTF8Classify(charBytes, widthCharBytes);
829 if (utf8status & UTF8MaskInvalid) {
830 // Report as singleton surrogate values which are invalid Unicode
831 character = 0xDC80 + leadByte;
832 } else {
833 bytesInCharacter = utf8status & UTF8MaskWidth;
834 character = UnicodeFromUTF8(charBytes);
837 } else {
838 if (IsDBCSLeadByte(leadByte)) {
839 bytesInCharacter = 2;
840 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
841 } else {
842 character = leadByte;
845 } else {
846 character = cb.CharAt(position);
848 if (pWidth) {
849 *pWidth = bytesInCharacter;
851 return character;
854 int SCI_METHOD Document::CodePage() const {
855 return dbcsCodePage;
858 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
859 // Byte ranges found in Wikipedia articles with relevant search strings in each case
860 unsigned char uch = static_cast<unsigned char>(ch);
861 switch (dbcsCodePage) {
862 case 932:
863 // Shift_jis
864 return ((uch >= 0x81) && (uch <= 0x9F)) ||
865 ((uch >= 0xE0) && (uch <= 0xFC));
866 // Lead bytes F0 to FC may be a Microsoft addition.
867 case 936:
868 // GBK
869 return (uch >= 0x81) && (uch <= 0xFE);
870 case 949:
871 // Korean Wansung KS C-5601-1987
872 return (uch >= 0x81) && (uch <= 0xFE);
873 case 950:
874 // Big5
875 return (uch >= 0x81) && (uch <= 0xFE);
876 case 1361:
877 // Korean Johab KS C-5601-1992
878 return
879 ((uch >= 0x84) && (uch <= 0xD3)) ||
880 ((uch >= 0xD8) && (uch <= 0xDE)) ||
881 ((uch >= 0xE0) && (uch <= 0xF9));
883 return false;
// True for the space character and the horizontal tab, false otherwise.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
890 // Need to break text into segments near lengthSegment but taking into
891 // account the encoding to not break inside a UTF-8 or DBCS character
892 // and also trying to avoid breaking inside a pair of combining characters.
893 // The segment length must always be long enough (more than 4 bytes)
894 // so that there will be at least one whole character to make a segment.
895 // For UTF-8, text must consist only of valid whole characters.
896 // In preference order from best to worst:
897 // 1) Break after space
898 // 2) Break before punctuation
899 // 3) Break after whole character
901 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
902 if (length <= lengthSegment)
903 return length;
904 int lastSpaceBreak = -1;
905 int lastPunctuationBreak = -1;
906 int lastEncodingAllowedBreak = 0;
907 for (int j=0; j < lengthSegment;) {
908 unsigned char ch = static_cast<unsigned char>(text[j]);
909 if (j > 0) {
910 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
911 lastSpaceBreak = j;
913 if (ch < 'A') {
914 lastPunctuationBreak = j;
917 lastEncodingAllowedBreak = j;
919 if (dbcsCodePage == SC_CP_UTF8) {
920 j += UTF8BytesOfLead[ch];
921 } else if (dbcsCodePage) {
922 j += IsDBCSLeadByte(ch) ? 2 : 1;
923 } else {
924 j++;
927 if (lastSpaceBreak >= 0) {
928 return lastSpaceBreak;
929 } else if (lastPunctuationBreak >= 0) {
930 return lastPunctuationBreak;
932 return lastEncodingAllowedBreak;
935 EncodingFamily Document::CodePageFamily() const {
936 if (SC_CP_UTF8 == dbcsCodePage)
937 return efUnicode;
938 else if (dbcsCodePage)
939 return efDBCS;
940 else
941 return efEightBit;
944 void Document::ModifiedAt(int pos) {
945 if (endStyled > pos)
946 endStyled = pos;
949 void Document::CheckReadOnly() {
950 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
951 enteredReadOnlyCount++;
952 NotifyModifyAttempt();
953 enteredReadOnlyCount--;
957 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
958 // SetStyleAt does not change the persistent state of a document
960 bool Document::DeleteChars(int pos, int len) {
961 if (pos < 0)
962 return false;
963 if (len <= 0)
964 return false;
965 if ((pos + len) > Length())
966 return false;
967 CheckReadOnly();
968 if (enteredModification != 0) {
969 return false;
970 } else {
971 enteredModification++;
972 if (!cb.IsReadOnly()) {
973 NotifyModified(
974 DocModification(
975 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
976 pos, len,
977 0, 0));
978 int prevLinesTotal = LinesTotal();
979 bool startSavePoint = cb.IsSavePoint();
980 bool startSequence = false;
981 const char *text = cb.DeleteChars(pos, len, startSequence);
982 if (startSavePoint && cb.IsCollectingUndo())
983 NotifySavePoint(!startSavePoint);
984 if ((pos < Length()) || (pos == 0))
985 ModifiedAt(pos);
986 else
987 ModifiedAt(pos-1);
988 NotifyModified(
989 DocModification(
990 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
991 pos, len,
992 LinesTotal() - prevLinesTotal, text));
994 enteredModification--;
996 return !cb.IsReadOnly();
1000 * Insert a string with a length.
1002 int Document::InsertString(int position, const char *s, int insertLength) {
1003 if (insertLength <= 0) {
1004 return 0;
1006 CheckReadOnly(); // Application may change read only state here
1007 if (cb.IsReadOnly()) {
1008 return 0;
1010 if (enteredModification != 0) {
1011 return 0;
1013 enteredModification++;
1014 insertionSet = false;
1015 insertion.clear();
1016 NotifyModified(
1017 DocModification(
1018 SC_MOD_INSERTCHECK,
1019 position, insertLength,
1020 0, s));
1021 if (insertionSet) {
1022 s = insertion.c_str();
1023 insertLength = static_cast<int>(insertion.length());
1025 NotifyModified(
1026 DocModification(
1027 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1028 position, insertLength,
1029 0, s));
1030 int prevLinesTotal = LinesTotal();
1031 bool startSavePoint = cb.IsSavePoint();
1032 bool startSequence = false;
1033 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1034 if (startSavePoint && cb.IsCollectingUndo())
1035 NotifySavePoint(!startSavePoint);
1036 ModifiedAt(position);
1037 NotifyModified(
1038 DocModification(
1039 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1040 position, insertLength,
1041 LinesTotal() - prevLinesTotal, text));
1042 if (insertionSet) { // Free memory as could be large
1043 std::string().swap(insertion);
1045 enteredModification--;
1046 return insertLength;
1049 void Document::ChangeInsertion(const char *s, int length) {
1050 insertionSet = true;
1051 insertion.assign(s, length);
1054 int SCI_METHOD Document::AddData(char *data, Sci_Position length) {
1055 try {
1056 int position = Length();
1057 InsertString(position, data, length);
1058 } catch (std::bad_alloc &) {
1059 return SC_STATUS_BADALLOC;
1060 } catch (...) {
1061 return SC_STATUS_FAILURE;
1063 return 0;
1066 void * SCI_METHOD Document::ConvertToDocument() {
1067 return this;
// Undo one group of steps from the cell buffer's undo history, sending a
// "before" and an "after" modification notification for every step.
// Returns newPos: -1 if no text step was undone, otherwise the position of
// the last undone text step (advanced past re-inserted text, see below).
1070 int Document::Undo() {
1071 int newPos = -1;
1072 CheckReadOnly();
// Only proceed when no other modification is in progress and undo is being collected.
1073 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1074 enteredModification++;
1075 if (!cb.IsReadOnly()) {
1076 bool startSavePoint = cb.IsSavePoint();
1077 bool multiLine = false;
1078 int steps = cb.StartUndo();
1079 //Platform::DebugPrintf("Steps=%d\n", steps);
// State for a run of adjacent removals being undone: the start and total
// length of the run so far, plus the position/length of the previous removal.
1080 int coalescedRemovePos = -1;
1081 int coalescedRemoveLen = 0;
1082 int prevRemoveActionPos = -1;
1083 int prevRemoveActionLen = 0;
1084 for (int step = 0; step < steps; step++) {
1085 const int prevLinesTotal = LinesTotal();
1086 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal puts text back (BEFOREINSERT),
// a container action is reported with its token, anything else is BEFOREDELETE.
1087 if (action.at == removeAction) {
1088 NotifyModified(DocModification(
1089 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1090 } else if (action.at == containerAction) {
1091 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1092 dm.token = action.position;
1093 NotifyModified(dm);
// A container action that may not coalesce ends any current run of removals.
1094 if (!action.mayCoalesce) {
1095 coalescedRemovePos = -1;
1096 coalescedRemoveLen = 0;
1097 prevRemoveActionPos = -1;
1098 prevRemoveActionLen = 0;
1100 } else {
1101 NotifyModified(DocModification(
1102 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1104 cb.PerformUndoStep();
1105 if (action.at != containerAction) {
1106 ModifiedAt(action.position);
1107 newPos = action.position;
1110 int modFlags = SC_PERFORMED_UNDO;
1111 // With undo, an insertion action becomes a deletion notification
1112 if (action.at == removeAction) {
1113 newPos += action.lenData;
1114 modFlags |= SC_MOD_INSERTTEXT;
// If this removal starts at the previous removal's position or directly after
// its end, extend the run and place newPos after the whole run.
1115 if ((coalescedRemoveLen > 0) &&
1116 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1117 coalescedRemoveLen += action.lenData;
1118 newPos = coalescedRemovePos + coalescedRemoveLen;
1119 } else {
1120 coalescedRemovePos = action.position;
1121 coalescedRemoveLen = action.lenData;
1123 prevRemoveActionPos = action.position;
1124 prevRemoveActionLen = action.lenData;
1125 } else if (action.at == insertAction) {
1126 modFlags |= SC_MOD_DELETETEXT;
1127 coalescedRemovePos = -1;
1128 coalescedRemoveLen = 0;
1129 prevRemoveActionPos = -1;
1130 prevRemoveActionLen = 0;
1132 if (steps > 1)
1133 modFlags |= SC_MULTISTEPUNDOREDO;
1134 const int linesAdded = LinesTotal() - prevLinesTotal;
1135 if (linesAdded != 0)
1136 multiLine = true;
// The final step is flagged, and also says whether any step changed the line count.
1137 if (step == steps - 1) {
1138 modFlags |= SC_LASTSTEPINUNDOREDO;
1139 if (multiLine)
1140 modFlags |= SC_MULTILINEUNDOREDO;
1142 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1143 linesAdded, action.data));
// Tell watchers if undoing moved the document onto or off a save point.
1146 bool endSavePoint = cb.IsSavePoint();
1147 if (startSavePoint != endSavePoint)
1148 NotifySavePoint(endSavePoint);
1150 enteredModification--;
1152 return newPos;
// Redo one group of steps from the cell buffer's redo history, sending a
// "before" and an "after" modification notification for every step.
// Returns newPos: -1 if no text step was redone, otherwise the position of
// the last redone text step, moved past the text when that step was an insertion.
1155 int Document::Redo() {
1156 int newPos = -1;
1157 CheckReadOnly();
// Only proceed when no other modification is in progress and undo is being collected.
1158 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1159 enteredModification++;
1160 if (!cb.IsReadOnly()) {
1161 bool startSavePoint = cb.IsSavePoint();
1162 bool multiLine = false;
1163 int steps = cb.StartRedo();
1164 for (int step = 0; step < steps; step++) {
1165 const int prevLinesTotal = LinesTotal();
1166 const Action &action = cb.GetRedoStep();
// "Before" notification: an insertion is BEFOREINSERT, a container action is
// reported with its token, anything else is BEFOREDELETE.
1167 if (action.at == insertAction) {
1168 NotifyModified(DocModification(
1169 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1170 } else if (action.at == containerAction) {
1171 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1172 dm.token = action.position;
1173 NotifyModified(dm);
1174 } else {
1175 NotifyModified(DocModification(
1176 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1178 cb.PerformRedoStep();
1179 if (action.at != containerAction) {
1180 ModifiedAt(action.position);
1181 newPos = action.position;
// "After" notification flags mirror the action type.
1184 int modFlags = SC_PERFORMED_REDO;
1185 if (action.at == insertAction) {
1186 newPos += action.lenData;
1187 modFlags |= SC_MOD_INSERTTEXT;
1188 } else if (action.at == removeAction) {
1189 modFlags |= SC_MOD_DELETETEXT;
1191 if (steps > 1)
1192 modFlags |= SC_MULTISTEPUNDOREDO;
1193 const int linesAdded = LinesTotal() - prevLinesTotal;
1194 if (linesAdded != 0)
1195 multiLine = true;
// The final step is flagged, and also says whether any step changed the line count.
1196 if (step == steps - 1) {
1197 modFlags |= SC_LASTSTEPINUNDOREDO;
1198 if (multiLine)
1199 modFlags |= SC_MULTILINEUNDOREDO;
1201 NotifyModified(
1202 DocModification(modFlags, action.position, action.lenData,
1203 linesAdded, action.data));
// Tell watchers if redoing moved the document onto or off a save point.
1206 bool endSavePoint = cb.IsSavePoint();
1207 if (startSavePoint != endSavePoint)
1208 NotifySavePoint(endSavePoint);
1210 enteredModification--;
1212 return newPos;
1215 void Document::DelChar(int pos) {
1216 DeleteChars(pos, LenChar(pos));
1219 void Document::DelCharBack(int pos) {
1220 if (pos <= 0) {
1221 return;
1222 } else if (IsCrLf(pos - 2)) {
1223 DeleteChars(pos - 2, 2);
1224 } else if (dbcsCodePage) {
1225 int startChar = NextPosition(pos, -1);
1226 DeleteChars(startChar, pos - startChar);
1227 } else {
1228 DeleteChars(pos - 1, 1);
/**
 * Return the first multiple of tabSize that is beyond pos.
 * tabSize must not be zero.
 */
static int NextTab(int pos, int tabSize) {
	const int offsetIntoTab = pos % tabSize;
	return pos - offsetIntoTab + tabSize;
}
/**
 * Build the white space needed to reach a column of indent.
 * @param indent       Width of the indentation in columns; values <= 0 give an empty string.
 * @param tabSize      Columns per tab.
 * @param insertSpaces When true only spaces are produced; otherwise as many tabs
 *                     as fit are used, followed by spaces for the remainder.
 * @return The indentation string.
 * A tabSize that is not positive cannot be expressed in tabs (and used to make
 * the tab loop run forever), so in that case only spaces are produced.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<std::string::size_type>(indent), ' ');
	return indentation;
}
1251 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1252 int indent = 0;
1253 if ((line >= 0) && (line < LinesTotal())) {
1254 int lineStart = LineStart(line);
1255 int length = Length();
1256 for (int i = lineStart; i < length; i++) {
1257 char ch = cb.CharAt(i);
1258 if (ch == ' ')
1259 indent++;
1260 else if (ch == '\t')
1261 indent = NextTab(indent, tabInChars);
1262 else
1263 return indent;
1266 return indent;
1269 int Document::SetLineIndentation(int line, int indent) {
1270 int indentOfLine = GetLineIndentation(line);
1271 if (indent < 0)
1272 indent = 0;
1273 if (indent != indentOfLine) {
1274 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1275 int thisLineStart = LineStart(line);
1276 int indentPos = GetLineIndentPosition(line);
1277 UndoGroup ug(this);
1278 DeleteChars(thisLineStart, indentPos - thisLineStart);
1279 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1280 static_cast<int>(linebuf.length()));
1281 } else {
1282 return GetLineIndentPosition(line);
1286 int Document::GetLineIndentPosition(int line) const {
1287 if (line < 0)
1288 return 0;
1289 int pos = LineStart(line);
1290 int length = Length();
1291 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1292 pos++;
1294 return pos;
1297 int Document::GetColumn(int pos) {
1298 int column = 0;
1299 int line = LineFromPosition(pos);
1300 if ((line >= 0) && (line < LinesTotal())) {
1301 for (int i = LineStart(line); i < pos;) {
1302 char ch = cb.CharAt(i);
1303 if (ch == '\t') {
1304 column = NextTab(column, tabInChars);
1305 i++;
1306 } else if (ch == '\r') {
1307 return column;
1308 } else if (ch == '\n') {
1309 return column;
1310 } else if (i >= Length()) {
1311 return column;
1312 } else {
1313 column++;
1314 i = NextPosition(i, 1);
1318 return column;
1321 int Document::CountCharacters(int startPos, int endPos) const {
1322 startPos = MovePositionOutsideChar(startPos, 1, false);
1323 endPos = MovePositionOutsideChar(endPos, -1, false);
1324 int count = 0;
1325 int i = startPos;
1326 while (i < endPos) {
1327 count++;
1328 i = NextPosition(i, 1);
1330 return count;
1333 int Document::CountUTF16(int startPos, int endPos) const {
1334 startPos = MovePositionOutsideChar(startPos, 1, false);
1335 endPos = MovePositionOutsideChar(endPos, -1, false);
1336 int count = 0;
1337 int i = startPos;
1338 while (i < endPos) {
1339 count++;
1340 const int next = NextPosition(i, 1);
1341 if ((next - i) > 3)
1342 count++;
1343 i = next;
1345 return count;
1348 int Document::FindColumn(int line, int column) {
1349 int position = LineStart(line);
1350 if ((line >= 0) && (line < LinesTotal())) {
1351 int columnCurrent = 0;
1352 while ((columnCurrent < column) && (position < Length())) {
1353 char ch = cb.CharAt(position);
1354 if (ch == '\t') {
1355 columnCurrent = NextTab(columnCurrent, tabInChars);
1356 if (columnCurrent > column)
1357 return position;
1358 position++;
1359 } else if (ch == '\r') {
1360 return position;
1361 } else if (ch == '\n') {
1362 return position;
1363 } else {
1364 columnCurrent++;
1365 position = NextPosition(position, 1);
1369 return position;
1372 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1373 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1374 for (int line = lineBottom; line >= lineTop; line--) {
1375 int indentOfLine = GetLineIndentation(line);
1376 if (forwards) {
1377 if (LineStart(line) < LineEnd(line)) {
1378 SetLineIndentation(line, indentOfLine + IndentSize());
1380 } else {
1381 SetLineIndentation(line, indentOfLine - IndentSize());
1386 // Convert line endings for a piece of text to a particular mode.
1387 // Stop at len or when a NUL is found.
1388 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1389 std::string dest;
1390 for (size_t i = 0; (i < len) && (s[i]); i++) {
1391 if (s[i] == '\n' || s[i] == '\r') {
1392 if (eolModeWanted == SC_EOL_CR) {
1393 dest.push_back('\r');
1394 } else if (eolModeWanted == SC_EOL_LF) {
1395 dest.push_back('\n');
1396 } else { // eolModeWanted == SC_EOL_CRLF
1397 dest.push_back('\r');
1398 dest.push_back('\n');
1400 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1401 i++;
1403 } else {
1404 dest.push_back(s[i]);
1407 return dest;
1410 void Document::ConvertLineEnds(int eolModeSet) {
1411 UndoGroup ug(this);
1413 for (int pos = 0; pos < Length(); pos++) {
1414 if (cb.CharAt(pos) == '\r') {
1415 if (cb.CharAt(pos + 1) == '\n') {
1416 // CRLF
1417 if (eolModeSet == SC_EOL_CR) {
1418 DeleteChars(pos + 1, 1); // Delete the LF
1419 } else if (eolModeSet == SC_EOL_LF) {
1420 DeleteChars(pos, 1); // Delete the CR
1421 } else {
1422 pos++;
1424 } else {
1425 // CR
1426 if (eolModeSet == SC_EOL_CRLF) {
1427 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1428 } else if (eolModeSet == SC_EOL_LF) {
1429 pos += InsertString(pos, "\n", 1); // Insert LF
1430 DeleteChars(pos, 1); // Delete CR
1431 pos--;
1434 } else if (cb.CharAt(pos) == '\n') {
1435 // LF
1436 if (eolModeSet == SC_EOL_CRLF) {
1437 pos += InsertString(pos, "\r", 1); // Insert CR
1438 } else if (eolModeSet == SC_EOL_CR) {
1439 pos += InsertString(pos, "\r", 1); // Insert CR
1440 DeleteChars(pos, 1); // Delete LF
1441 pos--;
1448 bool Document::IsWhiteLine(int line) const {
1449 int currentChar = LineStart(line);
1450 int endLine = LineEnd(line);
1451 while (currentChar < endLine) {
1452 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1453 return false;
1455 ++currentChar;
1457 return true;
1460 int Document::ParaUp(int pos) const {
1461 int line = LineFromPosition(pos);
1462 line--;
1463 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1464 line--;
1466 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1467 line--;
1469 line++;
1470 return LineStart(line);
1473 int Document::ParaDown(int pos) const {
1474 int line = LineFromPosition(pos);
1475 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1476 line++;
1478 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1479 line++;
1481 if (line < LinesTotal())
1482 return LineStart(line);
1483 else // end of a document
1484 return LineEnd(line-1);
1487 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1488 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1489 return CharClassify::ccWord;
1490 return charClass.GetClass(ch);
1494 * Used by commmands that want to select whole words.
1495 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1497 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1498 CharClassify::cc ccStart = CharClassify::ccWord;
1499 if (delta < 0) {
1500 if (!onlyWordCharacters)
1501 ccStart = WordCharClass(cb.CharAt(pos-1));
1502 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1503 pos--;
1504 } else {
1505 if (!onlyWordCharacters && pos < Length())
1506 ccStart = WordCharClass(cb.CharAt(pos));
1507 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1508 pos++;
1510 return MovePositionOutsideChar(pos, delta, true);
1514 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1515 * (delta < 0).
1516 * This is looking for a transition between character classes although there is also some
1517 * additional movement to transit white space.
1518 * Used by cursor movement by word commands.
1520 int Document::NextWordStart(int pos, int delta) {
1521 if (delta < 0) {
1522 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1523 pos--;
1524 if (pos > 0) {
1525 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1526 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1527 pos--;
1530 } else {
1531 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1532 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1533 pos++;
1534 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1535 pos++;
1537 return pos;
1541 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1542 * (delta < 0).
1543 * This is looking for a transition between character classes although there is also some
1544 * additional movement to transit white space.
1545 * Used by cursor movement by word commands.
1547 int Document::NextWordEnd(int pos, int delta) {
1548 if (delta < 0) {
1549 if (pos > 0) {
1550 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1551 if (ccStart != CharClassify::ccSpace) {
1552 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1553 pos--;
1556 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1557 pos--;
1560 } else {
1561 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1562 pos++;
1564 if (pos < Length()) {
1565 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1566 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1567 pos++;
1571 return pos;
1575 * Check that the character at the given position is a word or punctuation character and that
1576 * the previous character is of a different character class.
1578 bool Document::IsWordStartAt(int pos) const {
1579 if (pos > 0) {
1580 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1581 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1582 (ccPos != WordCharClass(CharAt(pos - 1)));
1584 return true;
1588 * Check that the character at the given position is a word or punctuation character and that
1589 * the next character is of a different character class.
1591 bool Document::IsWordEndAt(int pos) const {
1592 if (pos < Length()) {
1593 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1594 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1595 (ccPrev != WordCharClass(CharAt(pos)));
1597 return true;
1601 * Check that the given range is has transitions between character classes at both
1602 * ends and where the characters on the inside are word or punctuation characters.
1604 bool Document::IsWordAt(int start, int end) const {
1605 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1608 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1609 return (!word && !wordStart) ||
1610 (word && IsWordAt(pos, pos + length)) ||
1611 (wordStart && IsWordStartAt(pos));
1614 bool Document::HasCaseFolder(void) const {
1615 return pcf != 0;
1618 void Document::SetCaseFolder(CaseFolder *pcf_) {
1619 delete pcf;
1620 pcf = pcf_;
1623 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1624 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1625 if (UTF8IsAscii(leadByte)) {
1626 // Common case: ASCII character
1627 return CharacterExtracted(leadByte, 1);
1629 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1630 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1631 for (int b=1; b<widthCharBytes; b++)
1632 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1633 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1634 if (utf8status & UTF8MaskInvalid) {
1635 // Treat as invalid and use up just one byte
1636 return CharacterExtracted(unicodeReplacementChar, 1);
1637 } else {
1638 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1643 * Find text in document, supporting both forward and backward
1644 * searches (just pass minPos > maxPos to do a backward search)
1645 * Has not been tested with backwards DBCS searches yet.
// Search for `search` between minPos and maxPos; the search runs backwards
// when minPos > maxPos. `flags` holds SCFIND_* bits.
// *length is the byte length of `search` on entry; the case-insensitive
// UTF-8 and DBCS branches overwrite it with the byte length of the match in
// the document. Returns the match position, -1 when nothing matches, or
// minPos when *length <= 0.
1647 long Document::FindText(int minPos, int maxPos, const char *search,
1648 int flags, int *length) {
1649 if (*length <= 0)
1650 return minPos;
1651 const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
1652 const bool word = (flags & SCFIND_WHOLEWORD) != 0;
1653 const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
1654 const bool regExp = (flags & SCFIND_REGEXP) != 0;
// Regular expression searches are delegated to the regex object, created on first use.
1655 if (regExp) {
1656 if (!regex)
1657 regex = CreateRegexSearch(&charClass);
1658 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1659 } else {
1661 const bool forward = minPos <= maxPos;
1662 const int increment = forward ? 1 : -1;
1664 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1665 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1666 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1668 // Compute actual search ranges needed
1669 const int lengthFind = *length;
1671 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the higher end of the range; a match may not extend past it.
1672 const int limitPos = Platform::Maximum(startPos, endPos);
1673 int pos = startPos;
1674 if (!forward) {
1675 // Back all of a character
1676 pos = NextPosition(pos, increment);
// Branch 1: case-sensitive search compares document bytes directly with `search`.
1678 if (caseSensitive) {
1679 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1680 const char charStartSearch = search[0];
1681 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
// Only compare the remaining bytes when the first byte matches.
1682 if (CharAt(pos) == charStartSearch) {
1683 bool found = (pos + lengthFind) <= limitPos;
1684 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1685 found = CharAt(pos + indexSearch) == search[indexSearch];
1687 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1688 return pos;
1691 if (!NextCharacter(pos, increment))
1692 break;
// Branch 2: case-insensitive UTF-8. The search text is folded once with pcf, then
// each document character is folded and compared with the folded search text.
1694 } else if (SC_CP_UTF8 == dbcsCodePage) {
1695 const size_t maxFoldingExpansion = 4;
1696 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1697 const int lenSearch = static_cast<int>(
1698 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1699 char bytes[UTF8MaxBytes + 1];
1700 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1701 while (forward ? (pos < endPos) : (pos >= endPos)) {
1702 int widthFirstCharacter = 0;
1703 int posIndexDocument = pos;
1704 int indexSearch = 0;
1705 bool characterMatches = true;
// Compare one document character per iteration until a mismatch, the range
// limit, or the whole folded search text has been consumed.
1706 for (;;) {
1707 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1708 bytes[0] = leadByte;
1709 int widthChar = 1;
1710 if (!UTF8IsAscii(leadByte)) {
1711 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1712 for (int b=1; b<widthCharBytes; b++) {
1713 bytes[b] = cb.CharAt(posIndexDocument+b);
1715 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the first character so a forward search can step over it.
1717 if (!widthFirstCharacter)
1718 widthFirstCharacter = widthChar;
1719 if ((posIndexDocument + widthChar) > limitPos)
1720 break;
1721 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1722 folded[lenFlat] = 0;
1723 // Does folded match the buffer
1724 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1725 if (!characterMatches)
1726 break;
1727 posIndexDocument += widthChar;
1728 indexSearch += lenFlat;
1729 if (indexSearch >= lenSearch)
1730 break;
// A match needs every compared character to match and exactly lenSearch folded bytes consumed.
1732 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1733 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1734 *length = posIndexDocument - pos;
1735 return pos;
1738 if (forward) {
1739 pos += widthFirstCharacter;
1740 } else {
1741 if (!NextCharacter(pos, increment))
1742 break;
// Branch 3: case-insensitive search in a DBCS code page; characters are 1 or 2 bytes.
1745 } else if (dbcsCodePage) {
1746 const size_t maxBytesCharacter = 2;
1747 const size_t maxFoldingExpansion = 4;
1748 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1749 const int lenSearch = static_cast<int>(
1750 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1751 while (forward ? (pos < endPos) : (pos >= endPos)) {
1752 int indexDocument = 0;
1753 int indexSearch = 0;
1754 bool characterMatches = true;
1755 while (characterMatches &&
1756 ((pos + indexDocument) < limitPos) &&
1757 (indexSearch < lenSearch)) {
1758 char bytes[maxBytesCharacter + 1];
1759 bytes[0] = cb.CharAt(pos + indexDocument);
1760 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1761 if (widthChar == 2)
1762 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1763 if ((pos + indexDocument + widthChar) > limitPos)
1764 break;
1765 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1766 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1767 folded[lenFlat] = 0;
1768 // Does folded match the buffer
1769 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1770 indexDocument += widthChar;
1771 indexSearch += lenFlat;
1773 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1774 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1775 *length = indexDocument;
1776 return pos;
1779 if (!NextCharacter(pos, increment))
1780 break;
// Branch 4: case-insensitive search otherwise; each document byte is folded on its
// own and compared with the matching byte of the folded search text.
1782 } else {
1783 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1784 std::vector<char> searchThing(lengthFind + 1);
1785 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1786 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1787 bool found = (pos + lengthFind) <= limitPos;
1788 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1789 char ch = CharAt(pos + indexSearch);
1790 char folded[2];
1791 pcf->Fold(folded, sizeof(folded), &ch, 1);
1792 found = folded[0] == searchThing[indexSearch];
1794 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1795 return pos;
1797 if (!NextCharacter(pos, increment))
1798 break;
1802 //Platform::DebugPrintf("Not found\n");
1803 return -1;
1806 const char *Document::SubstituteByPosition(const char *text, int *length) {
1807 if (regex)
1808 return regex->SubstituteByPosition(this, text, length);
1809 else
1810 return 0;
1813 int Document::LinesTotal() const {
1814 return cb.Lines();
1817 void Document::SetDefaultCharClasses(bool includeWordClass) {
1818 charClass.SetDefaultCharClasses(includeWordClass);
1821 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1822 charClass.SetCharClasses(chars, newCharClass);
1825 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1826 return charClass.GetCharsOfClass(characterClass, buffer);
1829 void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
1830 endStyled = position;
1833 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
1834 if (enteredStyling != 0) {
1835 return false;
1836 } else {
1837 enteredStyling++;
1838 int prevEndStyled = endStyled;
1839 if (cb.SetStyleFor(endStyled, length, style)) {
1840 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1841 prevEndStyled, length);
1842 NotifyModified(mh);
1844 endStyled += length;
1845 enteredStyling--;
1846 return true;
1850 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
1851 if (enteredStyling != 0) {
1852 return false;
1853 } else {
1854 enteredStyling++;
1855 bool didChange = false;
1856 int startMod = 0;
1857 int endMod = 0;
1858 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1859 PLATFORM_ASSERT(endStyled < Length());
1860 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1861 if (!didChange) {
1862 startMod = endStyled;
1864 didChange = true;
1865 endMod = endStyled;
1868 if (didChange) {
1869 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1870 startMod, endMod - startMod + 1);
1871 NotifyModified(mh);
1873 enteredStyling--;
1874 return true;
1878 void Document::EnsureStyledTo(int pos) {
1879 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1880 IncrementStyleClock();
1881 if (pli && !pli->UseContainerLexing()) {
1882 int lineEndStyled = LineFromPosition(GetEndStyled());
1883 int endStyledTo = LineStart(lineEndStyled);
1884 pli->Colourise(endStyledTo, pos);
1885 } else {
1886 // Ask the watchers to style, and stop as soon as one responds.
1887 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1888 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1889 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1895 void Document::LexerChanged() {
1896 // Tell the watchers the lexer has changed.
1897 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1898 it->watcher->NotifyLexerChanged(this, it->userData);
1902 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
1903 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1904 if (state != statePrevious) {
1905 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1906 NotifyModified(mh);
1908 return statePrevious;
1911 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
1912 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1915 int Document::GetMaxLineState() {
1916 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1919 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
1920 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1921 NotifyModified(mh);
1924 StyledText Document::MarginStyledText(int line) const {
1925 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1926 return StyledText(pla->Length(line), pla->Text(line),
1927 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1930 void Document::MarginSetText(int line, const char *text) {
1931 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1932 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1933 NotifyModified(mh);
1936 void Document::MarginSetStyle(int line, int style) {
1937 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1938 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1941 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1942 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1943 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1946 void Document::MarginClearAll() {
1947 int maxEditorLine = LinesTotal();
1948 for (int l=0; l<maxEditorLine; l++)
1949 MarginSetText(l, 0);
1950 // Free remaining data
1951 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1954 StyledText Document::AnnotationStyledText(int line) const {
1955 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1956 return StyledText(pla->Length(line), pla->Text(line),
1957 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1960 void Document::AnnotationSetText(int line, const char *text) {
1961 if (line >= 0 && line < LinesTotal()) {
1962 const int linesBefore = AnnotationLines(line);
1963 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1964 const int linesAfter = AnnotationLines(line);
1965 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1966 mh.annotationLinesAdded = linesAfter - linesBefore;
1967 NotifyModified(mh);
1971 void Document::AnnotationSetStyle(int line, int style) {
1972 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1973 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1974 NotifyModified(mh);
1977 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1978 if (line >= 0 && line < LinesTotal()) {
1979 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1983 int Document::AnnotationLines(int line) const {
1984 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1987 void Document::AnnotationClearAll() {
1988 int maxEditorLine = LinesTotal();
1989 for (int l=0; l<maxEditorLine; l++)
1990 AnnotationSetText(l, 0);
1991 // Free remaining data
1992 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1995 void Document::IncrementStyleClock() {
1996 styleClock = (styleClock + 1) % 0x100000;
1999 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2000 if (decorations.FillRange(position, value, fillLength)) {
2001 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2002 position, fillLength);
2003 NotifyModified(mh);
2007 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2008 WatcherWithUserData wwud(watcher, userData);
2009 std::vector<WatcherWithUserData>::iterator it =
2010 std::find(watchers.begin(), watchers.end(), wwud);
2011 if (it != watchers.end())
2012 return false;
2013 watchers.push_back(wwud);
2014 return true;
2017 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2018 std::vector<WatcherWithUserData>::iterator it =
2019 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2020 if (it != watchers.end()) {
2021 watchers.erase(it);
2022 return true;
2024 return false;
2027 void Document::NotifyModifyAttempt() {
2028 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2029 it->watcher->NotifyModifyAttempt(this, it->userData);
2033 void Document::NotifySavePoint(bool atSavePoint) {
2034 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2035 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2039 void Document::NotifyModified(DocModification mh) {
2040 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2041 decorations.InsertSpace(mh.position, mh.length);
2042 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2043 decorations.DeleteRange(mh.position, mh.length);
2045 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2046 it->watcher->NotifyModified(this, mh, it->userData);
2050 bool Document::IsWordPartSeparator(char ch) const {
2051 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2054 int Document::WordPartLeft(int pos) {
2055 if (pos > 0) {
2056 --pos;
2057 char startChar = cb.CharAt(pos);
2058 if (IsWordPartSeparator(startChar)) {
2059 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2060 --pos;
2063 if (pos > 0) {
2064 startChar = cb.CharAt(pos);
2065 --pos;
2066 if (IsLowerCase(startChar)) {
2067 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2068 --pos;
2069 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2070 ++pos;
2071 } else if (IsUpperCase(startChar)) {
2072 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2073 --pos;
2074 if (!IsUpperCase(cb.CharAt(pos)))
2075 ++pos;
2076 } else if (IsADigit(startChar)) {
2077 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2078 --pos;
2079 if (!IsADigit(cb.CharAt(pos)))
2080 ++pos;
2081 } else if (IsPunctuation(startChar)) {
2082 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2083 --pos;
2084 if (!IsPunctuation(cb.CharAt(pos)))
2085 ++pos;
2086 } else if (isspacechar(startChar)) {
2087 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2088 --pos;
2089 if (!isspacechar(cb.CharAt(pos)))
2090 ++pos;
2091 } else if (!IsASCII(startChar)) {
2092 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2093 --pos;
2094 if (IsASCII(cb.CharAt(pos)))
2095 ++pos;
2096 } else {
2097 ++pos;
2101 return pos;
2104 int Document::WordPartRight(int pos) {
2105 char startChar = cb.CharAt(pos);
2106 int length = Length();
2107 if (IsWordPartSeparator(startChar)) {
2108 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2109 ++pos;
2110 startChar = cb.CharAt(pos);
2112 if (!IsASCII(startChar)) {
2113 while (pos < length && !IsASCII(cb.CharAt(pos)))
2114 ++pos;
2115 } else if (IsLowerCase(startChar)) {
2116 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2117 ++pos;
2118 } else if (IsUpperCase(startChar)) {
2119 if (IsLowerCase(cb.CharAt(pos + 1))) {
2120 ++pos;
2121 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2122 ++pos;
2123 } else {
2124 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2125 ++pos;
2127 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2128 --pos;
2129 } else if (IsADigit(startChar)) {
2130 while (pos < length && IsADigit(cb.CharAt(pos)))
2131 ++pos;
2132 } else if (IsPunctuation(startChar)) {
2133 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2134 ++pos;
2135 } else if (isspacechar(startChar)) {
2136 while (pos < length && isspacechar(cb.CharAt(pos)))
2137 ++pos;
2138 } else {
2139 ++pos;
2141 return pos;
/**
 * Report whether c is a line feed or a carriage return.
 */
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2148 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2149 int sStart = cb.StyleAt(pos);
2150 if (delta < 0) {
2151 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2152 pos--;
2153 pos++;
2154 } else {
2155 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2156 pos++;
2158 return pos;
/**
 * Return the partner of a brace character: () [] {} <> map onto each other.
 * Any other character yields '\0'.
 */
static char BraceOpposite(char ch) {
	// Same index in both strings forms a pair.
	static const char openers[] = "([{<";
	static const char closers[] = ")]}>";
	for (int i = 0; openers[i] != '\0'; i++) {
		if (ch == openers[i])
			return closers[i];
		if (ch == closers[i])
			return openers[i];
	}
	return '\0';
}
2184 // TODO: should be able to extend styled region to find matching brace
2185 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2186 char chBrace = CharAt(position);
2187 char chSeek = BraceOpposite(chBrace);
2188 if (chSeek == '\0')
2189 return - 1;
2190 char styBrace = static_cast<char>(StyleAt(position));
2191 int direction = -1;
2192 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2193 direction = 1;
2194 int depth = 1;
2195 position = NextPosition(position, direction);
2196 while ((position >= 0) && (position < Length())) {
2197 char chAtPos = CharAt(position);
2198 char styAtPos = static_cast<char>(StyleAt(position));
2199 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2200 if (chAtPos == chBrace)
2201 depth++;
2202 if (chAtPos == chSeek)
2203 depth--;
2204 if (depth == 0)
2205 return position;
2207 int positionBeforeMove = position;
2208 position = NextPosition(position, direction);
2209 if (position == positionBeforeMove)
2210 break;
2212 return - 1;
2216 * Implementation of RegexSearchBase for the default built-in regular expression engine
2218 class BuiltinRegex : public RegexSearchBase {
2219 public:
2220 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2222 virtual ~BuiltinRegex() {
2225 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2226 bool caseSensitive, bool word, bool wordStart, int flags,
2227 int *length);
2229 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2231 private:
2232 RESearch search;
2233 std::string substituted;
2236 namespace {
2239 * RESearchRange keeps track of search range.
2241 class RESearchRange {
2242 public:
2243 const Document *doc;
2244 int increment;
2245 int startPos;
2246 int endPos;
2247 int lineRangeStart;
2248 int lineRangeEnd;
2249 int lineRangeBreak;
2250 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2251 increment = (minPos <= maxPos) ? 1 : -1;
2253 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2254 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2255 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2257 lineRangeStart = doc->LineFromPosition(startPos);
2258 lineRangeEnd = doc->LineFromPosition(endPos);
2259 if ((increment == 1) &&
2260 (startPos >= doc->LineEnd(lineRangeStart)) &&
2261 (lineRangeStart < lineRangeEnd)) {
2262 // the start position is at end of line or between line end characters.
2263 lineRangeStart++;
2264 startPos = doc->LineStart(lineRangeStart);
2265 } else if ((increment == -1) &&
2266 (startPos <= doc->LineStart(lineRangeStart)) &&
2267 (lineRangeStart > lineRangeEnd)) {
2268 // the start position is at beginning of line.
2269 lineRangeStart--;
2270 startPos = doc->LineEnd(lineRangeStart);
2272 lineRangeBreak = lineRangeEnd + increment;
2274 Range LineRange(int line) const {
2275 Range range(doc->LineStart(line), doc->LineEnd(line));
2276 if (increment == 1) {
2277 if (line == lineRangeStart)
2278 range.start = startPos;
2279 if (line == lineRangeEnd)
2280 range.end = endPos;
2281 } else {
2282 if (line == lineRangeEnd)
2283 range.start = endPos;
2284 if (line == lineRangeStart)
2285 range.end = startPos;
2287 return range;
2291 // Define a way for the Regular Expression code to access the document
2292 class DocumentIndexer : public CharacterIndexer {
2293 Document *pdoc;
2294 int end;
2295 public:
2296 DocumentIndexer(Document *pdoc_, int end_) :
2297 pdoc(pdoc_), end(end_) {
2300 virtual ~DocumentIndexer() {
2303 virtual char CharAt(int index) {
2304 if (index < 0 || index >= end)
2305 return 0;
2306 else
2307 return pdoc->CharAt(index);
2311 #ifdef CXX11_REGEX
2313 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2314 public:
2315 const Document *doc;
2316 Position position;
2317 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2319 ByteIterator(const ByteIterator &other) {
2320 doc = other.doc;
2321 position = other.position;
2323 ByteIterator &operator=(const ByteIterator &other) {
2324 if (this != &other) {
2325 doc = other.doc;
2326 position = other.position;
2328 return *this;
2330 char operator*() const {
2331 return doc->CharAt(position);
2333 ByteIterator &operator++() {
2334 position++;
2335 return *this;
2337 ByteIterator operator++(int) {
2338 ByteIterator retVal(*this);
2339 position++;
2340 return retVal;
2342 ByteIterator &operator--() {
2343 position--;
2344 return *this;
2346 bool operator==(const ByteIterator &other) const {
2347 return doc == other.doc && position == other.position;
2349 bool operator!=(const ByteIterator &other) const {
2350 return doc != other.doc || position != other.position;
2352 int Pos() const {
2353 return position;
2355 int PosRoundUp() const {
2356 return position;
// wchar_t is 16 bits wide on Windows and 32 bits wide on Unix.
// Differentiating with sizeof(wchar_t) or similar would be better,
// but hard-coding the platforms is easier for now.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

#if defined(_WIN32)
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
2372 #if WCHAR_T_IS_16
2374 // On Windows, report non-BMP characters as 2 separate surrogates as that
2375 // matches wregex since it is based on wchar_t.
2376 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2377 // These 3 fields determine the iterator position and are used for comparisons
2378 const Document *doc;
2379 Position position;
2380 size_t characterIndex;
2381 // Remaining fields are derived from the determining fields so are excluded in comparisons
2382 unsigned int lenBytes;
2383 size_t lenCharacters;
2384 wchar_t buffered[2];
2385 public:
2386 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2387 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2388 buffered[0] = 0;
2389 buffered[1] = 0;
2390 if (doc) {
2391 ReadCharacter();
2394 UTF8Iterator(const UTF8Iterator &other) {
2395 doc = other.doc;
2396 position = other.position;
2397 characterIndex = other.characterIndex;
2398 lenBytes = other.lenBytes;
2399 lenCharacters = other.lenCharacters;
2400 buffered[0] = other.buffered[0];
2401 buffered[1] = other.buffered[1];
2403 UTF8Iterator &operator=(const UTF8Iterator &other) {
2404 if (this != &other) {
2405 doc = other.doc;
2406 position = other.position;
2407 characterIndex = other.characterIndex;
2408 lenBytes = other.lenBytes;
2409 lenCharacters = other.lenCharacters;
2410 buffered[0] = other.buffered[0];
2411 buffered[1] = other.buffered[1];
2413 return *this;
2415 wchar_t operator*() const {
2416 assert(lenCharacters != 0);
2417 return buffered[characterIndex];
2419 UTF8Iterator &operator++() {
2420 if ((characterIndex + 1) < (lenCharacters)) {
2421 characterIndex++;
2422 } else {
2423 position += lenBytes;
2424 ReadCharacter();
2425 characterIndex = 0;
2427 return *this;
2429 UTF8Iterator operator++(int) {
2430 UTF8Iterator retVal(*this);
2431 if ((characterIndex + 1) < (lenCharacters)) {
2432 characterIndex++;
2433 } else {
2434 position += lenBytes;
2435 ReadCharacter();
2436 characterIndex = 0;
2438 return retVal;
2440 UTF8Iterator &operator--() {
2441 if (characterIndex) {
2442 characterIndex--;
2443 } else {
2444 position = doc->NextPosition(position, -1);
2445 ReadCharacter();
2446 characterIndex = lenCharacters - 1;
2448 return *this;
2450 bool operator==(const UTF8Iterator &other) const {
2451 // Only test the determining fields, not the character widths and values derived from this
2452 return doc == other.doc &&
2453 position == other.position &&
2454 characterIndex == other.characterIndex;
2456 bool operator!=(const UTF8Iterator &other) const {
2457 // Only test the determining fields, not the character widths and values derived from this
2458 return doc != other.doc ||
2459 position != other.position ||
2460 characterIndex != other.characterIndex;
2462 int Pos() const {
2463 return position;
2465 int PosRoundUp() const {
2466 if (characterIndex)
2467 return position + lenBytes; // Force to end of character
2468 else
2469 return position;
2471 private:
2472 void ReadCharacter() {
2473 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2474 lenBytes = charExtracted.widthBytes;
2475 if (charExtracted.character == unicodeReplacementChar) {
2476 lenCharacters = 1;
2477 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2478 } else {
2479 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2484 #else
2486 // On Unix, report non-BMP characters as single characters
2488 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2489 const Document *doc;
2490 Position position;
2491 public:
2492 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2494 UTF8Iterator(const UTF8Iterator &other) {
2495 doc = other.doc;
2496 position = other.position;
2498 UTF8Iterator &operator=(const UTF8Iterator &other) {
2499 if (this != &other) {
2500 doc = other.doc;
2501 position = other.position;
2503 return *this;
2505 wchar_t operator*() const {
2506 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2507 return charExtracted.character;
2509 UTF8Iterator &operator++() {
2510 position = doc->NextPosition(position, 1);
2511 return *this;
2513 UTF8Iterator operator++(int) {
2514 UTF8Iterator retVal(*this);
2515 position = doc->NextPosition(position, 1);
2516 return retVal;
2518 UTF8Iterator &operator--() {
2519 position = doc->NextPosition(position, -1);
2520 return *this;
2522 bool operator==(const UTF8Iterator &other) const {
2523 return doc == other.doc && position == other.position;
2525 bool operator!=(const UTF8Iterator &other) const {
2526 return doc != other.doc || position != other.position;
2528 int Pos() const {
2529 return position;
2531 int PosRoundUp() const {
2532 return position;
2536 #endif
2538 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2539 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2540 if (!doc->IsLineStartPosition(startPos))
2541 flagsMatch |= std::regex_constants::match_not_bol;
2542 if (!doc->IsLineEndPosition(endPos))
2543 flagsMatch |= std::regex_constants::match_not_eol;
2544 return flagsMatch;
2547 template<typename Iterator, typename Regex>
2548 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2549 bool matched = false;
2550 std::match_results<Iterator> match;
2552 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2553 // If they didn't then the line by line iteration could be removed for the forwards
2554 // case and replaced with the following 4 lines:
2555 // Iterator uiStart(doc, startPos);
2556 // Iterator uiEnd(doc, endPos);
2557 // flagsMatch = MatchFlags(doc, startPos, endPos);
2558 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2560 // Line by line.
2561 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2562 const Range lineRange = resr.LineRange(line);
2563 Iterator itStart(doc, lineRange.start);
2564 Iterator itEnd(doc, lineRange.end);
2565 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2566 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2567 // Check for the last match on this line.
2568 if (matched) {
2569 if (resr.increment == -1) {
2570 while (matched) {
2571 Iterator itNext(doc, match[0].second.PosRoundUp());
2572 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2573 std::match_results<Iterator> matchNext;
2574 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2575 if (matched) {
2576 if (match[0].first == match[0].second) {
2577 // Empty match means failure so exit
2578 return false;
2580 match = matchNext;
2583 matched = true;
2585 break;
2588 if (matched) {
2589 for (size_t co = 0; co < match.size(); co++) {
2590 search.bopat[co] = match[co].first.Pos();
2591 search.eopat[co] = match[co].second.PosRoundUp();
2592 size_t lenMatch = search.eopat[co] - search.bopat[co];
2593 search.pat[co].resize(lenMatch);
2594 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2595 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2599 return matched;
2602 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2603 bool caseSensitive, int *length, RESearch &search) {
2604 const RESearchRange resr(doc, minPos, maxPos);
2605 try {
2606 //ElapsedTime et;
2607 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2608 // Flags that apper to have no effect:
2609 // | std::regex::collate | std::regex::extended;
2610 if (!caseSensitive)
2611 flagsRe = flagsRe | std::regex::icase;
2613 // Clear the RESearch so can fill in matches
2614 search.Clear();
2616 bool matched = false;
2617 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2618 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2619 std::vector<wchar_t> ws(lenS + 1);
2620 #if WCHAR_T_IS_16
2621 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2622 #else
2623 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2624 #endif
2625 ws[outLen] = 0;
2626 std::wregex regexp;
2627 #if defined(__APPLE__)
2628 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2629 // is one byte not one character.
2630 // However, on OS X this makes wregex act as Unicode
2631 std::locale localeU("en_US.UTF-8");
2632 regexp.imbue(localeU);
2633 #endif
2634 regexp.assign(&ws[0], flagsRe);
2635 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2637 } else {
2638 std::regex regexp;
2639 regexp.assign(s, flagsRe);
2640 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2643 int posMatch = -1;
2644 if (matched) {
2645 posMatch = search.bopat[0];
2646 *length = search.eopat[0] - search.bopat[0];
2648 // Example - search in doc/ScintillaHistory.html for
2649 // [[:upper:]]eta[[:space:]]
2650 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2651 //double durSearch = et.Duration(true);
2652 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2653 return posMatch;
2654 } catch (std::regex_error &) {
2655 // Failed to create regular expression
2656 throw RegexError();
2657 } catch (...) {
2658 // Failed in some other way
2659 return -1;
2663 #endif
2667 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2668 bool caseSensitive, bool, bool, int flags,
2669 int *length) {
2671 #ifdef CXX11_REGEX
2672 if (flags & SCFIND_CXX11REGEX) {
2673 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2674 caseSensitive, length, search);
2676 #endif
2678 const RESearchRange resr(doc, minPos, maxPos);
2680 const bool posix = (flags & SCFIND_POSIX) != 0;
2682 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2683 if (errmsg) {
2684 return -1;
2686 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2687 // Replace first '.' with '-' in each property file variable reference:
2688 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2689 // Replace: $(\1-\2)
2690 int pos = -1;
2691 int lenRet = 0;
2692 const char searchEnd = s[*length - 1];
2693 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2694 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2695 int startOfLine = doc->LineStart(line);
2696 int endOfLine = doc->LineEnd(line);
2697 if (resr.increment == 1) {
2698 if (line == resr.lineRangeStart) {
2699 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2700 continue; // Can't match start of line if start position after start of line
2701 startOfLine = resr.startPos;
2703 if (line == resr.lineRangeEnd) {
2704 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2705 continue; // Can't match end of line if end position before end of line
2706 endOfLine = resr.endPos;
2708 } else {
2709 if (line == resr.lineRangeEnd) {
2710 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2711 continue; // Can't match start of line if end position after start of line
2712 startOfLine = resr.endPos;
2714 if (line == resr.lineRangeStart) {
2715 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2716 continue; // Can't match end of line if start position before end of line
2717 endOfLine = resr.startPos;
2721 DocumentIndexer di(doc, endOfLine);
2722 int success = search.Execute(di, startOfLine, endOfLine);
2723 if (success) {
2724 pos = search.bopat[0];
2725 // Ensure only whole characters selected
2726 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2727 lenRet = search.eopat[0] - search.bopat[0];
2728 // There can be only one start of a line, so no need to look for last match in line
2729 if ((resr.increment == -1) && (s[0] != '^')) {
2730 // Check for the last match on this line.
2731 int repetitions = 1000; // Break out of infinite loop
2732 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2733 success = search.Execute(di, pos+1, endOfLine);
2734 if (success) {
2735 if (search.eopat[0] <= minPos) {
2736 pos = search.bopat[0];
2737 lenRet = search.eopat[0] - search.bopat[0];
2738 } else {
2739 success = 0;
2744 break;
2747 *length = lenRet;
2748 return pos;
2751 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2752 substituted.clear();
2753 DocumentIndexer di(doc, doc->Length());
2754 search.GrabMatches(di);
2755 for (int j = 0; j < *length; j++) {
2756 if (text[j] == '\\') {
2757 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2758 unsigned int patNum = text[j + 1] - '0';
2759 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2760 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2761 substituted.append(search.pat[patNum].c_str(), len);
2762 j++;
2763 } else {
2764 j++;
2765 switch (text[j]) {
2766 case 'a':
2767 substituted.push_back('\a');
2768 break;
2769 case 'b':
2770 substituted.push_back('\b');
2771 break;
2772 case 'f':
2773 substituted.push_back('\f');
2774 break;
2775 case 'n':
2776 substituted.push_back('\n');
2777 break;
2778 case 'r':
2779 substituted.push_back('\r');
2780 break;
2781 case 't':
2782 substituted.push_back('\t');
2783 break;
2784 case 'v':
2785 substituted.push_back('\v');
2786 break;
2787 case '\\':
2788 substituted.push_back('\\');
2789 break;
2790 default:
2791 substituted.push_back('\\');
2792 j--;
2795 } else {
2796 substituted.push_back(text[j]);
2799 *length = static_cast<int>(substituted.length());
2800 return substituted.c_str();
2803 #ifndef SCI_OWNREGEX
2805 #ifdef SCI_NAMESPACE
2807 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2808 return new BuiltinRegex(charClassTable);
2811 #else
2813 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2814 return new BuiltinRegex(charClassTable);
2817 #endif
2819 #endif